| 1 1 1 7 7 7 2 7 4 4 1 2 1 3 1 1 1 1 1 3 3 3 3 3 1 1 1 1 1 1 8 8 2 1 2 2 2 1 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 | // SPDX-License-Identifier: GPL-2.0-only /* * * general timer device for using in ISDN stacks * * Author Karsten Keil <kkeil@novell.com> * * Copyright 2008 by Karsten Keil <kkeil@novell.com> */ #include <linux/poll.h> #include <linux/vmalloc.h> #include <linux/slab.h> #include <linux/timer.h> #include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mISDNif.h> #include <linux/mutex.h> #include <linux/sched/signal.h> #include "core.h" static DEFINE_MUTEX(mISDN_mutex); static u_int *debug; struct mISDNtimerdev { int next_id; struct list_head pending; struct list_head expired; wait_queue_head_t wait; u_int work; spinlock_t lock; /* protect lists */ }; struct mISDNtimer { struct list_head list; struct mISDNtimerdev *dev; struct timer_list tl; int id; }; static int mISDN_open(struct inode *ino, struct file *filep) { struct mISDNtimerdev *dev; if (*debug & DEBUG_TIMER) printk(KERN_DEBUG "%s(%p,%p)\n", __func__, ino, filep); dev = kmalloc(sizeof(struct mISDNtimerdev) , GFP_KERNEL); if (!dev) return -ENOMEM; dev->next_id = 1; INIT_LIST_HEAD(&dev->pending); INIT_LIST_HEAD(&dev->expired); spin_lock_init(&dev->lock); dev->work = 0; init_waitqueue_head(&dev->wait); filep->private_data = dev; return nonseekable_open(ino, filep); } static int mISDN_close(struct inode *ino, struct file *filep) { struct mISDNtimerdev *dev = filep->private_data; struct list_head *list = &dev->pending; struct mISDNtimer *timer, *next; if (*debug & DEBUG_TIMER) printk(KERN_DEBUG "%s(%p,%p)\n", __func__, ino, filep); spin_lock_irq(&dev->lock); while (!list_empty(list)) { timer = list_first_entry(list, struct mISDNtimer, list); spin_unlock_irq(&dev->lock); timer_shutdown_sync(&timer->tl); spin_lock_irq(&dev->lock); /* it might have been moved to ->expired */ list_del(&timer->list); kfree(timer); } spin_unlock_irq(&dev->lock); list_for_each_entry_safe(timer, next, &dev->expired, list) { kfree(timer); } kfree(dev); return 0; } static ssize_t mISDN_read(struct file *filep, char __user *buf, size_t count, loff_t *off) { struct mISDNtimerdev *dev = filep->private_data; struct list_head *list = &dev->expired; struct mISDNtimer *timer; int ret = 0; if (*debug & DEBUG_TIMER) printk(KERN_DEBUG "%s(%p, %p, %d, %p)\n", __func__, filep, buf, (int)count, off); if (count < sizeof(int)) return -ENOSPC; spin_lock_irq(&dev->lock); while (list_empty(list) && (dev->work == 0)) { spin_unlock_irq(&dev->lock); if (filep->f_flags & O_NONBLOCK) return -EAGAIN; wait_event_interruptible(dev->wait, (dev->work || !list_empty(list))); if (signal_pending(current)) return -ERESTARTSYS; spin_lock_irq(&dev->lock); } if (dev->work) dev->work = 0; if (!list_empty(list)) { timer = list_first_entry(list, struct mISDNtimer, list); list_del(&timer->list); spin_unlock_irq(&dev->lock); if (put_user(timer->id, (int __user *)buf)) ret = -EFAULT; else ret = sizeof(int); kfree(timer); } else { spin_unlock_irq(&dev->lock); } return ret; } static __poll_t mISDN_poll(struct file *filep, poll_table *wait) { struct mISDNtimerdev *dev = filep->private_data; __poll_t mask = EPOLLERR; if (*debug & DEBUG_TIMER) printk(KERN_DEBUG "%s(%p, %p)\n", __func__, filep, wait); if (dev) { poll_wait(filep, &dev->wait, wait); mask = 0; if (dev->work || !list_empty(&dev->expired)) mask |= (EPOLLIN | EPOLLRDNORM); if (*debug & DEBUG_TIMER) printk(KERN_DEBUG "%s work(%d) empty(%d)\n", __func__, dev->work, list_empty(&dev->expired)); } return mask; } static void dev_expire_timer(struct timer_list *t) { struct mISDNtimer *timer = from_timer(timer, t, tl); u_long flags; spin_lock_irqsave(&timer->dev->lock, flags); if (timer->id >= 0) list_move_tail(&timer->list, &timer->dev->expired); wake_up_interruptible(&timer->dev->wait); spin_unlock_irqrestore(&timer->dev->lock, flags); } static int misdn_add_timer(struct mISDNtimerdev *dev, int timeout) { int id; struct mISDNtimer *timer; if (!timeout) { dev->work = 1; wake_up_interruptible(&dev->wait); id = 0; } else { timer = kzalloc(sizeof(struct mISDNtimer), GFP_KERNEL); if (!timer) return -ENOMEM; timer->dev = dev; timer_setup(&timer->tl, dev_expire_timer, 0); spin_lock_irq(&dev->lock); id = timer->id = dev->next_id++; if (dev->next_id < 0) dev->next_id = 1; list_add_tail(&timer->list, &dev->pending); timer->tl.expires = jiffies + ((HZ * (u_long)timeout) / 1000); add_timer(&timer->tl); spin_unlock_irq(&dev->lock); } return id; } static int misdn_del_timer(struct mISDNtimerdev *dev, int id) { struct mISDNtimer *timer; spin_lock_irq(&dev->lock); list_for_each_entry(timer, &dev->pending, list) { if (timer->id == id) { list_del_init(&timer->list); timer->id = -1; spin_unlock_irq(&dev->lock); timer_shutdown_sync(&timer->tl); kfree(timer); return id; } } spin_unlock_irq(&dev->lock); return 0; } static long mISDN_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) { struct mISDNtimerdev *dev = filep->private_data; int id, tout, ret = 0; if (*debug & DEBUG_TIMER) printk(KERN_DEBUG "%s(%p, %x, %lx)\n", __func__, filep, cmd, arg); mutex_lock(&mISDN_mutex); switch (cmd) { case IMADDTIMER: if (get_user(tout, (int __user *)arg)) { ret = -EFAULT; break; } id = misdn_add_timer(dev, tout); if (*debug & DEBUG_TIMER) printk(KERN_DEBUG "%s add %d id %d\n", __func__, tout, id); if (id < 0) { ret = id; break; } if (put_user(id, (int __user *)arg)) ret = -EFAULT; break; case IMDELTIMER: if (get_user(id, (int __user *)arg)) { ret = -EFAULT; break; } if (*debug & DEBUG_TIMER) printk(KERN_DEBUG "%s del id %d\n", __func__, id); id = misdn_del_timer(dev, id); if (put_user(id, (int __user *)arg)) ret = -EFAULT; break; default: ret = -EINVAL; } mutex_unlock(&mISDN_mutex); return ret; } static const struct file_operations mISDN_fops = { .owner = THIS_MODULE, .read = mISDN_read, .poll = mISDN_poll, .unlocked_ioctl = mISDN_ioctl, .open = mISDN_open, .release = mISDN_close, .llseek = no_llseek, }; static struct miscdevice mISDNtimer = { .minor = MISC_DYNAMIC_MINOR, .name = "mISDNtimer", .fops = &mISDN_fops, }; int mISDN_inittimer(u_int *deb) { int err; debug = deb; err = misc_register(&mISDNtimer); if (err) printk(KERN_WARNING "mISDN: Could not register timer device\n"); return err; } void mISDN_timer_cleanup(void) { misc_deregister(&mISDNtimer); } |
| 4 1 1 1 1 1 1 1 56 56 7 4 1 1 4 7 8 1 1 1 1 1 1 2 2 5 5 5 5 5 8 7 1 1 1 1 2 1 1 5 5 1 2 3 3 3 117 10 6 1 2 2 1 3 14 1 8 5 1 3 10 10 2 1 3 5 7 1 4 4 1 3 1 3 12 117 2 1 115 117 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 | // SPDX-License-Identifier: GPL-2.0 /* * XFRM virtual interface * * Copyright (C) 2018 secunet Security Networks AG * * Author: * Steffen Klassert <steffen.klassert@secunet.com> */ #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/sockios.h> #include <linux/icmp.h> #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_link.h> #include <linux/if_arp.h> #include <linux/icmpv6.h> #include <linux/init.h> #include <linux/route.h> #include <linux/rtnetlink.h> #include <linux/netfilter_ipv6.h> #include <linux/slab.h> #include <linux/hash.h> #include <linux/uaccess.h> #include <linux/atomic.h> #include <net/gso.h> #include <net/icmp.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_route.h> #include <net/ip_tunnels.h> #include <net/addrconf.h> #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/dst_metadata.h> #include <net/netns/generic.h> #include <linux/etherdevice.h> static int xfrmi_dev_init(struct net_device *dev); static void xfrmi_dev_setup(struct net_device *dev); static struct rtnl_link_ops xfrmi_link_ops __read_mostly; static unsigned int xfrmi_net_id __read_mostly; static const struct net_device_ops xfrmi_netdev_ops; #define XFRMI_HASH_BITS 8 #define XFRMI_HASH_SIZE BIT(XFRMI_HASH_BITS) struct xfrmi_net { /* lists for storing interfaces in use */ struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE]; struct xfrm_if __rcu *collect_md_xfrmi; }; static const struct nla_policy xfrm_lwt_policy[LWT_XFRM_MAX + 1] = { [LWT_XFRM_IF_ID] = NLA_POLICY_MIN(NLA_U32, 1), [LWT_XFRM_LINK] = NLA_POLICY_MIN(NLA_U32, 1), }; static void xfrmi_destroy_state(struct lwtunnel_state *lwt) { } static int xfrmi_build_state(struct net *net, struct nlattr *nla, unsigned int family, const void *cfg, struct lwtunnel_state **ts, struct netlink_ext_ack *extack) { struct nlattr *tb[LWT_XFRM_MAX + 1]; struct lwtunnel_state *new_state; struct xfrm_md_info *info; int ret; ret = nla_parse_nested(tb, LWT_XFRM_MAX, nla, xfrm_lwt_policy, extack); if (ret < 0) return ret; if (!tb[LWT_XFRM_IF_ID]) { NL_SET_ERR_MSG(extack, "if_id must be set"); return -EINVAL; } new_state = lwtunnel_state_alloc(sizeof(*info)); if (!new_state) { NL_SET_ERR_MSG(extack, "failed to create encap info"); return -ENOMEM; } new_state->type = LWTUNNEL_ENCAP_XFRM; info = lwt_xfrm_info(new_state); info->if_id = nla_get_u32(tb[LWT_XFRM_IF_ID]); if (tb[LWT_XFRM_LINK]) info->link = nla_get_u32(tb[LWT_XFRM_LINK]); *ts = new_state; return 0; } static int xfrmi_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwt) { struct xfrm_md_info *info = lwt_xfrm_info(lwt); if (nla_put_u32(skb, LWT_XFRM_IF_ID, info->if_id) || (info->link && nla_put_u32(skb, LWT_XFRM_LINK, info->link))) return -EMSGSIZE; return 0; } static int xfrmi_encap_nlsize(struct lwtunnel_state *lwtstate) { return nla_total_size(sizeof(u32)) + /* LWT_XFRM_IF_ID */ nla_total_size(sizeof(u32)); /* LWT_XFRM_LINK */ } static int xfrmi_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) { struct xfrm_md_info *a_info = lwt_xfrm_info(a); struct xfrm_md_info *b_info = lwt_xfrm_info(b); return memcmp(a_info, b_info, sizeof(*a_info)); } static const struct lwtunnel_encap_ops xfrmi_encap_ops = { .build_state = xfrmi_build_state, .destroy_state = xfrmi_destroy_state, .fill_encap = xfrmi_fill_encap_info, .get_encap_size = xfrmi_encap_nlsize, .cmp_encap = xfrmi_encap_cmp, .owner = THIS_MODULE, }; #define for_each_xfrmi_rcu(start, xi) \ for (xi = rcu_dereference(start); xi; xi = rcu_dereference(xi->next)) static u32 xfrmi_hash(u32 if_id) { return hash_32(if_id, XFRMI_HASH_BITS); } static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) { struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); struct xfrm_if *xi; for_each_xfrmi_rcu(xfrmn->xfrmi[xfrmi_hash(x->if_id)], xi) { if (x->if_id == xi->p.if_id && (xi->dev->flags & IFF_UP)) return xi; } xi = rcu_dereference(xfrmn->collect_md_xfrmi); if (xi && (xi->dev->flags & IFF_UP)) return xi; return NULL; } static bool xfrmi_decode_session(struct sk_buff *skb, unsigned short family, struct xfrm_if_decode_session_result *res) { struct net_device *dev; struct xfrm_if *xi; int ifindex = 0; if (!secpath_exists(skb) || !skb->dev) return false; switch (family) { case AF_INET6: ifindex = inet6_sdif(skb); break; case AF_INET: ifindex = inet_sdif(skb); break; } if (ifindex) { struct net *net = xs_net(xfrm_input_state(skb)); dev = dev_get_by_index_rcu(net, ifindex); } else { dev = skb->dev; } if (!dev || !(dev->flags & IFF_UP)) return false; if (dev->netdev_ops != &xfrmi_netdev_ops) return false; xi = netdev_priv(dev); res->net = xi->net; if (xi->p.collect_md) res->if_id = xfrm_input_state(skb)->if_id; else res->if_id = xi->p.if_id; return true; } static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi) { struct xfrm_if __rcu **xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)]; rcu_assign_pointer(xi->next , rtnl_dereference(*xip)); rcu_assign_pointer(*xip, xi); } static void xfrmi_unlink(struct xfrmi_net *xfrmn, struct xfrm_if *xi) { struct xfrm_if __rcu **xip; struct xfrm_if *iter; for (xip = &xfrmn->xfrmi[xfrmi_hash(xi->p.if_id)]; (iter = rtnl_dereference(*xip)) != NULL; xip = &iter->next) { if (xi == iter) { rcu_assign_pointer(*xip, xi->next); break; } } } static void xfrmi_dev_free(struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); gro_cells_destroy(&xi->gro_cells); } static int xfrmi_create(struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); struct net *net = dev_net(dev); struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); int err; dev->rtnl_link_ops = &xfrmi_link_ops; err = register_netdevice(dev); if (err < 0) goto out; if (xi->p.collect_md) rcu_assign_pointer(xfrmn->collect_md_xfrmi, xi); else xfrmi_link(xfrmn, xi); return 0; out: return err; } static struct xfrm_if *xfrmi_locate(struct net *net, struct xfrm_if_parms *p) { struct xfrm_if __rcu **xip; struct xfrm_if *xi; struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); for (xip = &xfrmn->xfrmi[xfrmi_hash(p->if_id)]; (xi = rtnl_dereference(*xip)) != NULL; xip = &xi->next) if (xi->p.if_id == p->if_id) return xi; return NULL; } static void xfrmi_dev_uninit(struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); if (xi->p.collect_md) RCU_INIT_POINTER(xfrmn->collect_md_xfrmi, NULL); else xfrmi_unlink(xfrmn, xi); } static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) { skb_clear_tstamp(skb); skb->pkt_type = PACKET_HOST; skb->skb_iif = 0; skb->ignore_df = 0; skb_dst_drop(skb); nf_reset_ct(skb); nf_reset_trace(skb); if (!xnet) return; ipvs_reset(skb); secpath_reset(skb); skb_orphan(skb); skb->mark = 0; } static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type, unsigned short family) { struct sec_path *sp; sp = skb_sec_path(skb); if (sp && (sp->len || sp->olen) && !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) goto discard; XFRM_SPI_SKB_CB(skb)->family = family; if (family == AF_INET) { XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; } else { XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr); XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; } return xfrm_input(skb, nexthdr, spi, encap_type); discard: kfree_skb(skb); return 0; } static int xfrmi4_rcv(struct sk_buff *skb) { return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET); } static int xfrmi6_rcv(struct sk_buff *skb) { return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff], 0, 0, AF_INET6); } static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET); } static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) { return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6); } static int xfrmi_rcv_cb(struct sk_buff *skb, int err) { const struct xfrm_mode *inner_mode; struct net_device *dev; struct xfrm_state *x; struct xfrm_if *xi; bool xnet; int link; if (err && !secpath_exists(skb)) return 0; x = xfrm_input_state(skb); xi = xfrmi_lookup(xs_net(x), x); if (!xi) return 1; link = skb->dev->ifindex; dev = xi->dev; skb->dev = dev; if (err) { DEV_STATS_INC(dev, rx_errors); DEV_STATS_INC(dev, rx_dropped); return 0; } xnet = !net_eq(xi->net, dev_net(skb->dev)); if (xnet) { inner_mode = &x->inner_mode; if (x->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol); if (inner_mode == NULL) { XFRM_INC_STATS(dev_net(skb->dev), LINUX_MIB_XFRMINSTATEMODEERROR); return -EINVAL; } } if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, inner_mode->family)) return -EPERM; } xfrmi_scrub_packet(skb, xnet); if (xi->p.collect_md) { struct metadata_dst *md_dst; md_dst = metadata_dst_alloc(0, METADATA_XFRM, GFP_ATOMIC); if (!md_dst) return -ENOMEM; md_dst->u.xfrm_info.if_id = x->if_id; md_dst->u.xfrm_info.link = link; skb_dst_set(skb, (struct dst_entry *)md_dst); } dev_sw_netstats_rx_add(dev, skb->len); return 0; } static int xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { struct xfrm_if *xi = netdev_priv(dev); struct dst_entry *dst = skb_dst(skb); unsigned int length = skb->len; struct net_device *tdev; struct xfrm_state *x; int err = -1; u32 if_id; int mtu; if (xi->p.collect_md) { struct xfrm_md_info *md_info = skb_xfrm_md_info(skb); if (unlikely(!md_info)) return -EINVAL; if_id = md_info->if_id; fl->flowi_oif = md_info->link; if (md_info->dst_orig) { struct dst_entry *tmp_dst = dst; dst = md_info->dst_orig; skb_dst_set(skb, dst); md_info->dst_orig = NULL; dst_release(tmp_dst); } } else { if_id = xi->p.if_id; } dst_hold(dst); dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, if_id); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; goto tx_err_link_failure; } x = dst->xfrm; if (!x) goto tx_err_link_failure; if (x->if_id != if_id) goto tx_err_link_failure; tdev = dst->dev; if (tdev == dev) { DEV_STATS_INC(dev, collisions); net_warn_ratelimited("%s: Local routing loop detected!\n", dev->name); goto tx_err_dst_release; } mtu = dst_mtu(dst); if ((!skb_is_gso(skb) && skb->len > mtu) || (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))) { skb_dst_update_pmtu_no_confirm(skb, mtu); if (skb->protocol == htons(ETH_P_IPV6)) { if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; if (skb->len > 1280) icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); else goto xmit; } else { if (!(ip_hdr(skb)->frag_off & htons(IP_DF))) goto xmit; icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); } dst_release(dst); return -EMSGSIZE; } xmit: xfrmi_scrub_packet(skb, !net_eq(xi->net, dev_net(dev))); skb_dst_set(skb, dst); skb->dev = tdev; err = dst_output(xi->net, skb->sk, skb); if (net_xmit_eval(err) == 0) { dev_sw_netstats_tx_add(dev, 1, length); } else { DEV_STATS_INC(dev, tx_errors); DEV_STATS_INC(dev, tx_aborted_errors); } return 0; tx_err_link_failure: DEV_STATS_INC(dev, tx_carrier_errors); dst_link_failure(skb); tx_err_dst_release: dst_release(dst); return err; } static netdev_tx_t xfrmi_xmit(struct sk_buff *skb, struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); struct dst_entry *dst = skb_dst(skb); struct flowi fl; int ret; memset(&fl, 0, sizeof(fl)); switch (skb->protocol) { case htons(ETH_P_IPV6): memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); if (!dst) { fl.u.ip6.flowi6_oif = dev->ifindex; fl.u.ip6.flowi6_flags |= FLOWI_FLAG_ANYSRC; dst = ip6_route_output(dev_net(dev), NULL, &fl.u.ip6); if (dst->error) { dst_release(dst); DEV_STATS_INC(dev, tx_carrier_errors); goto tx_err; } skb_dst_set(skb, dst); } break; case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); if (!dst) { struct rtable *rt; fl.u.ip4.flowi4_oif = dev->ifindex; fl.u.ip4.flowi4_flags |= FLOWI_FLAG_ANYSRC; rt = __ip_route_output_key(dev_net(dev), &fl.u.ip4); if (IS_ERR(rt)) { DEV_STATS_INC(dev, tx_carrier_errors); goto tx_err; } skb_dst_set(skb, &rt->dst); } break; default: goto tx_err; } fl.flowi_oif = xi->p.link; ret = xfrmi_xmit2(skb, dev, &fl); if (ret < 0) goto tx_err; return NETDEV_TX_OK; tx_err: DEV_STATS_INC(dev, tx_errors); DEV_STATS_INC(dev, tx_dropped); kfree_skb(skb); return NETDEV_TX_OK; } static int xfrmi4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct net *net = dev_net(skb->dev); int protocol = iph->protocol; struct ip_comp_hdr *ipch; struct ip_esp_hdr *esph; struct ip_auth_hdr *ah ; struct xfrm_state *x; struct xfrm_if *xi; __be32 spi; switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2)); spi = esph->spi; break; case IPPROTO_AH: ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); spi = ah->spi; break; case IPPROTO_COMP: ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); spi = htonl(ntohs(ipch->cpi)); break; default: return 0; } switch (icmp_hdr(skb)->type) { case ICMP_DEST_UNREACH: if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return 0; break; case ICMP_REDIRECT: break; default: return 0; } x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, protocol, AF_INET); if (!x) return 0; xi = xfrmi_lookup(net, x); if (!xi) { xfrm_state_put(x); return -1; } if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, protocol); else ipv4_redirect(skb, net, 0, protocol); xfrm_state_put(x); return 0; } static int xfrmi6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; struct net *net = dev_net(skb->dev); int protocol = iph->nexthdr; struct ip_comp_hdr *ipch; struct ip_esp_hdr *esph; struct ip_auth_hdr *ah; struct xfrm_state *x; struct xfrm_if *xi; __be32 spi; switch (protocol) { case IPPROTO_ESP: esph = (struct ip_esp_hdr *)(skb->data + offset); spi = esph->spi; break; case IPPROTO_AH: ah = (struct ip_auth_hdr *)(skb->data + offset); spi = ah->spi; break; case IPPROTO_COMP: ipch = (struct ip_comp_hdr *)(skb->data + offset); spi = htonl(ntohs(ipch->cpi)); break; default: return 0; } if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return 0; x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, protocol, AF_INET6); if (!x) return 0; xi = xfrmi_lookup(net, x); if (!xi) { xfrm_state_put(x); return -1; } if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; } static int xfrmi_change(struct xfrm_if *xi, const struct xfrm_if_parms *p) { if (xi->p.link != p->link) return -EINVAL; xi->p.if_id = p->if_id; return 0; } static int xfrmi_update(struct xfrm_if *xi, struct xfrm_if_parms *p) { struct net *net = xi->net; struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); int err; xfrmi_unlink(xfrmn, xi); synchronize_net(); err = xfrmi_change(xi, p); xfrmi_link(xfrmn, xi); netdev_state_change(xi->dev); return err; } static int xfrmi_get_iflink(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); return READ_ONCE(xi->p.link); } static const struct net_device_ops xfrmi_netdev_ops = { .ndo_init = xfrmi_dev_init, .ndo_uninit = xfrmi_dev_uninit, .ndo_start_xmit = xfrmi_xmit, .ndo_get_stats64 = dev_get_tstats64, .ndo_get_iflink = xfrmi_get_iflink, }; static void xfrmi_dev_setup(struct net_device *dev) { dev->netdev_ops = &xfrmi_netdev_ops; dev->header_ops = &ip_tunnel_header_ops; dev->type = ARPHRD_NONE; dev->mtu = ETH_DATA_LEN; dev->min_mtu = ETH_MIN_MTU; dev->max_mtu = IP_MAX_MTU; dev->flags = IFF_NOARP; dev->needs_free_netdev = true; dev->priv_destructor = xfrmi_dev_free; dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS; netif_keep_dst(dev); eth_broadcast_addr(dev->broadcast); } #define XFRMI_FEATURES (NETIF_F_SG | \ NETIF_F_FRAGLIST | \ NETIF_F_GSO_SOFTWARE | \ NETIF_F_HW_CSUM) static int xfrmi_dev_init(struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); struct net_device *phydev = __dev_get_by_index(xi->net, xi->p.link); int err; err = gro_cells_init(&xi->gro_cells, dev); if (err) return err; dev->features |= NETIF_F_LLTX; dev->features |= XFRMI_FEATURES; dev->hw_features |= XFRMI_FEATURES; if (phydev) { dev->needed_headroom = phydev->needed_headroom; dev->needed_tailroom = phydev->needed_tailroom; if (is_zero_ether_addr(dev->dev_addr)) eth_hw_addr_inherit(dev, phydev); if (is_zero_ether_addr(dev->broadcast)) memcpy(dev->broadcast, phydev->broadcast, dev->addr_len); } else { eth_hw_addr_random(dev); eth_broadcast_addr(dev->broadcast); } return 0; } static int xfrmi_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { return 0; } static void xfrmi_netlink_parms(struct nlattr *data[], struct xfrm_if_parms *parms) { memset(parms, 0, sizeof(*parms)); if (!data) return; if (data[IFLA_XFRM_LINK]) parms->link = nla_get_u32(data[IFLA_XFRM_LINK]); if (data[IFLA_XFRM_IF_ID]) parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]); if (data[IFLA_XFRM_COLLECT_METADATA]) parms->collect_md = true; } static int xfrmi_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); struct xfrm_if_parms p = {}; struct xfrm_if *xi; int err; xfrmi_netlink_parms(data, &p); if (p.collect_md) { struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); if (p.link || p.if_id) { NL_SET_ERR_MSG(extack, "link and if_id must be zero"); return -EINVAL; } if (rtnl_dereference(xfrmn->collect_md_xfrmi)) return -EEXIST; } else { if (!p.if_id) { NL_SET_ERR_MSG(extack, "if_id must be non zero"); return -EINVAL; } xi = xfrmi_locate(net, &p); if (xi) return -EEXIST; } xi = netdev_priv(dev); xi->p = p; xi->net = net; xi->dev = dev; err = xfrmi_create(dev); return err; } static void xfrmi_dellink(struct net_device *dev, struct list_head *head) { unregister_netdevice_queue(dev, head); } static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct xfrm_if *xi = netdev_priv(dev); struct net *net = xi->net; struct xfrm_if_parms p = {}; xfrmi_netlink_parms(data, &p); if (!p.if_id) { NL_SET_ERR_MSG(extack, "if_id must be non zero"); return -EINVAL; } if (p.collect_md) { NL_SET_ERR_MSG(extack, "collect_md can't be changed"); return -EINVAL; } xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); } else { if (xi->dev != dev) return -EEXIST; if (xi->p.collect_md) { NL_SET_ERR_MSG(extack, "device can't be changed to collect_md"); return -EINVAL; } } return xfrmi_update(xi, &p); } static size_t xfrmi_get_size(const struct net_device *dev) { return /* IFLA_XFRM_LINK */ nla_total_size(4) + /* IFLA_XFRM_IF_ID */ nla_total_size(4) + /* IFLA_XFRM_COLLECT_METADATA */ nla_total_size(0) + 0; } static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); struct xfrm_if_parms *parm = &xi->p; if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) || nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id) || (xi->p.collect_md && nla_put_flag(skb, IFLA_XFRM_COLLECT_METADATA))) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static struct net *xfrmi_get_link_net(const struct net_device *dev) { struct xfrm_if *xi = netdev_priv(dev); return READ_ONCE(xi->net); } static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { [IFLA_XFRM_UNSPEC] = { .strict_start_type = IFLA_XFRM_COLLECT_METADATA }, [IFLA_XFRM_LINK] = { .type = NLA_U32 }, [IFLA_XFRM_IF_ID] = { .type = NLA_U32 }, [IFLA_XFRM_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { .kind = "xfrm", .maxtype = IFLA_XFRM_MAX, .policy = xfrmi_policy, .priv_size = sizeof(struct xfrm_if), .setup = xfrmi_dev_setup, .validate = xfrmi_validate, .newlink = xfrmi_newlink, .dellink = xfrmi_dellink, .changelink = xfrmi_changelink, .get_size = xfrmi_get_size, .fill_info = xfrmi_fill_info, .get_link_net = xfrmi_get_link_net, }; static void __net_exit xfrmi_exit_batch_rtnl(struct list_head *net_exit_list, struct list_head *dev_to_kill) { struct net *net; ASSERT_RTNL(); list_for_each_entry(net, net_exit_list, exit_list) { struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); struct xfrm_if __rcu **xip; struct xfrm_if *xi; int i; for (i = 0; i < XFRMI_HASH_SIZE; i++) { for (xip = &xfrmn->xfrmi[i]; (xi = rtnl_dereference(*xip)) != NULL; xip = &xi->next) unregister_netdevice_queue(xi->dev, dev_to_kill); } xi = rtnl_dereference(xfrmn->collect_md_xfrmi); if (xi) unregister_netdevice_queue(xi->dev, dev_to_kill); } } static struct pernet_operations xfrmi_net_ops = { .exit_batch_rtnl = xfrmi_exit_batch_rtnl, .id = &xfrmi_net_id, .size = sizeof(struct xfrmi_net), }; static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = { .handler = xfrmi6_rcv, .input_handler = xfrmi6_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, }; static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = { .handler = xfrm6_rcv, .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, }; static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = { .handler = xfrm6_rcv, .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 10, }; #if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) static int xfrmi6_rcv_tunnel(struct sk_buff *skb) { const xfrm_address_t *saddr; __be32 spi; saddr = (const xfrm_address_t *)&ipv6_hdr(skb)->saddr; spi = xfrm6_tunnel_spi_lookup(dev_net(skb->dev), saddr); return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi, NULL); } static struct xfrm6_tunnel xfrmi_ipv6_handler __read_mostly = { .handler = xfrmi6_rcv_tunnel, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 2, }; static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = { .handler = xfrmi6_rcv_tunnel, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi6_err, .priority = 2, }; #endif static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = { .handler = xfrmi4_rcv, .input_handler = xfrmi4_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi4_err, .priority = 10, }; static struct xfrm4_protocol xfrmi_ah4_protocol __read_mostly = { .handler = xfrm4_rcv, .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi4_err, .priority = 10, }; static struct xfrm4_protocol xfrmi_ipcomp4_protocol __read_mostly = { .handler = xfrm4_rcv, .input_handler = xfrm_input, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi4_err, .priority = 10, }; #if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) static int xfrmi4_rcv_tunnel(struct sk_buff *skb) { return xfrm4_rcv_spi(skb, IPPROTO_IPIP, ip_hdr(skb)->saddr); } static struct xfrm_tunnel xfrmi_ipip_handler __read_mostly = { .handler = xfrmi4_rcv_tunnel, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi4_err, .priority = 3, }; static struct xfrm_tunnel xfrmi_ipip6_handler __read_mostly = { .handler = xfrmi4_rcv_tunnel, .cb_handler = xfrmi_rcv_cb, .err_handler = xfrmi4_err, .priority = 2, }; #endif static int __init xfrmi4_init(void) { int err; err = xfrm4_protocol_register(&xfrmi_esp4_protocol, IPPROTO_ESP); if (err < 0) goto xfrm_proto_esp_failed; err = xfrm4_protocol_register(&xfrmi_ah4_protocol, IPPROTO_AH); if (err < 0) goto xfrm_proto_ah_failed; err = xfrm4_protocol_register(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; #if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) err = xfrm4_tunnel_register(&xfrmi_ipip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_ipip_failed; err = xfrm4_tunnel_register(&xfrmi_ipip6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipip6_failed; #endif return 0; #if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) xfrm_tunnel_ipip6_failed: xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); xfrm_tunnel_ipip_failed: xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); #endif xfrm_proto_comp_failed: xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH); xfrm_proto_ah_failed: xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP); xfrm_proto_esp_failed: return err; } static void xfrmi4_fini(void) { #if IS_REACHABLE(CONFIG_INET_XFRM_TUNNEL) xfrm4_tunnel_deregister(&xfrmi_ipip6_handler, AF_INET6); xfrm4_tunnel_deregister(&xfrmi_ipip_handler, AF_INET); #endif xfrm4_protocol_deregister(&xfrmi_ipcomp4_protocol, IPPROTO_COMP); xfrm4_protocol_deregister(&xfrmi_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&xfrmi_esp4_protocol, IPPROTO_ESP); } static int __init xfrmi6_init(void) { int err; err = xfrm6_protocol_register(&xfrmi_esp6_protocol, IPPROTO_ESP); if (err < 0) goto xfrm_proto_esp_failed; err = xfrm6_protocol_register(&xfrmi_ah6_protocol, IPPROTO_AH); if (err < 0) goto xfrm_proto_ah_failed; err = xfrm6_protocol_register(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); if (err < 0) goto xfrm_proto_comp_failed; #if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) err = xfrm6_tunnel_register(&xfrmi_ipv6_handler, AF_INET6); if (err < 0) goto xfrm_tunnel_ipv6_failed; err = xfrm6_tunnel_register(&xfrmi_ip6ip_handler, AF_INET); if (err < 0) goto xfrm_tunnel_ip6ip_failed; #endif return 0; #if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) xfrm_tunnel_ip6ip_failed: xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); xfrm_tunnel_ipv6_failed: xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); #endif xfrm_proto_comp_failed: xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH); xfrm_proto_ah_failed: xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP); xfrm_proto_esp_failed: return err; } static void xfrmi6_fini(void) { #if IS_REACHABLE(CONFIG_INET6_XFRM_TUNNEL) xfrm6_tunnel_deregister(&xfrmi_ip6ip_handler, AF_INET); xfrm6_tunnel_deregister(&xfrmi_ipv6_handler, AF_INET6); #endif xfrm6_protocol_deregister(&xfrmi_ipcomp6_protocol, IPPROTO_COMP); xfrm6_protocol_deregister(&xfrmi_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&xfrmi_esp6_protocol, IPPROTO_ESP); } static const struct xfrm_if_cb xfrm_if_cb = { .decode_session = xfrmi_decode_session, }; static int __init xfrmi_init(void) { const char *msg; int err; pr_info("IPsec XFRM device driver\n"); msg = "tunnel device"; err = register_pernet_device(&xfrmi_net_ops); if (err < 0) goto pernet_dev_failed; msg = "xfrm4 protocols"; err = xfrmi4_init(); if (err < 0) goto xfrmi4_failed; msg = "xfrm6 protocols"; err = xfrmi6_init(); if (err < 0) goto xfrmi6_failed; msg = "netlink interface"; err = rtnl_link_register(&xfrmi_link_ops); if (err < 0) goto rtnl_link_failed; err = register_xfrm_interface_bpf(); if (err < 0) goto kfunc_failed; lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM); xfrm_if_register_cb(&xfrm_if_cb); return err; kfunc_failed: rtnl_link_unregister(&xfrmi_link_ops); rtnl_link_failed: xfrmi6_fini(); xfrmi6_failed: xfrmi4_fini(); xfrmi4_failed: unregister_pernet_device(&xfrmi_net_ops); pernet_dev_failed: pr_err("xfrmi init: failed to register %s\n", msg); return err; } static void __exit xfrmi_fini(void) { xfrm_if_unregister_cb(); lwtunnel_encap_del_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM); rtnl_link_unregister(&xfrmi_link_ops); xfrmi4_fini(); xfrmi6_fini(); unregister_pernet_device(&xfrmi_net_ops); } module_init(xfrmi_init); module_exit(xfrmi_fini); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("xfrm"); MODULE_ALIAS_NETDEV("xfrm0"); MODULE_AUTHOR("Steffen Klassert"); MODULE_DESCRIPTION("XFRM virtual interface"); |
| 27 6 7 15 18 225 247 246 4 4 4 80 67 126 248 2 89 191 339 126 248 328 433 430 40 339 19 1 20 5 19 18 17 3 3 3 20 18 3 2 21 20 2 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 | /* * Copyright (c) 2018 Cumulus Networks. All rights reserved. * Copyright (c) 2018 David Ahern <dsa@cumulusnetworks.com> * * This software is licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this * source tree. * * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. */ #include <linux/bitmap.h> #include <linux/in6.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/rhashtable.h> #include <linux/spinlock_types.h> #include <linux/types.h> #include <net/fib_notifier.h> #include <net/inet_dscp.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/fib_rules.h> #include <net/net_namespace.h> #include <net/nexthop.h> #include <linux/debugfs.h> #include "netdevsim.h" struct nsim_fib_entry { u64 max; atomic64_t num; }; struct nsim_per_fib_data { struct nsim_fib_entry fib; struct nsim_fib_entry rules; }; struct nsim_fib_data { struct notifier_block fib_nb; struct nsim_per_fib_data ipv4; struct nsim_per_fib_data ipv6; struct nsim_fib_entry nexthops; struct rhashtable fib_rt_ht; struct list_head fib_rt_list; struct mutex fib_lock; /* Protects FIB HT and list */ struct notifier_block nexthop_nb; struct rhashtable nexthop_ht; struct devlink *devlink; struct work_struct fib_event_work; struct work_struct fib_flush_work; struct list_head fib_event_queue; spinlock_t fib_event_queue_lock; /* Protects fib event queue list */ struct mutex nh_lock; /* Protects NH HT */ struct dentry *ddir; bool fail_route_offload; bool fail_res_nexthop_group_replace; bool fail_nexthop_bucket_replace; bool fail_route_delete; }; struct nsim_fib_rt_key { unsigned char addr[sizeof(struct in6_addr)]; unsigned char prefix_len; int family; u32 tb_id; }; struct nsim_fib_rt { struct nsim_fib_rt_key key; struct rhash_head ht_node; struct list_head list; /* Member of fib_rt_list */ }; struct nsim_fib4_rt { struct nsim_fib_rt common; struct fib_info *fi; dscp_t dscp; u8 type; }; struct nsim_fib6_rt { struct nsim_fib_rt common; struct list_head nh_list; unsigned int nhs; }; struct nsim_fib6_rt_nh { struct list_head list; /* Member of nh_list */ struct fib6_info *rt; }; struct nsim_fib6_event { struct fib6_info **rt_arr; unsigned int nrt6; }; struct nsim_fib_event { struct list_head list; /* node in fib queue */ union { struct fib_entry_notifier_info fen_info; struct nsim_fib6_event fib6_event; }; struct nsim_fib_data *data; unsigned long event; int family; }; static const struct rhashtable_params nsim_fib_rt_ht_params = { .key_offset = offsetof(struct nsim_fib_rt, key), .head_offset = offsetof(struct nsim_fib_rt, ht_node), .key_len = sizeof(struct nsim_fib_rt_key), .automatic_shrinking = true, }; struct nsim_nexthop { struct rhash_head ht_node; u64 occ; u32 id; bool is_resilient; }; static const struct rhashtable_params nsim_nexthop_ht_params = { .key_offset = offsetof(struct nsim_nexthop, id), .head_offset = offsetof(struct nsim_nexthop, ht_node), .key_len = sizeof(u32), .automatic_shrinking = true, }; u64 nsim_fib_get_val(struct nsim_fib_data *fib_data, enum nsim_resource_id res_id, bool max) { struct nsim_fib_entry *entry; switch (res_id) { case NSIM_RESOURCE_IPV4_FIB: entry = &fib_data->ipv4.fib; break; case NSIM_RESOURCE_IPV4_FIB_RULES: entry = &fib_data->ipv4.rules; break; case NSIM_RESOURCE_IPV6_FIB: entry = &fib_data->ipv6.fib; break; case NSIM_RESOURCE_IPV6_FIB_RULES: entry = &fib_data->ipv6.rules; break; case NSIM_RESOURCE_NEXTHOPS: entry = &fib_data->nexthops; break; default: return 0; } return max ? entry->max : atomic64_read(&entry->num); } static void nsim_fib_set_max(struct nsim_fib_data *fib_data, enum nsim_resource_id res_id, u64 val) { struct nsim_fib_entry *entry; switch (res_id) { case NSIM_RESOURCE_IPV4_FIB: entry = &fib_data->ipv4.fib; break; case NSIM_RESOURCE_IPV4_FIB_RULES: entry = &fib_data->ipv4.rules; break; case NSIM_RESOURCE_IPV6_FIB: entry = &fib_data->ipv6.fib; break; case NSIM_RESOURCE_IPV6_FIB_RULES: entry = &fib_data->ipv6.rules; break; case NSIM_RESOURCE_NEXTHOPS: entry = &fib_data->nexthops; break; default: WARN_ON(1); return; } entry->max = val; } static int nsim_fib_rule_account(struct nsim_fib_entry *entry, bool add, struct netlink_ext_ack *extack) { int err = 0; if (add) { if (!atomic64_add_unless(&entry->num, 1, entry->max)) { err = -ENOSPC; NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib rule entries"); } } else { atomic64_dec_if_positive(&entry->num); } return err; } static int nsim_fib_rule_event(struct nsim_fib_data *data, struct fib_notifier_info *info, bool add) { struct netlink_ext_ack *extack = info->extack; int err = 0; switch (info->family) { case AF_INET: err = nsim_fib_rule_account(&data->ipv4.rules, add, extack); break; case AF_INET6: err = nsim_fib_rule_account(&data->ipv6.rules, add, extack); break; } return err; } static int nsim_fib_account(struct nsim_fib_entry *entry, bool add) { int err = 0; if (add) { if (!atomic64_add_unless(&entry->num, 1, entry->max)) err = -ENOSPC; } else { atomic64_dec_if_positive(&entry->num); } return err; } static void nsim_fib_rt_init(struct nsim_fib_data *data, struct nsim_fib_rt *fib_rt, const void *addr, size_t addr_len, unsigned int prefix_len, int family, u32 tb_id) { memcpy(fib_rt->key.addr, addr, addr_len); fib_rt->key.prefix_len = prefix_len; fib_rt->key.family = family; fib_rt->key.tb_id = tb_id; list_add(&fib_rt->list, &data->fib_rt_list); } static void nsim_fib_rt_fini(struct nsim_fib_rt *fib_rt) { list_del(&fib_rt->list); } static struct nsim_fib_rt *nsim_fib_rt_lookup(struct rhashtable *fib_rt_ht, const void *addr, size_t addr_len, unsigned int prefix_len, int family, u32 tb_id) { struct nsim_fib_rt_key key; memset(&key, 0, sizeof(key)); memcpy(key.addr, addr, addr_len); key.prefix_len = prefix_len; key.family = family; key.tb_id = tb_id; return rhashtable_lookup_fast(fib_rt_ht, &key, nsim_fib_rt_ht_params); } static struct nsim_fib4_rt * nsim_fib4_rt_create(struct nsim_fib_data *data, struct fib_entry_notifier_info *fen_info) { struct nsim_fib4_rt *fib4_rt; fib4_rt = kzalloc(sizeof(*fib4_rt), GFP_KERNEL); if (!fib4_rt) return NULL; nsim_fib_rt_init(data, &fib4_rt->common, &fen_info->dst, sizeof(u32), fen_info->dst_len, AF_INET, fen_info->tb_id); fib4_rt->fi = fen_info->fi; fib_info_hold(fib4_rt->fi); fib4_rt->dscp = fen_info->dscp; fib4_rt->type = fen_info->type; return fib4_rt; } static void nsim_fib4_rt_destroy(struct nsim_fib4_rt *fib4_rt) { fib_info_put(fib4_rt->fi); nsim_fib_rt_fini(&fib4_rt->common); kfree(fib4_rt); } static struct nsim_fib4_rt * nsim_fib4_rt_lookup(struct rhashtable *fib_rt_ht, const struct fib_entry_notifier_info *fen_info) { struct nsim_fib_rt *fib_rt; fib_rt = nsim_fib_rt_lookup(fib_rt_ht, &fen_info->dst, sizeof(u32), fen_info->dst_len, AF_INET, fen_info->tb_id); if (!fib_rt) return NULL; return container_of(fib_rt, struct nsim_fib4_rt, common); } static void nsim_fib4_rt_offload_failed_flag_set(struct net *net, struct fib_entry_notifier_info *fen_info) { u32 *p_dst = (u32 *)&fen_info->dst; struct fib_rt_info fri; fri.fi = fen_info->fi; fri.tb_id = fen_info->tb_id; fri.dst = cpu_to_be32(*p_dst); fri.dst_len = fen_info->dst_len; fri.dscp = fen_info->dscp; fri.type = fen_info->type; fri.offload = false; fri.trap = false; fri.offload_failed = true; fib_alias_hw_flags_set(net, &fri); } static void nsim_fib4_rt_hw_flags_set(struct net *net, const struct nsim_fib4_rt *fib4_rt, bool trap) { u32 *p_dst = (u32 *) fib4_rt->common.key.addr; int dst_len = fib4_rt->common.key.prefix_len; struct fib_rt_info fri; fri.fi = fib4_rt->fi; fri.tb_id = fib4_rt->common.key.tb_id; fri.dst = cpu_to_be32(*p_dst); fri.dst_len = dst_len; fri.dscp = fib4_rt->dscp; fri.type = fib4_rt->type; fri.offload = false; fri.trap = trap; fri.offload_failed = false; fib_alias_hw_flags_set(net, &fri); } static int nsim_fib4_rt_add(struct nsim_fib_data *data, struct nsim_fib4_rt *fib4_rt) { struct net *net = devlink_net(data->devlink); int err; err = rhashtable_insert_fast(&data->fib_rt_ht, &fib4_rt->common.ht_node, nsim_fib_rt_ht_params); if (err) goto err_fib_dismiss; /* Simulate hardware programming latency. */ msleep(1); nsim_fib4_rt_hw_flags_set(net, fib4_rt, true); return 0; err_fib_dismiss: /* Drop the accounting that was increased from the notification * context when FIB_EVENT_ENTRY_REPLACE was triggered. */ nsim_fib_account(&data->ipv4.fib, false); return err; } static int nsim_fib4_rt_replace(struct nsim_fib_data *data, struct nsim_fib4_rt *fib4_rt, struct nsim_fib4_rt *fib4_rt_old) { struct net *net = devlink_net(data->devlink); int err; /* We are replacing a route, so need to remove the accounting which * was increased when FIB_EVENT_ENTRY_REPLACE was triggered. */ err = nsim_fib_account(&data->ipv4.fib, false); if (err) return err; err = rhashtable_replace_fast(&data->fib_rt_ht, &fib4_rt_old->common.ht_node, &fib4_rt->common.ht_node, nsim_fib_rt_ht_params); if (err) return err; msleep(1); nsim_fib4_rt_hw_flags_set(net, fib4_rt, true); nsim_fib4_rt_hw_flags_set(net, fib4_rt_old, false); nsim_fib4_rt_destroy(fib4_rt_old); return 0; } static int nsim_fib4_rt_insert(struct nsim_fib_data *data, struct fib_entry_notifier_info *fen_info) { struct nsim_fib4_rt *fib4_rt, *fib4_rt_old; int err; if (data->fail_route_offload) { /* For testing purposes, user set debugfs fail_route_offload * value to true. Simulate hardware programming latency and then * fail. */ msleep(1); return -EINVAL; } fib4_rt = nsim_fib4_rt_create(data, fen_info); if (!fib4_rt) return -ENOMEM; fib4_rt_old = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info); if (!fib4_rt_old) err = nsim_fib4_rt_add(data, fib4_rt); else err = nsim_fib4_rt_replace(data, fib4_rt, fib4_rt_old); if (err) nsim_fib4_rt_destroy(fib4_rt); return err; } static void nsim_fib4_rt_remove(struct nsim_fib_data *data, const struct fib_entry_notifier_info *fen_info) { struct nsim_fib4_rt *fib4_rt; fib4_rt = nsim_fib4_rt_lookup(&data->fib_rt_ht, fen_info); if (!fib4_rt) return; rhashtable_remove_fast(&data->fib_rt_ht, &fib4_rt->common.ht_node, nsim_fib_rt_ht_params); nsim_fib4_rt_destroy(fib4_rt); } static int nsim_fib4_event(struct nsim_fib_data *data, struct fib_entry_notifier_info *fen_info, unsigned long event) { int err = 0; switch (event) { case FIB_EVENT_ENTRY_REPLACE: err = nsim_fib4_rt_insert(data, fen_info); if (err) { struct net *net = devlink_net(data->devlink); nsim_fib4_rt_offload_failed_flag_set(net, fen_info); } break; case FIB_EVENT_ENTRY_DEL: nsim_fib4_rt_remove(data, fen_info); break; default: break; } return err; } static struct nsim_fib6_rt_nh * nsim_fib6_rt_nh_find(const struct nsim_fib6_rt *fib6_rt, const struct fib6_info *rt) { struct nsim_fib6_rt_nh *fib6_rt_nh; list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list) { if (fib6_rt_nh->rt == rt) return fib6_rt_nh; } return NULL; } static int nsim_fib6_rt_nh_add(struct nsim_fib6_rt *fib6_rt, struct fib6_info *rt) { struct nsim_fib6_rt_nh *fib6_rt_nh; fib6_rt_nh = kzalloc(sizeof(*fib6_rt_nh), GFP_KERNEL); if (!fib6_rt_nh) return -ENOMEM; fib6_info_hold(rt); fib6_rt_nh->rt = rt; list_add_tail(&fib6_rt_nh->list, &fib6_rt->nh_list); fib6_rt->nhs++; return 0; } #if IS_ENABLED(CONFIG_IPV6) static void nsim_rt6_release(struct fib6_info *rt) { fib6_info_release(rt); } #else static void nsim_rt6_release(struct fib6_info *rt) { } #endif static void nsim_fib6_rt_nh_del(struct nsim_fib6_rt *fib6_rt, const struct fib6_info *rt) { struct nsim_fib6_rt_nh *fib6_rt_nh; fib6_rt_nh = nsim_fib6_rt_nh_find(fib6_rt, rt); if (!fib6_rt_nh) return; fib6_rt->nhs--; list_del(&fib6_rt_nh->list); nsim_rt6_release(fib6_rt_nh->rt); kfree(fib6_rt_nh); } static struct nsim_fib6_rt * nsim_fib6_rt_create(struct nsim_fib_data *data, struct fib6_info **rt_arr, unsigned int nrt6) { struct fib6_info *rt = rt_arr[0]; struct nsim_fib6_rt *fib6_rt; int i = 0; int err; fib6_rt = kzalloc(sizeof(*fib6_rt), GFP_KERNEL); if (!fib6_rt) return ERR_PTR(-ENOMEM); nsim_fib_rt_init(data, &fib6_rt->common, &rt->fib6_dst.addr, sizeof(rt->fib6_dst.addr), rt->fib6_dst.plen, AF_INET6, rt->fib6_table->tb6_id); /* We consider a multipath IPv6 route as one entry, but it can be made * up from several fib6_info structs (one for each nexthop), so we * add them all to the same list under the entry. */ INIT_LIST_HEAD(&fib6_rt->nh_list); for (i = 0; i < nrt6; i++) { err = nsim_fib6_rt_nh_add(fib6_rt, rt_arr[i]); if (err) goto err_fib6_rt_nh_del; } return fib6_rt; err_fib6_rt_nh_del: for (i--; i >= 0; i--) { nsim_fib6_rt_nh_del(fib6_rt, rt_arr[i]); } nsim_fib_rt_fini(&fib6_rt->common); kfree(fib6_rt); return ERR_PTR(err); } static void nsim_fib6_rt_destroy(struct nsim_fib6_rt *fib6_rt) { struct nsim_fib6_rt_nh *iter, *tmp; list_for_each_entry_safe(iter, tmp, &fib6_rt->nh_list, list) nsim_fib6_rt_nh_del(fib6_rt, iter->rt); WARN_ON_ONCE(!list_empty(&fib6_rt->nh_list)); nsim_fib_rt_fini(&fib6_rt->common); kfree(fib6_rt); } static struct nsim_fib6_rt * nsim_fib6_rt_lookup(struct rhashtable *fib_rt_ht, const struct fib6_info *rt) { struct nsim_fib_rt *fib_rt; fib_rt = nsim_fib_rt_lookup(fib_rt_ht, &rt->fib6_dst.addr, sizeof(rt->fib6_dst.addr), rt->fib6_dst.plen, AF_INET6, rt->fib6_table->tb6_id); if (!fib_rt) return NULL; return container_of(fib_rt, struct nsim_fib6_rt, common); } static int nsim_fib6_rt_append(struct nsim_fib_data *data, struct nsim_fib6_event *fib6_event) { struct fib6_info *rt = fib6_event->rt_arr[0]; struct nsim_fib6_rt *fib6_rt; int i, err; if (data->fail_route_offload) { /* For testing purposes, user set debugfs fail_route_offload * value to true. Simulate hardware programming latency and then * fail. */ msleep(1); return -EINVAL; } fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt); if (!fib6_rt) return -EINVAL; for (i = 0; i < fib6_event->nrt6; i++) { err = nsim_fib6_rt_nh_add(fib6_rt, fib6_event->rt_arr[i]); if (err) goto err_fib6_rt_nh_del; WRITE_ONCE(fib6_event->rt_arr[i]->trap, true); } return 0; err_fib6_rt_nh_del: for (i--; i >= 0; i--) { WRITE_ONCE(fib6_event->rt_arr[i]->trap, false); nsim_fib6_rt_nh_del(fib6_rt, fib6_event->rt_arr[i]); } return err; } #if IS_ENABLED(CONFIG_IPV6) static void nsim_fib6_rt_offload_failed_flag_set(struct nsim_fib_data *data, struct fib6_info **rt_arr, unsigned int nrt6) { struct net *net = devlink_net(data->devlink); int i; for (i = 0; i < nrt6; i++) fib6_info_hw_flags_set(net, rt_arr[i], false, false, true); } #else static void nsim_fib6_rt_offload_failed_flag_set(struct nsim_fib_data *data, struct fib6_info **rt_arr, unsigned int nrt6) { } #endif #if IS_ENABLED(CONFIG_IPV6) static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, const struct nsim_fib6_rt *fib6_rt, bool trap) { struct net *net = devlink_net(data->devlink); struct nsim_fib6_rt_nh *fib6_rt_nh; list_for_each_entry(fib6_rt_nh, &fib6_rt->nh_list, list) fib6_info_hw_flags_set(net, fib6_rt_nh->rt, false, trap, false); } #else static void nsim_fib6_rt_hw_flags_set(struct nsim_fib_data *data, const struct nsim_fib6_rt *fib6_rt, bool trap) { } #endif static int nsim_fib6_rt_add(struct nsim_fib_data *data, struct nsim_fib6_rt *fib6_rt) { int err; err = rhashtable_insert_fast(&data->fib_rt_ht, &fib6_rt->common.ht_node, nsim_fib_rt_ht_params); if (err) goto err_fib_dismiss; msleep(1); nsim_fib6_rt_hw_flags_set(data, fib6_rt, true); return 0; err_fib_dismiss: /* Drop the accounting that was increased from the notification * context when FIB_EVENT_ENTRY_REPLACE was triggered. */ nsim_fib_account(&data->ipv6.fib, false); return err; } static int nsim_fib6_rt_replace(struct nsim_fib_data *data, struct nsim_fib6_rt *fib6_rt, struct nsim_fib6_rt *fib6_rt_old) { int err; /* We are replacing a route, so need to remove the accounting which * was increased when FIB_EVENT_ENTRY_REPLACE was triggered. */ err = nsim_fib_account(&data->ipv6.fib, false); if (err) return err; err = rhashtable_replace_fast(&data->fib_rt_ht, &fib6_rt_old->common.ht_node, &fib6_rt->common.ht_node, nsim_fib_rt_ht_params); if (err) return err; msleep(1); nsim_fib6_rt_hw_flags_set(data, fib6_rt, true); nsim_fib6_rt_hw_flags_set(data, fib6_rt_old, false); nsim_fib6_rt_destroy(fib6_rt_old); return 0; } static int nsim_fib6_rt_insert(struct nsim_fib_data *data, struct nsim_fib6_event *fib6_event) { struct fib6_info *rt = fib6_event->rt_arr[0]; struct nsim_fib6_rt *fib6_rt, *fib6_rt_old; int err; if (data->fail_route_offload) { /* For testing purposes, user set debugfs fail_route_offload * value to true. Simulate hardware programming latency and then * fail. */ msleep(1); return -EINVAL; } fib6_rt = nsim_fib6_rt_create(data, fib6_event->rt_arr, fib6_event->nrt6); if (IS_ERR(fib6_rt)) return PTR_ERR(fib6_rt); fib6_rt_old = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt); if (!fib6_rt_old) err = nsim_fib6_rt_add(data, fib6_rt); else err = nsim_fib6_rt_replace(data, fib6_rt, fib6_rt_old); if (err) nsim_fib6_rt_destroy(fib6_rt); return err; } static void nsim_fib6_rt_remove(struct nsim_fib_data *data, struct nsim_fib6_event *fib6_event) { struct fib6_info *rt = fib6_event->rt_arr[0]; struct nsim_fib6_rt *fib6_rt; int i; /* Multipath routes are first added to the FIB trie and only then * notified. If we vetoed the addition, we will get a delete * notification for a route we do not have. Therefore, do not warn if * route was not found. */ fib6_rt = nsim_fib6_rt_lookup(&data->fib_rt_ht, rt); if (!fib6_rt) return; /* If not all the nexthops are deleted, then only reduce the nexthop * group. */ if (fib6_event->nrt6 != fib6_rt->nhs) { for (i = 0; i < fib6_event->nrt6; i++) nsim_fib6_rt_nh_del(fib6_rt, fib6_event->rt_arr[i]); return; } rhashtable_remove_fast(&data->fib_rt_ht, &fib6_rt->common.ht_node, nsim_fib_rt_ht_params); nsim_fib6_rt_destroy(fib6_rt); } static int nsim_fib6_event_init(struct nsim_fib6_event *fib6_event, struct fib6_entry_notifier_info *fen6_info) { struct fib6_info *rt = fen6_info->rt; struct fib6_info **rt_arr; struct fib6_info *iter; unsigned int nrt6; int i = 0; nrt6 = fen6_info->nsiblings + 1; rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC); if (!rt_arr) return -ENOMEM; fib6_event->rt_arr = rt_arr; fib6_event->nrt6 = nrt6; rt_arr[0] = rt; fib6_info_hold(rt); if (!fen6_info->nsiblings) return 0; list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { if (i == fen6_info->nsiblings) break; rt_arr[i + 1] = iter; fib6_info_hold(iter); i++; } WARN_ON_ONCE(i != fen6_info->nsiblings); return 0; } static void nsim_fib6_event_fini(struct nsim_fib6_event *fib6_event) { int i; for (i = 0; i < fib6_event->nrt6; i++) nsim_rt6_release(fib6_event->rt_arr[i]); kfree(fib6_event->rt_arr); } static int nsim_fib6_event(struct nsim_fib_data *data, struct nsim_fib6_event *fib6_event, unsigned long event) { int err; if (fib6_event->rt_arr[0]->fib6_src.plen) return 0; switch (event) { case FIB_EVENT_ENTRY_REPLACE: err = nsim_fib6_rt_insert(data, fib6_event); if (err) goto err_rt_offload_failed_flag_set; break; case FIB_EVENT_ENTRY_APPEND: err = nsim_fib6_rt_append(data, fib6_event); if (err) goto err_rt_offload_failed_flag_set; break; case FIB_EVENT_ENTRY_DEL: nsim_fib6_rt_remove(data, fib6_event); break; default: break; } return 0; err_rt_offload_failed_flag_set: nsim_fib6_rt_offload_failed_flag_set(data, fib6_event->rt_arr, fib6_event->nrt6); return err; } static void nsim_fib_event(struct nsim_fib_event *fib_event) { switch (fib_event->family) { case AF_INET: nsim_fib4_event(fib_event->data, &fib_event->fen_info, fib_event->event); fib_info_put(fib_event->fen_info.fi); break; case AF_INET6: nsim_fib6_event(fib_event->data, &fib_event->fib6_event, fib_event->event); nsim_fib6_event_fini(&fib_event->fib6_event); break; } } static int nsim_fib4_prepare_event(struct fib_notifier_info *info, struct nsim_fib_event *fib_event, unsigned long event) { struct nsim_fib_data *data = fib_event->data; struct fib_entry_notifier_info *fen_info; struct netlink_ext_ack *extack; int err = 0; fen_info = container_of(info, struct fib_entry_notifier_info, info); fib_event->fen_info = *fen_info; extack = info->extack; switch (event) { case FIB_EVENT_ENTRY_REPLACE: err = nsim_fib_account(&data->ipv4.fib, true); if (err) { NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries"); return err; } break; case FIB_EVENT_ENTRY_DEL: if (data->fail_route_delete) { NL_SET_ERR_MSG_MOD(extack, "Failed to process route deletion"); return -EINVAL; } nsim_fib_account(&data->ipv4.fib, false); break; } /* Take reference on fib_info to prevent it from being * freed while event is queued. Release it afterwards. */ fib_info_hold(fib_event->fen_info.fi); return 0; } static int nsim_fib6_prepare_event(struct fib_notifier_info *info, struct nsim_fib_event *fib_event, unsigned long event) { struct nsim_fib_data *data = fib_event->data; struct fib6_entry_notifier_info *fen6_info; struct netlink_ext_ack *extack; int err = 0; fen6_info = container_of(info, struct fib6_entry_notifier_info, info); err = nsim_fib6_event_init(&fib_event->fib6_event, fen6_info); if (err) return err; extack = info->extack; switch (event) { case FIB_EVENT_ENTRY_REPLACE: err = nsim_fib_account(&data->ipv6.fib, true); if (err) { NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported fib entries"); goto err_fib6_event_fini; } break; case FIB_EVENT_ENTRY_DEL: if (data->fail_route_delete) { err = -EINVAL; NL_SET_ERR_MSG_MOD(extack, "Failed to process route deletion"); goto err_fib6_event_fini; } nsim_fib_account(&data->ipv6.fib, false); break; } return 0; err_fib6_event_fini: nsim_fib6_event_fini(&fib_event->fib6_event); return err; } static int nsim_fib_event_schedule_work(struct nsim_fib_data *data, struct fib_notifier_info *info, unsigned long event) { struct nsim_fib_event *fib_event; int err; if (info->family != AF_INET && info->family != AF_INET6) /* netdevsim does not support 'RTNL_FAMILY_IP6MR' and * 'RTNL_FAMILY_IPMR' and should ignore them. */ return NOTIFY_DONE; fib_event = kzalloc(sizeof(*fib_event), GFP_ATOMIC); if (!fib_event) goto err_fib_event_alloc; fib_event->data = data; fib_event->event = event; fib_event->family = info->family; switch (info->family) { case AF_INET: err = nsim_fib4_prepare_event(info, fib_event, event); break; case AF_INET6: err = nsim_fib6_prepare_event(info, fib_event, event); break; } if (err) goto err_fib_prepare_event; /* Enqueue the event and trigger the work */ spin_lock_bh(&data->fib_event_queue_lock); list_add_tail(&fib_event->list, &data->fib_event_queue); spin_unlock_bh(&data->fib_event_queue_lock); schedule_work(&data->fib_event_work); return NOTIFY_DONE; err_fib_prepare_event: kfree(fib_event); err_fib_event_alloc: if (event == FIB_EVENT_ENTRY_DEL) schedule_work(&data->fib_flush_work); return NOTIFY_BAD; } static int nsim_fib_event_nb(struct notifier_block *nb, unsigned long event, void *ptr) { struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, fib_nb); struct fib_notifier_info *info = ptr; int err; switch (event) { case FIB_EVENT_RULE_ADD: case FIB_EVENT_RULE_DEL: err = nsim_fib_rule_event(data, info, event == FIB_EVENT_RULE_ADD); return notifier_from_errno(err); case FIB_EVENT_ENTRY_REPLACE: case FIB_EVENT_ENTRY_APPEND: case FIB_EVENT_ENTRY_DEL: return nsim_fib_event_schedule_work(data, info, event); } return NOTIFY_DONE; } static void nsim_fib4_rt_free(struct nsim_fib_rt *fib_rt, struct nsim_fib_data *data) { struct devlink *devlink = data->devlink; struct nsim_fib4_rt *fib4_rt; fib4_rt = container_of(fib_rt, struct nsim_fib4_rt, common); nsim_fib4_rt_hw_flags_set(devlink_net(devlink), fib4_rt, false); nsim_fib_account(&data->ipv4.fib, false); nsim_fib4_rt_destroy(fib4_rt); } static void nsim_fib6_rt_free(struct nsim_fib_rt *fib_rt, struct nsim_fib_data *data) { struct nsim_fib6_rt *fib6_rt; fib6_rt = container_of(fib_rt, struct nsim_fib6_rt, common); nsim_fib6_rt_hw_flags_set(data, fib6_rt, false); nsim_fib_account(&data->ipv6.fib, false); nsim_fib6_rt_destroy(fib6_rt); } static void nsim_fib_rt_free(void *ptr, void *arg) { struct nsim_fib_rt *fib_rt = ptr; struct nsim_fib_data *data = arg; switch (fib_rt->key.family) { case AF_INET: nsim_fib4_rt_free(fib_rt, data); break; case AF_INET6: nsim_fib6_rt_free(fib_rt, data); break; default: WARN_ON_ONCE(1); } } /* inconsistent dump, trying again */ static void nsim_fib_dump_inconsistent(struct notifier_block *nb) { struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, fib_nb); struct nsim_fib_rt *fib_rt, *fib_rt_tmp; /* Flush the work to make sure there is no race with notifications. */ flush_work(&data->fib_event_work); /* The notifier block is still not registered, so we do not need to * take any locks here. */ list_for_each_entry_safe(fib_rt, fib_rt_tmp, &data->fib_rt_list, list) { rhashtable_remove_fast(&data->fib_rt_ht, &fib_rt->ht_node, nsim_fib_rt_ht_params); nsim_fib_rt_free(fib_rt, data); } atomic64_set(&data->ipv4.rules.num, 0ULL); atomic64_set(&data->ipv6.rules.num, 0ULL); } static struct nsim_nexthop *nsim_nexthop_create(struct nsim_fib_data *data, struct nh_notifier_info *info) { struct nsim_nexthop *nexthop; u64 occ = 0; int i; nexthop = kzalloc(sizeof(*nexthop), GFP_KERNEL); if (!nexthop) return ERR_PTR(-ENOMEM); nexthop->id = info->id; /* Determine the number of nexthop entries the new nexthop will * occupy. */ switch (info->type) { case NH_NOTIFIER_INFO_TYPE_SINGLE: occ = 1; break; case NH_NOTIFIER_INFO_TYPE_GRP: for (i = 0; i < info->nh_grp->num_nh; i++) occ += info->nh_grp->nh_entries[i].weight; break; case NH_NOTIFIER_INFO_TYPE_RES_TABLE: occ = info->nh_res_table->num_nh_buckets; nexthop->is_resilient = true; break; default: NL_SET_ERR_MSG_MOD(info->extack, "Unsupported nexthop type"); kfree(nexthop); return ERR_PTR(-EOPNOTSUPP); } nexthop->occ = occ; return nexthop; } static void nsim_nexthop_destroy(struct nsim_nexthop *nexthop) { kfree(nexthop); } static int nsim_nexthop_account(struct nsim_fib_data *data, u64 occ, bool add, struct netlink_ext_ack *extack) { int i, err = 0; if (add) { for (i = 0; i < occ; i++) if (!atomic64_add_unless(&data->nexthops.num, 1, data->nexthops.max)) { err = -ENOSPC; NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported nexthops"); goto err_num_decrease; } } else { if (WARN_ON(occ > atomic64_read(&data->nexthops.num))) return -EINVAL; atomic64_sub(occ, &data->nexthops.num); } return err; err_num_decrease: atomic64_sub(i, &data->nexthops.num); return err; } static void nsim_nexthop_hw_flags_set(struct net *net, const struct nsim_nexthop *nexthop, bool trap) { int i; nexthop_set_hw_flags(net, nexthop->id, false, trap); if (!nexthop->is_resilient) return; for (i = 0; i < nexthop->occ; i++) nexthop_bucket_set_hw_flags(net, nexthop->id, i, false, trap); } static int nsim_nexthop_add(struct nsim_fib_data *data, struct nsim_nexthop *nexthop, struct netlink_ext_ack *extack) { struct net *net = devlink_net(data->devlink); int err; err = nsim_nexthop_account(data, nexthop->occ, true, extack); if (err) return err; err = rhashtable_insert_fast(&data->nexthop_ht, &nexthop->ht_node, nsim_nexthop_ht_params); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to insert nexthop"); goto err_nexthop_dismiss; } nsim_nexthop_hw_flags_set(net, nexthop, true); return 0; err_nexthop_dismiss: nsim_nexthop_account(data, nexthop->occ, false, extack); return err; } static int nsim_nexthop_replace(struct nsim_fib_data *data, struct nsim_nexthop *nexthop, struct nsim_nexthop *nexthop_old, struct netlink_ext_ack *extack) { struct net *net = devlink_net(data->devlink); int err; err = nsim_nexthop_account(data, nexthop->occ, true, extack); if (err) return err; err = rhashtable_replace_fast(&data->nexthop_ht, &nexthop_old->ht_node, &nexthop->ht_node, nsim_nexthop_ht_params); if (err) { NL_SET_ERR_MSG_MOD(extack, "Failed to replace nexthop"); goto err_nexthop_dismiss; } nsim_nexthop_hw_flags_set(net, nexthop, true); nsim_nexthop_account(data, nexthop_old->occ, false, extack); nsim_nexthop_destroy(nexthop_old); return 0; err_nexthop_dismiss: nsim_nexthop_account(data, nexthop->occ, false, extack); return err; } static int nsim_nexthop_insert(struct nsim_fib_data *data, struct nh_notifier_info *info) { struct nsim_nexthop *nexthop, *nexthop_old; int err; nexthop = nsim_nexthop_create(data, info); if (IS_ERR(nexthop)) return PTR_ERR(nexthop); nexthop_old = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, nsim_nexthop_ht_params); if (!nexthop_old) err = nsim_nexthop_add(data, nexthop, info->extack); else err = nsim_nexthop_replace(data, nexthop, nexthop_old, info->extack); if (err) nsim_nexthop_destroy(nexthop); return err; } static void nsim_nexthop_remove(struct nsim_fib_data *data, struct nh_notifier_info *info) { struct nsim_nexthop *nexthop; nexthop = rhashtable_lookup_fast(&data->nexthop_ht, &info->id, nsim_nexthop_ht_params); if (!nexthop) return; rhashtable_remove_fast(&data->nexthop_ht, &nexthop->ht_node, nsim_nexthop_ht_params); nsim_nexthop_account(data, nexthop->occ, false, info->extack); nsim_nexthop_destroy(nexthop); } static int nsim_nexthop_res_table_pre_replace(struct nsim_fib_data *data, struct nh_notifier_info *info) { if (data->fail_res_nexthop_group_replace) { NL_SET_ERR_MSG_MOD(info->extack, "Failed to replace a resilient nexthop group"); return -EINVAL; } return 0; } static int nsim_nexthop_bucket_replace(struct nsim_fib_data *data, struct nh_notifier_info *info) { if (data->fail_nexthop_bucket_replace) { NL_SET_ERR_MSG_MOD(info->extack, "Failed to replace nexthop bucket"); return -EINVAL; } nexthop_bucket_set_hw_flags(info->net, info->id, info->nh_res_bucket->bucket_index, false, true); return 0; } static int nsim_nexthop_event_nb(struct notifier_block *nb, unsigned long event, void *ptr) { struct nsim_fib_data *data = container_of(nb, struct nsim_fib_data, nexthop_nb); struct nh_notifier_info *info = ptr; int err = 0; mutex_lock(&data->nh_lock); switch (event) { case NEXTHOP_EVENT_REPLACE: err = nsim_nexthop_insert(data, info); break; case NEXTHOP_EVENT_DEL: nsim_nexthop_remove(data, info); break; case NEXTHOP_EVENT_RES_TABLE_PRE_REPLACE: err = nsim_nexthop_res_table_pre_replace(data, info); break; case NEXTHOP_EVENT_BUCKET_REPLACE: err = nsim_nexthop_bucket_replace(data, info); break; default: break; } mutex_unlock(&data->nh_lock); return notifier_from_errno(err); } static void nsim_nexthop_free(void *ptr, void *arg) { struct nsim_nexthop *nexthop = ptr; struct nsim_fib_data *data = arg; struct net *net; net = devlink_net(data->devlink); nsim_nexthop_hw_flags_set(net, nexthop, false); nsim_nexthop_account(data, nexthop->occ, false, NULL); nsim_nexthop_destroy(nexthop); } static ssize_t nsim_nexthop_bucket_activity_write(struct file *file, const char __user *user_buf, size_t size, loff_t *ppos) { struct nsim_fib_data *data = file->private_data; struct net *net = devlink_net(data->devlink); struct nsim_nexthop *nexthop; unsigned long *activity; loff_t pos = *ppos; u16 bucket_index; char buf[128]; int err = 0; u32 nhid; if (pos != 0) return -EINVAL; if (size > sizeof(buf)) return -EINVAL; if (copy_from_user(buf, user_buf, size)) return -EFAULT; if (sscanf(buf, "%u %hu", &nhid, &bucket_index) != 2) return -EINVAL; rtnl_lock(); nexthop = rhashtable_lookup_fast(&data->nexthop_ht, &nhid, nsim_nexthop_ht_params); if (!nexthop || !nexthop->is_resilient || bucket_index >= nexthop->occ) { err = -EINVAL; goto out; } activity = bitmap_zalloc(nexthop->occ, GFP_KERNEL); if (!activity) { err = -ENOMEM; goto out; } bitmap_set(activity, bucket_index, 1); nexthop_res_grp_activity_update(net, nhid, nexthop->occ, activity); bitmap_free(activity); out: rtnl_unlock(); *ppos = size; return err ?: size; } static const struct file_operations nsim_nexthop_bucket_activity_fops = { .open = simple_open, .write = nsim_nexthop_bucket_activity_write, .llseek = no_llseek, .owner = THIS_MODULE, }; static u64 nsim_fib_ipv4_resource_occ_get(void *priv) { struct nsim_fib_data *data = priv; return nsim_fib_get_val(data, NSIM_RESOURCE_IPV4_FIB, false); } static u64 nsim_fib_ipv4_rules_res_occ_get(void *priv) { struct nsim_fib_data *data = priv; return nsim_fib_get_val(data, NSIM_RESOURCE_IPV4_FIB_RULES, false); } static u64 nsim_fib_ipv6_resource_occ_get(void *priv) { struct nsim_fib_data *data = priv; return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB, false); } static u64 nsim_fib_ipv6_rules_res_occ_get(void *priv) { struct nsim_fib_data *data = priv; return nsim_fib_get_val(data, NSIM_RESOURCE_IPV6_FIB_RULES, false); } static u64 nsim_fib_nexthops_res_occ_get(void *priv) { struct nsim_fib_data *data = priv; return nsim_fib_get_val(data, NSIM_RESOURCE_NEXTHOPS, false); } static void nsim_fib_set_max_all(struct nsim_fib_data *data, struct devlink *devlink) { static const enum nsim_resource_id res_ids[] = { NSIM_RESOURCE_IPV4_FIB, NSIM_RESOURCE_IPV4_FIB_RULES, NSIM_RESOURCE_IPV6_FIB, NSIM_RESOURCE_IPV6_FIB_RULES, NSIM_RESOURCE_NEXTHOPS, }; int i; for (i = 0; i < ARRAY_SIZE(res_ids); i++) { int err; u64 val; err = devl_resource_size_get(devlink, res_ids[i], &val); if (err) val = (u64) -1; nsim_fib_set_max(data, res_ids[i], val); } } static void nsim_fib_event_work(struct work_struct *work) { struct nsim_fib_data *data = container_of(work, struct nsim_fib_data, fib_event_work); struct nsim_fib_event *fib_event, *next_fib_event; LIST_HEAD(fib_event_queue); spin_lock_bh(&data->fib_event_queue_lock); list_splice_init(&data->fib_event_queue, &fib_event_queue); spin_unlock_bh(&data->fib_event_queue_lock); mutex_lock(&data->fib_lock); list_for_each_entry_safe(fib_event, next_fib_event, &fib_event_queue, list) { nsim_fib_event(fib_event); list_del(&fib_event->list); kfree(fib_event); cond_resched(); } mutex_unlock(&data->fib_lock); } static void nsim_fib_flush_work(struct work_struct *work) { struct nsim_fib_data *data = container_of(work, struct nsim_fib_data, fib_flush_work); struct nsim_fib_rt *fib_rt, *fib_rt_tmp; /* Process pending work. */ flush_work(&data->fib_event_work); mutex_lock(&data->fib_lock); list_for_each_entry_safe(fib_rt, fib_rt_tmp, &data->fib_rt_list, list) { rhashtable_remove_fast(&data->fib_rt_ht, &fib_rt->ht_node, nsim_fib_rt_ht_params); nsim_fib_rt_free(fib_rt, data); } mutex_unlock(&data->fib_lock); } static int nsim_fib_debugfs_init(struct nsim_fib_data *data, struct nsim_dev *nsim_dev) { data->ddir = debugfs_create_dir("fib", nsim_dev->ddir); if (IS_ERR(data->ddir)) return PTR_ERR(data->ddir); data->fail_route_offload = false; debugfs_create_bool("fail_route_offload", 0600, data->ddir, &data->fail_route_offload); data->fail_res_nexthop_group_replace = false; debugfs_create_bool("fail_res_nexthop_group_replace", 0600, data->ddir, &data->fail_res_nexthop_group_replace); data->fail_nexthop_bucket_replace = false; debugfs_create_bool("fail_nexthop_bucket_replace", 0600, data->ddir, &data->fail_nexthop_bucket_replace); debugfs_create_file("nexthop_bucket_activity", 0200, data->ddir, data, &nsim_nexthop_bucket_activity_fops); data->fail_route_delete = false; debugfs_create_bool("fail_route_delete", 0600, data->ddir, &data->fail_route_delete); return 0; } static void nsim_fib_debugfs_exit(struct nsim_fib_data *data) { debugfs_remove_recursive(data->ddir); } struct nsim_fib_data *nsim_fib_create(struct devlink *devlink, struct netlink_ext_ack *extack) { struct nsim_fib_data *data; struct nsim_dev *nsim_dev; int err; data = kzalloc(sizeof(*data), GFP_KERNEL); if (!data) return ERR_PTR(-ENOMEM); data->devlink = devlink; nsim_dev = devlink_priv(devlink); err = nsim_fib_debugfs_init(data, nsim_dev); if (err) goto err_data_free; mutex_init(&data->nh_lock); err = rhashtable_init(&data->nexthop_ht, &nsim_nexthop_ht_params); if (err) goto err_debugfs_exit; mutex_init(&data->fib_lock); INIT_LIST_HEAD(&data->fib_rt_list); err = rhashtable_init(&data->fib_rt_ht, &nsim_fib_rt_ht_params); if (err) goto err_rhashtable_nexthop_destroy; INIT_WORK(&data->fib_event_work, nsim_fib_event_work); INIT_WORK(&data->fib_flush_work, nsim_fib_flush_work); INIT_LIST_HEAD(&data->fib_event_queue); spin_lock_init(&data->fib_event_queue_lock); nsim_fib_set_max_all(data, devlink); data->nexthop_nb.notifier_call = nsim_nexthop_event_nb; err = register_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb, extack); if (err) { pr_err("Failed to register nexthop notifier\n"); goto err_rhashtable_fib_destroy; } data->fib_nb.notifier_call = nsim_fib_event_nb; err = register_fib_notifier(devlink_net(devlink), &data->fib_nb, nsim_fib_dump_inconsistent, extack); if (err) { pr_err("Failed to register fib notifier\n"); goto err_nexthop_nb_unregister; } devl_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB, nsim_fib_ipv4_resource_occ_get, data); devl_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV4_FIB_RULES, nsim_fib_ipv4_rules_res_occ_get, data); devl_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB, nsim_fib_ipv6_resource_occ_get, data); devl_resource_occ_get_register(devlink, NSIM_RESOURCE_IPV6_FIB_RULES, nsim_fib_ipv6_rules_res_occ_get, data); devl_resource_occ_get_register(devlink, NSIM_RESOURCE_NEXTHOPS, nsim_fib_nexthops_res_occ_get, data); return data; err_nexthop_nb_unregister: unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); err_rhashtable_fib_destroy: cancel_work_sync(&data->fib_flush_work); flush_work(&data->fib_event_work); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); err_rhashtable_nexthop_destroy: rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, data); mutex_destroy(&data->fib_lock); err_debugfs_exit: mutex_destroy(&data->nh_lock); nsim_fib_debugfs_exit(data); err_data_free: kfree(data); return ERR_PTR(err); } void nsim_fib_destroy(struct devlink *devlink, struct nsim_fib_data *data) { devl_resource_occ_get_unregister(devlink, NSIM_RESOURCE_NEXTHOPS); devl_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV6_FIB_RULES); devl_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV6_FIB); devl_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV4_FIB_RULES); devl_resource_occ_get_unregister(devlink, NSIM_RESOURCE_IPV4_FIB); unregister_fib_notifier(devlink_net(devlink), &data->fib_nb); unregister_nexthop_notifier(devlink_net(devlink), &data->nexthop_nb); cancel_work_sync(&data->fib_flush_work); flush_work(&data->fib_event_work); rhashtable_free_and_destroy(&data->fib_rt_ht, nsim_fib_rt_free, data); rhashtable_free_and_destroy(&data->nexthop_ht, nsim_nexthop_free, data); WARN_ON_ONCE(!list_empty(&data->fib_event_queue)); WARN_ON_ONCE(!list_empty(&data->fib_rt_list)); mutex_destroy(&data->fib_lock); mutex_destroy(&data->nh_lock); nsim_fib_debugfs_exit(data); kfree(data); } |
| 1 3 3 3 3 5 1 1 1 2 3 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 | /* * CTS: Cipher Text Stealing mode * * COPYRIGHT (c) 2008 * The Regents of the University of Michigan * ALL RIGHTS RESERVED * * Permission is granted to use, copy, create derivative works * and redistribute this software and such derivative works * for any purpose, so long as the name of The University of * Michigan is not used in any advertising or publicity * pertaining to the use of distribution of this software * without specific, written prior authorization. If the * above copyright notice or any other identification of the * University of Michigan is included in any copy of any * portion of this software, then the disclaimer below must * also be included. * * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF * SUCH DAMAGES. */ /* Derived from various: * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ /* * This is the Cipher Text Stealing mode as described by * Section 8 of rfc2040 and referenced by rfc3962. * rfc3962 includes errata information in its Appendix A. */ #include <crypto/algapi.h> #include <crypto/internal/skcipher.h> #include <linux/err.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/log2.h> #include <linux/module.h> #include <linux/scatterlist.h> #include <crypto/scatterwalk.h> #include <linux/slab.h> #include <linux/compiler.h> struct crypto_cts_ctx { struct crypto_skcipher *child; }; struct crypto_cts_reqctx { struct scatterlist sg[2]; unsigned offset; struct skcipher_request subreq; }; static inline u8 *crypto_cts_reqctx_space(struct skcipher_request *req) { struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *child = ctx->child; return PTR_ALIGN((u8 *)(rctx + 1) + crypto_skcipher_reqsize(child), crypto_skcipher_alignmask(tfm) + 1); } static int crypto_cts_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(parent); struct crypto_skcipher *child = ctx->child; crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(child, crypto_skcipher_get_flags(parent) & CRYPTO_TFM_REQ_MASK); return crypto_skcipher_setkey(child, key, keylen); } static void cts_cbc_crypt_done(void *data, int err) { struct skcipher_request *req = data; if (err == -EINPROGRESS) return; skcipher_request_complete(req, err); } static int cts_cbc_encrypt(struct skcipher_request *req) { struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); u8 d[MAX_CIPHER_BLOCKSIZE * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; int lastn; offset = rctx->offset; lastn = req->cryptlen - offset; sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - bsize); scatterwalk_map_and_copy(d + bsize, sg, 0, bsize, 0); memset(d, 0, bsize); scatterwalk_map_and_copy(d, req->src, offset, lastn, 0); scatterwalk_map_and_copy(d, sg, 0, bsize + lastn, 1); memzero_explicit(d, sizeof(d)); skcipher_request_set_callback(subreq, req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG, cts_cbc_crypt_done, req); skcipher_request_set_crypt(subreq, sg, sg, bsize, req->iv); return crypto_skcipher_encrypt(subreq); } static void crypto_cts_encrypt_done(void *data, int err) { struct skcipher_request *req = data; if (err) goto out; err = cts_cbc_encrypt(req); if (err == -EINPROGRESS || err == -EBUSY) return; out: skcipher_request_complete(req, err); } static int crypto_cts_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); unsigned int nbytes = req->cryptlen; unsigned int offset; skcipher_request_set_tfm(subreq, ctx->child); if (nbytes < bsize) return -EINVAL; if (nbytes == bsize) { skcipher_request_set_callback(subreq, req->base.flags, req->base.complete, req->base.data); skcipher_request_set_crypt(subreq, req->src, req->dst, nbytes, req->iv); return crypto_skcipher_encrypt(subreq); } offset = rounddown(nbytes - 1, bsize); rctx->offset = offset; skcipher_request_set_callback(subreq, req->base.flags, crypto_cts_encrypt_done, req); skcipher_request_set_crypt(subreq, req->src, req->dst, offset, req->iv); return crypto_skcipher_encrypt(subreq) ?: cts_cbc_encrypt(req); } static int cts_cbc_decrypt(struct skcipher_request *req) { struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); u8 d[MAX_CIPHER_BLOCKSIZE * 2] __aligned(__alignof__(u32)); struct scatterlist *sg; unsigned int offset; u8 *space; int lastn; offset = rctx->offset; lastn = req->cryptlen - offset; sg = scatterwalk_ffwd(rctx->sg, req->dst, offset - bsize); /* 1. Decrypt Cn-1 (s) to create Dn */ scatterwalk_map_and_copy(d + bsize, sg, 0, bsize, 0); space = crypto_cts_reqctx_space(req); crypto_xor(d + bsize, space, bsize); /* 2. Pad Cn with zeros at the end to create C of length BB */ memset(d, 0, bsize); scatterwalk_map_and_copy(d, req->src, offset, lastn, 0); /* 3. Exclusive-or Dn with C to create Xn */ /* 4. Select the first Ln bytes of Xn to create Pn */ crypto_xor(d + bsize, d, lastn); /* 5. Append the tail (BB - Ln) bytes of Xn to Cn to create En */ memcpy(d + lastn, d + bsize + lastn, bsize - lastn); /* 6. Decrypt En to create Pn-1 */ scatterwalk_map_and_copy(d, sg, 0, bsize + lastn, 1); memzero_explicit(d, sizeof(d)); skcipher_request_set_callback(subreq, req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG, cts_cbc_crypt_done, req); skcipher_request_set_crypt(subreq, sg, sg, bsize, space); return crypto_skcipher_decrypt(subreq); } static void crypto_cts_decrypt_done(void *data, int err) { struct skcipher_request *req = data; if (err) goto out; err = cts_cbc_decrypt(req); if (err == -EINPROGRESS || err == -EBUSY) return; out: skcipher_request_complete(req, err); } static int crypto_cts_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct crypto_cts_reqctx *rctx = skcipher_request_ctx(req); struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_request *subreq = &rctx->subreq; int bsize = crypto_skcipher_blocksize(tfm); unsigned int nbytes = req->cryptlen; unsigned int offset; u8 *space; skcipher_request_set_tfm(subreq, ctx->child); if (nbytes < bsize) return -EINVAL; if (nbytes == bsize) { skcipher_request_set_callback(subreq, req->base.flags, req->base.complete, req->base.data); skcipher_request_set_crypt(subreq, req->src, req->dst, nbytes, req->iv); return crypto_skcipher_decrypt(subreq); } skcipher_request_set_callback(subreq, req->base.flags, crypto_cts_decrypt_done, req); space = crypto_cts_reqctx_space(req); offset = rounddown(nbytes - 1, bsize); rctx->offset = offset; if (offset <= bsize) memcpy(space, req->iv, bsize); else scatterwalk_map_and_copy(space, req->src, offset - 2 * bsize, bsize, 0); skcipher_request_set_crypt(subreq, req->src, req->dst, offset, req->iv); return crypto_skcipher_decrypt(subreq) ?: cts_cbc_decrypt(req); } static int crypto_cts_init_tfm(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); struct crypto_skcipher_spawn *spawn = skcipher_instance_ctx(inst); struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *cipher; unsigned reqsize; unsigned bsize; unsigned align; cipher = crypto_spawn_skcipher(spawn); if (IS_ERR(cipher)) return PTR_ERR(cipher); ctx->child = cipher; align = crypto_skcipher_alignmask(tfm); bsize = crypto_skcipher_blocksize(cipher); reqsize = ALIGN(sizeof(struct crypto_cts_reqctx) + crypto_skcipher_reqsize(cipher), crypto_tfm_ctx_alignment()) + (align & ~(crypto_tfm_ctx_alignment() - 1)) + bsize; crypto_skcipher_set_reqsize(tfm, reqsize); return 0; } static void crypto_cts_exit_tfm(struct crypto_skcipher *tfm) { struct crypto_cts_ctx *ctx = crypto_skcipher_ctx(tfm); crypto_free_skcipher(ctx->child); } static void crypto_cts_free(struct skcipher_instance *inst) { crypto_drop_skcipher(skcipher_instance_ctx(inst)); kfree(inst); } static int crypto_cts_create(struct crypto_template *tmpl, struct rtattr **tb) { struct crypto_skcipher_spawn *spawn; struct skcipher_alg_common *alg; struct skcipher_instance *inst; u32 mask; int err; err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_SKCIPHER, &mask); if (err) return err; inst = kzalloc(sizeof(*inst) + sizeof(*spawn), GFP_KERNEL); if (!inst) return -ENOMEM; spawn = skcipher_instance_ctx(inst); err = crypto_grab_skcipher(spawn, skcipher_crypto_instance(inst), crypto_attr_alg_name(tb[1]), 0, mask); if (err) goto err_free_inst; alg = crypto_spawn_skcipher_alg_common(spawn); err = -EINVAL; if (alg->ivsize != alg->base.cra_blocksize) goto err_free_inst; if (strncmp(alg->base.cra_name, "cbc(", 4)) goto err_free_inst; err = crypto_inst_setname(skcipher_crypto_instance(inst), "cts", &alg->base); if (err) goto err_free_inst; inst->alg.base.cra_priority = alg->base.cra_priority; inst->alg.base.cra_blocksize = alg->base.cra_blocksize; inst->alg.base.cra_alignmask = alg->base.cra_alignmask; inst->alg.ivsize = alg->base.cra_blocksize; inst->alg.chunksize = alg->chunksize; inst->alg.min_keysize = alg->min_keysize; inst->alg.max_keysize = alg->max_keysize; inst->alg.base.cra_ctxsize = sizeof(struct crypto_cts_ctx); inst->alg.init = crypto_cts_init_tfm; inst->alg.exit = crypto_cts_exit_tfm; inst->alg.setkey = crypto_cts_setkey; inst->alg.encrypt = crypto_cts_encrypt; inst->alg.decrypt = crypto_cts_decrypt; inst->free = crypto_cts_free; err = skcipher_register_instance(tmpl, inst); if (err) { err_free_inst: crypto_cts_free(inst); } return err; } static struct crypto_template crypto_cts_tmpl = { .name = "cts", .create = crypto_cts_create, .module = THIS_MODULE, }; static int __init crypto_cts_module_init(void) { return crypto_register_template(&crypto_cts_tmpl); } static void __exit crypto_cts_module_exit(void) { crypto_unregister_template(&crypto_cts_tmpl); } subsys_initcall(crypto_cts_module_init); module_exit(crypto_cts_module_exit); MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("CTS-CBC CipherText Stealing for CBC"); MODULE_ALIAS_CRYPTO("cts"); |
| 17 17 17 23 23 2 85 1 1 81 1 2 1 50 8 26 1 68 4 1 10 14 81 1 12 4 1 1 14 64 64 2 24 41 61 1 8 4 51 3 58 1 1 1 63 63 62 61 62 1 61 62 62 61 62 46 62 73 73 17 1 15 5 11 12 2 2 1 1 1 1 1 7 7 6 7 34 30 62 1 1 2 64 64 64 78 78 78 78 78 64 1 78 77 1 77 1 77 1 78 72 6 53 26 26 62 16 78 78 26 12 4 77 79 2 64 72 4 69 69 14 12 1 1 53 3 1 2 1 1 19 48 14 14 14 52 66 68 12 12 1 1 12 82 74 54 15 39 36 2 181 174 7 47 47 47 12 35 35 146 82 9 63 13 133 13 14 1 2 3 4 3 1 3 1 2 2 4 2 2 162 162 179 178 1 178 172 173 172 151 88 2 25 1 1 23 19 21 3 88 1 1 3 84 82 3 1 1 66 100 6 196 3 194 190 3 185 164 28 123 123 49 1 8 8 5 9 8 1 1 9 3 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 | // SPDX-License-Identifier: GPL-2.0-or-later /* * TCP over IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Based on: * linux/net/ipv4/tcp.c * linux/net/ipv4/tcp_input.c * linux/net/ipv4/tcp_output.c * * Fixes: * Hideaki YOSHIFUJI : sin6_scope_id support * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind * a single port at the same time. * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file. */ #include <linux/bottom_half.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/jiffies.h> #include <linux/in.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/init.h> #include <linux/jhash.h> #include <linux/ipsec.h> #include <linux/times.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/indirect_call_wrapper.h> #include <net/tcp.h> #include <net/ndisc.h> #include <net/inet6_hashtables.h> #include <net/inet6_connection_sock.h> #include <net/ipv6.h> #include <net/transp_v6.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/ip6_checksum.h> #include <net/inet_ecn.h> #include <net/protocol.h> #include <net/xfrm.h> #include <net/snmp.h> #include <net/dsfield.h> #include <net/timewait_sock.h> #include <net/inet_common.h> #include <net/secure_seq.h> #include <net/hotdata.h> #include <net/busy_poll.h> #include <net/rstreason.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <crypto/hash.h> #include <linux/scatterlist.h> #include <trace/events/tcp.h> static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, enum sk_rst_reason reason); static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb); static const struct inet_connection_sock_af_ops ipv6_mapped; const struct inet_connection_sock_af_ops ipv6_specific; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific; static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific; #endif /* Helper returning the inet6 address from a given tcp socket. * It can be used in TCP stack instead of inet6_sk(sk). * This avoids a dereference and allow compiler optimizations. * It is a specialized version of inet6_sk_generic(). */ #define tcp_inet6_sk(sk) (&container_of_const(tcp_sk(sk), \ struct tcp6_sock, tcp)->inet6) static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); if (dst && dst_hold_safe(dst)) { rcu_assign_pointer(sk->sk_rx_dst, dst); sk->sk_rx_dst_ifindex = skb->skb_iif; sk->sk_rx_dst_cookie = rt6_get_cookie(dst_rt6_info(dst)); } } static u32 tcp_v6_init_seq(const struct sk_buff *skb) { return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, tcp_hdr(skb)->dest, tcp_hdr(skb)->source); } static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) { return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32); } static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { /* This check is replicated from tcp_v6_connect() and intended to * prevent BPF program called below from accessing bytes that are out * of the bound specified by user in addr_len. */ if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; sock_owned_by_me(sk); return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr, &addr_len); } static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_connection_sock *icsk = inet_csk(sk); struct in6_addr *saddr = NULL, *final_p, final; struct inet_timewait_death_row *tcp_death_row; struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct ipv6_txoptions *opt; struct dst_entry *dst; struct flowi6 fl6; int addr_type; int err; if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; if (usin->sin6_family != AF_INET6) return -EAFNOSUPPORT; memset(&fl6, 0, sizeof(fl6)); if (inet6_test_bit(SNDFLOW, sk)) { fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; IP6_ECN_flow_init(fl6.flowlabel); if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (IS_ERR(flowlabel)) return -EINVAL; fl6_sock_release(flowlabel); } } /* * connect() to INADDR_ANY means loopback (BSD'ism). */ if (ipv6_addr_any(&usin->sin6_addr)) { if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr)) ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK), &usin->sin6_addr); else usin->sin6_addr = in6addr_loopback; } addr_type = ipv6_addr_type(&usin->sin6_addr); if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type&IPV6_ADDR_LINKLOCAL) { if (addr_len >= sizeof(struct sockaddr_in6) && usin->sin6_scope_id) { /* If interface is set while binding, indices * must coincide. */ if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id)) return -EINVAL; sk->sk_bound_dev_if = usin->sin6_scope_id; } /* Connect to link-local address requires an interface */ if (!sk->sk_bound_dev_if) return -EINVAL; } if (tp->rx_opt.ts_recent_stamp && !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) { tp->rx_opt.ts_recent = 0; tp->rx_opt.ts_recent_stamp = 0; WRITE_ONCE(tp->write_seq, 0); } sk->sk_v6_daddr = usin->sin6_addr; np->flow_label = fl6.flowlabel; /* * TCP over IPv4 */ if (addr_type & IPV6_ADDR_MAPPED) { u32 exthdrlen = icsk->icsk_ext_hdr_len; struct sockaddr_in sin; if (ipv6_only_sock(sk)) return -ENETUNREACH; sin.sin_family = AF_INET; sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin)); if (err) { icsk->icsk_ext_hdr_len = exthdrlen; /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; } np->saddr = sk->sk_v6_rcv_saddr; return err; } if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) saddr = &sk->sk_v6_rcv_saddr; fl6.flowi6_proto = IPPROTO_TCP; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = saddr ? *saddr : np->saddr; fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label); fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = usin->sin6_port; fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sk->sk_uid; opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk)); final_p = fl6_update_dst(&fl6, opt, &final); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } tp->tcp_usec_ts = dst_tcp_usec_ts(dst); tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; if (!saddr) { saddr = &fl6.saddr; err = inet_bhash2_update_saddr(sk, saddr, AF_INET6); if (err) goto failure; } /* set the source address */ np->saddr = *saddr; inet->inet_rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (opt) icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen; tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); err = inet6_hash_connect(tcp_death_row, sk); if (err) goto late_failure; sk_set_txhash(sk); if (likely(!tp->repair)) { if (!tp->write_seq) WRITE_ONCE(tp->write_seq, secure_tcpv6_seq(np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport)); tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32); } if (tcp_fastopen_defer_connect(sk, &err)) return err; if (err) goto late_failure; err = tcp_connect(sk); if (err) goto late_failure; return 0; late_failure: tcp_set_state(sk, TCP_CLOSE); inet_bhash2_reset_saddr(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; return err; } static void tcp_v6_mtu_reduced(struct sock *sk) { struct dst_entry *dst; u32 mtu; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) return; mtu = READ_ONCE(tcp_sk(sk)->mtu_info); /* Drop requests trying to increase our current mss. * Check done in __ip6_rt_update_pmtu() is too late. */ if (tcp_mtu_to_mss(sk, mtu) >= tcp_sk(sk)->mss_cache) return; dst = inet6_csk_update_pmtu(sk, mtu); if (!dst) return; if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) { tcp_sync_mss(sk, dst_mtu(dst)); tcp_simple_retransmit(sk); } } static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct net *net = dev_net(skb->dev); struct request_sock *fastopen; struct ipv6_pinfo *np; struct tcp_sock *tp; __u32 seq, snd_una; struct sock *sk; bool fatal; int err; sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &hdr->daddr, th->dest, &hdr->saddr, ntohs(th->source), skb->dev->ifindex, inet6_sdif(skb)); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return -ENOENT; } if (sk->sk_state == TCP_TIME_WAIT) { /* To increase the counter of ignored icmps for TCP-AO */ tcp_ao_ignore_icmp(sk, AF_INET6, type, code); inet_twsk_put(inet_twsk(sk)); return 0; } seq = ntohl(th->seq); fatal = icmpv6_err_convert(type, code, &err); if (sk->sk_state == TCP_NEW_SYN_RECV) { tcp_req_err(sk, seq, fatal); return 0; } if (tcp_ao_ignore_icmp(sk, AF_INET6, type, code)) { sock_put(sk); return 0; } bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == TCP_CLOSE) goto out; if (static_branch_unlikely(&ip6_min_hopcount)) { /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ if (ipv6_hdr(skb)->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); goto out; } } tp = tcp_sk(sk); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = rcu_dereference(tp->fastopen_rsk); snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && !between(seq, snd_una, tp->snd_nxt)) { __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } np = tcp_inet6_sk(sk); if (type == NDISC_REDIRECT) { if (!sock_owned_by_user(sk)) { struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie); if (dst) dst->ops->redirect(dst, sk, skb); } goto out; } if (type == ICMPV6_PKT_TOOBIG) { u32 mtu = ntohl(info); /* We are not interested in TCP_LISTEN and open_requests * (SYN-ACKs send out by Linux are always <576bytes so * they should go through unfragmented). */ if (sk->sk_state == TCP_LISTEN) goto out; if (!ip6_sk_accept_pmtu(sk)) goto out; if (mtu < IPV6_MIN_MTU) goto out; WRITE_ONCE(tp->mtu_info, mtu); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, &sk->sk_tsq_flags)) sock_hold(sk); goto out; } /* Might be for an request_sock */ switch (sk->sk_state) { case TCP_SYN_SENT: case TCP_SYN_RECV: /* Only in fast or simultaneous open. If a fast open socket is * already accepted it is treated as a connected one below. */ if (fastopen && !fastopen->sk) break; ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th); if (!sock_owned_by_user(sk)) tcp_done_with_error(sk, err); else WRITE_ONCE(sk->sk_err_soft, err); goto out; case TCP_LISTEN: break; default: /* check if this ICMP message allows revert of backoff. * (see RFC 6069) */ if (!fastopen && type == ICMPV6_DEST_UNREACH && code == ICMPV6_NOROUTE) tcp_ld_RTO_revert(sk, seq); } if (!sock_owned_by_user(sk) && inet6_test_bit(RECVERR6, sk)) { WRITE_ONCE(sk->sk_err, err); sk_error_report(sk); } else { WRITE_ONCE(sk->sk_err_soft, err); } out: bh_unlock_sock(sk); sock_put(sk); return 0; } static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, struct flowi *fl, struct request_sock *req, struct tcp_fastopen_cookie *foc, enum tcp_synack_type synack_type, struct sk_buff *syn_skb) { struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct flowi6 *fl6 = &fl->u.ip6; struct sk_buff *skb; int err = -ENOMEM; u8 tclass; /* First, grab a route. */ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req, IPPROTO_TCP)) == NULL) goto done; skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; if (inet6_test_bit(REPFLOW, sk) && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ? (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | (np->tclass & INET_ECN_MASK) : np->tclass; if (!INET_ECN_is_capable(tclass) && tcp_bpf_ca_needs_ecn((struct sock *)req)) tclass |= INET_ECN_ECT_0; rcu_read_lock(); opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark), opt, tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); err = net_xmit_eval(err); } done: return err; } static void tcp_v6_reqsk_destructor(struct request_sock *req) { kfree(inet_rsk(req)->ipv6_opt); consume_skb(inet_rsk(req)->pktopts); } #ifdef CONFIG_TCP_MD5SIG static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk, const struct in6_addr *addr, int l3index) { return tcp_md5_do_lookup(sk, l3index, (union tcp_md5_addr *)addr, AF_INET6); } static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk, const struct sock *addr_sk) { int l3index; l3index = l3mdev_master_ifindex_by_index(sock_net(sk), addr_sk->sk_bound_dev_if); return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr, l3index); } static int tcp_v6_parse_md5_keys(struct sock *sk, int optname, sockptr_t optval, int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; union tcp_ao_addr *addr; int l3index = 0; u8 prefixlen; bool l3flag; u8 flags; if (optlen < sizeof(cmd)) return -EINVAL; if (copy_from_sockptr(&cmd, optval, sizeof(cmd))) return -EFAULT; if (sin6->sin6_family != AF_INET6) return -EINVAL; flags = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; l3flag = cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX; if (optname == TCP_MD5SIG_EXT && cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { prefixlen = cmd.tcpm_prefixlen; if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) && prefixlen > 32)) return -EINVAL; } else { prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128; } if (optname == TCP_MD5SIG_EXT && cmd.tcpm_ifindex && cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) { struct net_device *dev; rcu_read_lock(); dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex); if (dev && netif_is_l3_master(dev)) l3index = dev->ifindex; rcu_read_unlock(); /* ok to reference set/not set outside of rcu; * right now device MUST be an L3 master */ if (!dev || !l3index) return -EINVAL; } if (!cmd.tcpm_keylen) { if (ipv6_addr_v4mapped(&sin6->sin6_addr)) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3], AF_INET, prefixlen, l3index, flags); return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr, AF_INET6, prefixlen, l3index, flags); } if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; if (ipv6_addr_v4mapped(&sin6->sin6_addr)) { addr = (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3]; /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ if (tcp_ao_required(sk, addr, AF_INET, l3flag ? l3index : -1, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } addr = (union tcp_md5_addr *)&sin6->sin6_addr; /* Don't allow keys for peers that have a matching TCP-AO key. * See the comment in tcp_ao_add_cmd() */ if (tcp_ao_required(sk, addr, AF_INET6, l3flag ? l3index : -1, false)) return -EKEYREJECTED; return tcp_md5_do_add(sk, addr, AF_INET6, prefixlen, l3index, flags, cmd.tcpm_key, cmd.tcpm_keylen); } static int tcp_v6_md5_hash_headers(struct tcp_sigpool *hp, const struct in6_addr *daddr, const struct in6_addr *saddr, const struct tcphdr *th, int nbytes) { struct tcp6_pseudohdr *bp; struct scatterlist sg; struct tcphdr *_th; bp = hp->scratch; /* 1. TCP pseudo-header (RFC2460) */ bp->saddr = *saddr; bp->daddr = *daddr; bp->protocol = cpu_to_be32(IPPROTO_TCP); bp->len = cpu_to_be32(nbytes); _th = (struct tcphdr *)(bp + 1); memcpy(_th, th, sizeof(*th)); _th->check = 0; sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th)); ahash_request_set_crypt(hp->req, &sg, NULL, sizeof(*bp) + sizeof(*th)); return crypto_ahash_update(hp->req); } static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, const struct in6_addr *daddr, struct in6_addr *saddr, const struct tcphdr *th) { struct tcp_sigpool hp; if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) goto clear_hash_nostart; if (crypto_ahash_init(hp.req)) goto clear_hash; if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(&hp, key)) goto clear_hash; ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); if (crypto_ahash_final(hp.req)) goto clear_hash; tcp_sigpool_end(&hp); return 0; clear_hash: tcp_sigpool_end(&hp); clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } static int tcp_v6_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb) { const struct tcphdr *th = tcp_hdr(skb); const struct in6_addr *saddr, *daddr; struct tcp_sigpool hp; if (sk) { /* valid for establish/request sockets */ saddr = &sk->sk_v6_rcv_saddr; daddr = &sk->sk_v6_daddr; } else { const struct ipv6hdr *ip6h = ipv6_hdr(skb); saddr = &ip6h->saddr; daddr = &ip6h->daddr; } if (tcp_sigpool_start(tcp_md5_sigpool_id, &hp)) goto clear_hash_nostart; if (crypto_ahash_init(hp.req)) goto clear_hash; if (tcp_v6_md5_hash_headers(&hp, daddr, saddr, th, skb->len)) goto clear_hash; if (tcp_sigpool_hash_skb_data(&hp, skb, th->doff << 2)) goto clear_hash; if (tcp_md5_hash_key(&hp, key)) goto clear_hash; ahash_request_set_crypt(hp.req, NULL, md5_hash, 0); if (crypto_ahash_final(hp.req)) goto clear_hash; tcp_sigpool_end(&hp); return 0; clear_hash: tcp_sigpool_end(&hp); clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } #endif static void tcp_v6_init_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb, u32 tw_isn) { bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener); ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; /* So that link locals have meaning */ if ((!sk_listener->sk_bound_dev_if || l3_slave) && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = tcp_v6_iif(skb); if (!tw_isn && (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || inet6_test_bit(REPFLOW, sk_listener))) { refcount_inc(&skb->users); ireq->pktopts = skb; } } static struct dst_entry *tcp_v6_route_req(const struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct request_sock *req, u32 tw_isn) { tcp_v6_init_req(req, sk, skb, tw_isn); if (security_inet_conn_request(sk, skb, req)) return NULL; return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP); } struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), .rtx_syn_ack = tcp_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, .syn_ack_timeout = tcp_syn_ack_timeout, }; const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup_rsk, .ao_calc_key = tcp_v6_ao_calc_key_rsk, .ao_synack_hash = tcp_v6_ao_synack_hash, #endif #ifdef CONFIG_SYN_COOKIES .cookie_init_seq = cookie_v6_init_sequence, #endif .route_req = tcp_v6_route_req, .init_seq = tcp_v6_init_seq, .init_ts_off = tcp_v6_init_ts_off, .send_synack = tcp_v6_send_synack, }; static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, int rst, u8 tclass, __be32 label, u32 priority, u32 txhash, struct tcp_key *key) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; struct sk_buff *buff; struct flowi6 fl6; struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); __be32 mrst = 0, *topt; struct dst_entry *dst; __u32 mark = 0; if (tsecr) tot_len += TCPOLEN_TSTAMP_ALIGNED; if (tcp_key_is_md5(key)) tot_len += TCPOLEN_MD5SIG_ALIGNED; if (tcp_key_is_ao(key)) tot_len += tcp_ao_len_aligned(key->ao_key); #ifdef CONFIG_MPTCP if (rst && !tcp_key_is_md5(key)) { mrst = mptcp_reset_option(skb); if (mrst) tot_len += sizeof(__be32); } #endif buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); if (!buff) return; skb_reserve(buff, MAX_TCP_HEADER); t1 = skb_push(buff, tot_len); skb_reset_transport_header(buff); /* Swap the send and the receive. */ memset(t1, 0, sizeof(*t1)); t1->dest = th->source; t1->source = th->dest; t1->doff = tot_len / 4; t1->seq = htonl(seq); t1->ack_seq = htonl(ack); t1->ack = !rst || !th->ack; t1->rst = rst; t1->window = htons(win); topt = (__be32 *)(t1 + 1); if (tsecr) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); *topt++ = htonl(tsval); *topt++ = htonl(tsecr); } if (mrst) *topt++ = mrst; #ifdef CONFIG_TCP_MD5SIG if (tcp_key_is_md5(key)) { *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG); tcp_v6_md5_hash_hdr((__u8 *)topt, key->md5_key, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, t1); } #endif #ifdef CONFIG_TCP_AO if (tcp_key_is_ao(key)) { *topt++ = htonl((TCPOPT_AO << 24) | (tcp_ao_len(key->ao_key) << 16) | (key->ao_key->sndid << 8) | (key->rcv_next)); tcp_ao_hash_hdr(AF_INET6, (char *)topt, key->ao_key, key->traffic_key, (union tcp_ao_addr *)&ipv6_hdr(skb)->saddr, (union tcp_ao_addr *)&ipv6_hdr(skb)->daddr, t1, key->sne); } #endif memset(&fl6, 0, sizeof(fl6)); fl6.daddr = ipv6_hdr(skb)->saddr; fl6.saddr = ipv6_hdr(skb)->daddr; fl6.flowlabel = label; buff->ip_summed = CHECKSUM_PARTIAL; __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); else { if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) oif = skb->skb_iif; fl6.flowi6_oif = oif; } if (sk) { if (sk->sk_state == TCP_TIME_WAIT) mark = inet_twsk(sk)->tw_mark; else mark = READ_ONCE(sk->sk_mark); skb_set_delivery_time(buff, tcp_transmit_time(sk), SKB_CLOCK_MONOTONIC); } if (txhash) { /* autoflowlabel/skb_get_hash_flowi6 rely on buff->hash */ skb_set_hash(buff, txhash, PKT_HASH_TYPE_L4); } fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark; fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6)); /* Pass a socket to ip6_dst_lookup either it is for RST * Underlying function will use this to retrieve the network * namespace */ if (sk && sk->sk_state != TCP_TIME_WAIT) dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); /*sk's xfrm_policy can be referred*/ else dst = ip6_dst_lookup_flow(net, ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass & ~INET_ECN_MASK, priority); TCP_INC_STATS(net, TCP_MIB_OUTSEGS); if (rst) TCP_INC_STATS(net, TCP_MIB_OUTRSTS); return; } kfree_skb(buff); } static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb, enum sk_rst_reason reason) { const struct tcphdr *th = tcp_hdr(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); const __u8 *md5_hash_location = NULL; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) bool allocated_traffic_key = false; #endif const struct tcp_ao_hdr *aoh; struct tcp_key key = {}; u32 seq = 0, ack_seq = 0; __be32 label = 0; u32 priority = 0; struct net *net; u32 txhash = 0; int oif = 0; #ifdef CONFIG_TCP_MD5SIG unsigned char newhash[16]; int genhash; struct sock *sk1 = NULL; #endif if (th->rst) return; /* If sk not NULL, it means we did a successful lookup and incoming * route had to be correct. prequeue might have dropped our dst. */ if (!sk && !ipv6_unicast_destination(skb)) return; net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(th, &md5_hash_location, &aoh)) return; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) rcu_read_lock(); #endif #ifdef CONFIG_TCP_MD5SIG if (sk && sk_fullsock(sk)) { int l3index; /* sdif set, means packet ingressed via a device * in an L3 domain and inet_iif is set to it. */ l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index); if (key.md5_key) key.type = TCP_KEY_MD5; } else if (md5_hash_location) { int dif = tcp_v6_iif_l3_slave(skb); int sdif = tcp_v6_sdif(skb); int l3index; /* * active side is lost. Try to find listening socket through * source port, and then find md5 key through listening socket. * we are not loose security here: * Incoming packet is checked with md5 hash with finding key, * no RST generated if md5 hash doesn't match. */ sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, NULL, 0, &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->source), dif, sdif); if (!sk1) goto out; /* sdif set, means packet ingressed via a device * in an L3 domain and dif is set to it. */ l3index = tcp_v6_sdif(skb) ? dif : 0; key.md5_key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index); if (!key.md5_key) goto out; key.type = TCP_KEY_MD5; genhash = tcp_v6_md5_hash_skb(newhash, key.md5_key, NULL, skb); if (genhash || memcmp(md5_hash_location, newhash, 16) != 0) goto out; } #endif if (th->ack) seq = ntohl(th->ack_seq); else ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); #ifdef CONFIG_TCP_AO if (aoh) { int l3index; l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; if (tcp_ao_prepare_reset(sk, skb, aoh, l3index, seq, &key.ao_key, &key.traffic_key, &allocated_traffic_key, &key.rcv_next, &key.sne)) goto out; key.type = TCP_KEY_AO; } #endif if (sk) { oif = sk->sk_bound_dev_if; if (sk_fullsock(sk)) { if (inet6_test_bit(REPFLOW, sk)) label = ip6_flowlabel(ipv6h); priority = READ_ONCE(sk->sk_priority); txhash = sk->sk_txhash; } if (sk->sk_state == TCP_TIME_WAIT) { label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); priority = inet_twsk(sk)->tw_priority; txhash = inet_twsk(sk)->tw_txhash; } } else { if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) label = ip6_flowlabel(ipv6h); } trace_tcp_send_reset(sk, skb, reason); tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, 1, ipv6_get_dsfield(ipv6h), label, priority, txhash, &key); #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) out: if (allocated_traffic_key) kfree(key.traffic_key); rcu_read_unlock(); #endif } static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_key *key, u8 tclass, __be32 label, u32 priority, u32 txhash) { tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, 0, tclass, label, priority, txhash, key); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); struct tcp_key key = {}; #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; if (static_branch_unlikely(&tcp_ao_needed.key)) { /* FIXME: the segment to-be-acked is not verified yet */ ao_info = rcu_dereference(tcptw->ao_info); if (ao_info) { const struct tcp_ao_hdr *aoh; /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) goto out; if (aoh) key.ao_key = tcp_ao_established_key(ao_info, aoh->rnext_keyid, -1); } } if (key.ao_key) { struct tcp_ao_key *rnext_key; key.traffic_key = snd_other_key(key.ao_key); /* rcv_next switches to our rcv_next */ rnext_key = READ_ONCE(ao_info->rnext_key); key.rcv_next = rnext_key->rcvid; key.sne = READ_ONCE(ao_info->snd_sne); key.type = TCP_KEY_AO; #else if (0) { #endif #ifdef CONFIG_TCP_MD5SIG } else if (static_branch_unlikely(&tcp_md5_needed.key)) { key.md5_key = tcp_twsk_md5_key(tcptw); if (key.md5_key) key.type = TCP_KEY_MD5; #endif } tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_tw_tsval(tcptw), READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, &key, tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority, tw->tw_txhash); #ifdef CONFIG_TCP_AO out: #endif inet_twsk_put(tw); } static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req) { struct tcp_key key = {}; #ifdef CONFIG_TCP_AO if (static_branch_unlikely(&tcp_ao_needed.key) && tcp_rsk_used_ao(req)) { const struct in6_addr *addr = &ipv6_hdr(skb)->saddr; const struct tcp_ao_hdr *aoh; int l3index; l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), NULL, &aoh)) return; if (!aoh) return; key.ao_key = tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, AF_INET6, aoh->rnext_keyid, -1); if (unlikely(!key.ao_key)) { /* Send ACK with any matching MKT for the peer */ key.ao_key = tcp_ao_do_lookup(sk, l3index, (union tcp_ao_addr *)addr, AF_INET6, -1, -1); /* Matching key disappeared (user removed the key?) * let the handshake timeout. */ if (!key.ao_key) { net_info_ratelimited("TCP-AO key for (%pI6, %d)->(%pI6, %d) suddenly disappeared, won't ACK new connection\n", addr, ntohs(tcp_hdr(skb)->source), &ipv6_hdr(skb)->daddr, ntohs(tcp_hdr(skb)->dest)); return; } } key.traffic_key = kmalloc(tcp_ao_digest_size(key.ao_key), GFP_ATOMIC); if (!key.traffic_key) return; key.type = TCP_KEY_AO; key.rcv_next = aoh->keyid; tcp_v6_ao_calc_key_rsk(key.ao_key, key.traffic_key, req); #else if (0) { #endif #ifdef CONFIG_TCP_MD5SIG } else if (static_branch_unlikely(&tcp_md5_needed.key)) { int l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0; key.md5_key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index); if (key.md5_key) key.type = TCP_KEY_MD5; #endif } /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV * sk->sk_state == TCP_SYN_RECV -> for Fast Open. */ tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ? tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, tcp_rsk(req)->rcv_nxt, tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), READ_ONCE(req->ts_recent), sk->sk_bound_dev_if, &key, ipv6_get_dsfield(ipv6_hdr(skb)), 0, READ_ONCE(sk->sk_priority), READ_ONCE(tcp_rsk(req)->txhash)); if (tcp_key_is_ao(&key)) kfree(key.traffic_key); } static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb) { #ifdef CONFIG_SYN_COOKIES const struct tcphdr *th = tcp_hdr(skb); if (!th->syn) sk = cookie_v6_check(sk, skb); #endif return sk; } u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph, struct tcphdr *th, u32 *cookie) { u16 mss = 0; #ifdef CONFIG_SYN_COOKIES mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops, &tcp_request_sock_ipv6_ops, sk, th); if (mss) { *cookie = __cookie_v6_init_sequence(iph, th, &mss); tcp_synq_overflow(sk); } #endif return mss; } static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); if (!ipv6_unicast_destination(skb)) goto drop; if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) { __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS); return 0; } return tcp_conn_request(&tcp6_request_sock_ops, &tcp_request_sock_ipv6_ops, sk, skb); drop: tcp_listendrop(sk); return 0; /* don't send reset */ } static void tcp_v6_restore_cb(struct sk_buff *skb) { /* We need to move header back to the beginning if xfrm6_policy_check() * and tcp_v6_fill_cb() are going to be called again. * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there. */ memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6, sizeof(struct inet6_skb_parm)); } static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst, struct request_sock *req_unhash, bool *own_req) { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp; const struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct ipv6_txoptions *opt; struct inet_sock *newinet; bool found_dup_sk = false; struct tcp_sock *newtp; struct sock *newsk; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *key; int l3index; #endif struct flowi6 fl6; if (skb->protocol == htons(ETH_P_IP)) { /* * v6 mapped */ newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst, req_unhash, own_req); if (!newsk) return NULL; inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); newnp = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; if (sk_is_mptcp(newsk)) mptcpv6_handle_mapped(newsk, true); newsk->sk_backlog_rcv = tcp_v4_do_rcv; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) newtp->af_specific = &tcp_sock_ipv6_mapped_specific; #endif newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = inet_iif(skb); newnp->mcast_hops = ip_hdr(skb)->ttl; newnp->rcv_flowinfo = 0; if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = 0; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count * here, tcp_create_openreq_child now does this for us, see the comment in * that function for the gory details. -acme */ /* It is tricky place. Until this moment IPv4 tcp worked with IPv6 icsk.icsk_af_ops. Sync it now. */ tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie); return newsk; } ireq = inet_rsk(req); if (sk_acceptq_is_full(sk)) goto out_overflow; if (!dst) { dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP); if (!dst) goto out; } newsk = tcp_create_openreq_child(sk, req, skb); if (!newsk) goto out_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks * count here, tcp_create_openreq_child now does this for us, see the * comment in that function for the gory details. -acme */ newsk->sk_gso_type = SKB_GSO_TCPV6; inet6_sk_rx_dst_set(newsk, skb); inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); newnp = tcp_inet6_sk(newsk); memcpy(newnp, np, sizeof(struct ipv6_pinfo)); ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; newnp->saddr = ireq->ir_v6_loc_addr; newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; newsk->sk_bound_dev_if = ireq->ir_iif; /* Now IPv6 options... First: no IPv4 options. */ newinet->inet_opt = NULL; newnp->ipv6_mc_list = NULL; newnp->ipv6_ac_list = NULL; newnp->ipv6_fl_list = NULL; /* Clone RX bits */ newnp->rxopt.all = np->rxopt.all; newnp->pktoptions = NULL; newnp->opt = NULL; newnp->mcast_oif = tcp_v6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); if (inet6_test_bit(REPFLOW, sk)) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* Set ToS of the new socket based upon the value of incoming SYN. * ECT bits are set later in tcp_init_transfer(). */ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; /* Clone native IPv6 options from listening socket (if any) Yes, keeping reference count would be much more clever, but we make one more one thing there: reattach optmem to newsk. */ opt = ireq->ipv6_opt; if (!opt) opt = rcu_dereference(np->opt); if (opt) { opt = ipv6_dup_options(newsk, opt); RCU_INIT_POINTER(newnp->opt, opt); } inet_csk(newsk)->icsk_ext_hdr_len = 0; if (opt) inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen + opt->opt_flen; tcp_ca_openreq_child(newsk, dst); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst)); tcp_initialize_rcv_mss(newsk); newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; #ifdef CONFIG_TCP_MD5SIG l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif); if (!tcp_rsk_used_ao(req)) { /* Copy over the MD5 key from the original socket */ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index); if (key) { const union tcp_md5_addr *addr; addr = (union tcp_md5_addr *)&newsk->sk_v6_daddr; if (tcp_md5_key_copy(newsk, addr, AF_INET6, 128, l3index, key)) { inet_csk_prepare_forced_close(newsk); tcp_done(newsk); goto out; } } } #endif #ifdef CONFIG_TCP_AO /* Copy over tcp_ao_info if any */ if (tcp_ao_copy_all_matching(sk, newsk, req, skb, AF_INET6)) goto out; /* OOM */ #endif if (__inet_inherit_port(sk, newsk) < 0) { inet_csk_prepare_forced_close(newsk); tcp_done(newsk); goto out; } *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), &found_dup_sk); if (*own_req) { tcp_move_syn(newtp, req); /* Clone pktoptions received with SYN, if we own the req */ if (ireq->pktopts) { newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk); consume_skb(ireq->pktopts); ireq->pktopts = NULL; if (newnp->pktoptions) tcp_v6_restore_cb(newnp->pktoptions); } } else { if (!req_unhash && found_dup_sk) { /* This code path should only be executed in the * syncookie case only */ bh_unlock_sock(newsk); sock_put(newsk); newsk = NULL; } } return newsk; out_overflow: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); out_nonewsk: dst_release(dst); out: tcp_listendrop(sk); return NULL; } INDIRECT_CALLABLE_DECLARE(struct dst_entry *ipv4_dst_check(struct dst_entry *, u32)); /* The socket must have it's spinlock held when we get * here, unless it is a TCP_LISTEN socket. * * We have a potential double-lock case here, so even when * doing backlog processing we use the BH locking scheme. * This is because we cannot sleep with the original spinlock * held. */ INDIRECT_CALLABLE_SCOPE int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) { struct ipv6_pinfo *np = tcp_inet6_sk(sk); struct sk_buff *opt_skb = NULL; enum skb_drop_reason reason; struct tcp_sock *tp; /* Imagine: socket is IPv6. IPv4 packet arrives, goes to IPv4 receive handler and backlogged. From backlog it always goes here. Kerboom... Fortunately, tcp_rcv_established and rcv_established handle them correctly, but it is not case with tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK */ if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); /* * socket locking is here for SMP purposes as backlog rcv * is currently called with bh processing disabled. */ /* Do Stevens' IPV6_PKTOPTIONS. Yes, guys, it is the only place in our code, where we may make it not affecting IPv4. The rest of code is protocol independent, and I do not like idea to uglify IPv4. Actually, all the idea behind IPV6_PKTOPTIONS looks not very well thought. For now we latch options, received in the last packet, enqueued by tcp. Feel free to propose better solution. --ANK (980728) */ if (np->rxopt.all) opt_skb = skb_clone_and_charge_r(skb, sk); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ struct dst_entry *dst; dst = rcu_dereference_protected(sk->sk_rx_dst, lockdep_sock_is_held(sk)); sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); if (dst) { if (sk->sk_rx_dst_ifindex != skb->skb_iif || INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, dst, sk->sk_rx_dst_cookie) == NULL) { RCU_INIT_POINTER(sk->sk_rx_dst, NULL); dst_release(dst); } } tcp_rcv_established(sk, skb); if (opt_skb) goto ipv6_pktoptions; return 0; } if (tcp_checksum_complete(skb)) goto csum_err; if (sk->sk_state == TCP_LISTEN) { struct sock *nsk = tcp_v6_cookie_check(sk, skb); if (nsk != sk) { if (nsk) { reason = tcp_child_process(sk, nsk, skb); if (reason) goto reset; } if (opt_skb) __kfree_skb(opt_skb); return 0; } } else sock_rps_save_rxhash(sk, skb); reason = tcp_rcv_state_process(sk, skb); if (reason) goto reset; if (opt_skb) goto ipv6_pktoptions; return 0; reset: tcp_v6_send_reset(sk, skb, sk_rst_convert_drop_reason(reason)); discard: if (opt_skb) __kfree_skb(opt_skb); sk_skb_reason_drop(sk, skb, reason); return 0; csum_err: reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); goto discard; ipv6_pktoptions: /* Do you ask, what is it? 1. skb was enqueued by tcp. 2. skb is added to tail of read queue, rather than out of order. 3. socket is not in passive state. 4. Finally, it really contains options, which user wants to receive. */ tp = tcp_sk(sk); if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt && !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) WRITE_ONCE(np->mcast_oif, tcp_v6_iif(opt_skb)); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) WRITE_ONCE(np->mcast_hops, ipv6_hdr(opt_skb)->hop_limit); if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); if (inet6_test_bit(REPFLOW, sk)) np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) { tcp_v6_restore_cb(opt_skb); opt_skb = xchg(&np->pktoptions, opt_skb); } else { __kfree_skb(opt_skb); opt_skb = xchg(&np->pktoptions, NULL); } } consume_skb(opt_skb); return 0; } static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, const struct tcphdr *th) { /* This is tricky: we move IP6CB at its correct location into * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because * _decode_session6() uses IP6CB(). * barrier() makes sure compiler won't play aliasing games. */ memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb), sizeof(struct inet6_skb_parm)); barrier(); TCP_SKB_CB(skb)->seq = ntohl(th->seq); TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff*4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->has_rxtstamp = skb->tstamp || skb_hwtstamps(skb)->hwtstamp; } INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) { enum skb_drop_reason drop_reason; int sdif = inet6_sdif(skb); int dif = inet6_iif(skb); const struct tcphdr *th; const struct ipv6hdr *hdr; struct sock *sk = NULL; bool refcounted; int ret; u32 isn; struct net *net = dev_net(skb->dev); drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; if (skb->pkt_type != PACKET_HOST) goto discard_it; /* * Count it even if it's bad. */ __TCP_INC_STATS(net, TCP_MIB_INSEGS); if (!pskb_may_pull(skb, sizeof(struct tcphdr))) goto discard_it; th = (const struct tcphdr *)skb->data; if (unlikely(th->doff < sizeof(struct tcphdr) / 4)) { drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; goto bad_packet; } if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) goto csum_error; th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); lookup: sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th), th->source, th->dest, inet6_iif(skb), sdif, &refcounted); if (!sk) goto no_tcp_socket; if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; if (sk->sk_state == TCP_NEW_SYN_RECV) { struct request_sock *req = inet_reqsk(sk); bool req_stolen = false; struct sock *nsk; sk = req->rsk_listener; if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) drop_reason = SKB_DROP_REASON_XFRM_POLICY; else drop_reason = tcp_inbound_hash(sk, req, skb, &hdr->saddr, &hdr->daddr, AF_INET6, dif, sdif); if (drop_reason) { sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; } if (tcp_checksum_complete(skb)) { reqsk_put(req); goto csum_error; } if (unlikely(sk->sk_state != TCP_LISTEN)) { nsk = reuseport_migrate_sock(sk, req_to_sk(req), skb); if (!nsk) { inet_csk_reqsk_queue_drop_and_put(sk, req); goto lookup; } sk = nsk; /* reuseport_migrate_sock() has already held one sk_refcnt * before returning. */ } else { sock_hold(sk); } refcounted = true; nsk = NULL; if (!tcp_filter(sk, skb)) { th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); nsk = tcp_check_req(sk, skb, req, false, &req_stolen); } else { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } if (!nsk) { reqsk_put(req); if (req_stolen) { /* Another cpu got exclusive access to req * and created a full blown socket. * Try to feed this packet to this socket * instead of discarding it. */ tcp_v6_restore_cb(skb); sock_put(sk); goto lookup; } goto discard_and_relse; } nf_reset_ct(skb); if (nsk == sk) { reqsk_put(req); tcp_v6_restore_cb(skb); } else { drop_reason = tcp_child_process(sk, nsk, skb); if (drop_reason) { enum sk_rst_reason rst_reason; rst_reason = sk_rst_convert_drop_reason(drop_reason); tcp_v6_send_reset(nsk, skb, rst_reason); goto discard_and_relse; } sock_put(sk); return 0; } } process: if (static_branch_unlikely(&ip6_min_hopcount)) { /* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */ if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) { __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP); drop_reason = SKB_DROP_REASON_TCP_MINTTL; goto discard_and_relse; } } if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto discard_and_relse; } drop_reason = tcp_inbound_hash(sk, NULL, skb, &hdr->saddr, &hdr->daddr, AF_INET6, dif, sdif); if (drop_reason) goto discard_and_relse; nf_reset_ct(skb); if (tcp_filter(sk, skb)) { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto discard_and_relse; } th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); skb->dev = NULL; if (sk->sk_state == TCP_LISTEN) { ret = tcp_v6_do_rcv(sk, skb); goto put_and_return; } sk_incoming_cpu_update(sk); bh_lock_sock_nested(sk); tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { ret = tcp_v6_do_rcv(sk, skb); } else { if (tcp_add_backlog(sk, skb, &drop_reason)) goto discard_and_relse; } bh_unlock_sock(sk); put_and_return: if (refcounted) sock_put(sk); return ret ? -1 : 0; no_tcp_socket: drop_reason = SKB_DROP_REASON_NO_SOCKET; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; tcp_v6_fill_cb(skb, hdr, th); if (tcp_checksum_complete(skb)) { csum_error: drop_reason = SKB_DROP_REASON_TCP_CSUM; trace_tcp_bad_csum(skb); __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS); bad_packet: __TCP_INC_STATS(net, TCP_MIB_INERRS); } else { tcp_v6_send_reset(NULL, skb, sk_rst_convert_drop_reason(drop_reason)); } discard_it: SKB_DR_OR(drop_reason, NOT_SPECIFIED); sk_skb_reason_drop(sk, skb, drop_reason); return 0; discard_and_relse: sk_drops_add(sk, skb); if (refcounted) sock_put(sk); goto discard_it; do_time_wait: if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { drop_reason = SKB_DROP_REASON_XFRM_POLICY; inet_twsk_put(inet_twsk(sk)); goto discard_it; } tcp_v6_fill_cb(skb, hdr, th); if (tcp_checksum_complete(skb)) { inet_twsk_put(inet_twsk(sk)); goto csum_error; } switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) { case TCP_TW_SYN: { struct sock *sk2; sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), tcp_v6_iif_l3_slave(skb), sdif); if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule_put(tw); sk = sk2; tcp_v6_restore_cb(skb); refcounted = false; __this_cpu_write(tcp_tw_isn, isn); goto process; } } /* to ACK */ fallthrough; case TCP_TW_ACK: tcp_v6_timewait_ack(sk, skb); break; case TCP_TW_RST: tcp_v6_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); inet_twsk_deschedule_put(inet_twsk(sk)); goto discard_it; case TCP_TW_SUCCESS: ; } goto discard_it; } void tcp_v6_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); const struct ipv6hdr *hdr; const struct tcphdr *th; struct sock *sk; if (skb->pkt_type != PACKET_HOST) return; if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) return; hdr = ipv6_hdr(skb); th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr) / 4) return; /* Note : We use inet6_iif() here, not tcp_v6_iif() */ sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), inet6_iif(skb), inet6_sdif(skb)); if (sk) { skb->sk = sk; skb->destructor = sock_edemux; if (sk_fullsock(sk)) { struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); if (dst) dst = dst_check(dst, sk->sk_rx_dst_cookie); if (dst && sk->sk_rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } } static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_destructor = tcp_twsk_destructor, }; INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) { __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); } const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, .sk_rx_dst_set = inet6_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct ipv6hdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), .mtu_reduced = tcp_v6_mtu_reduced, }; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { #ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, #endif #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup, .calc_ao_hash = tcp_v6_ao_hash_skb, .ao_parse = tcp_v6_parse_ao, .ao_calc_key_sk = tcp_v6_ao_calc_key_sk, #endif }; #endif /* * TCP over IPv4 via INET6 API */ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct iphdr), .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, .addr2sockaddr = inet6_csk_addr2sockaddr, .sockaddr_len = sizeof(struct sockaddr_in6), .mtu_reduced = tcp_v4_mtu_reduced, }; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = { #ifdef CONFIG_TCP_MD5SIG .md5_lookup = tcp_v4_md5_lookup, .calc_md5_hash = tcp_v4_md5_hash_skb, .md5_parse = tcp_v6_parse_md5_keys, #endif #ifdef CONFIG_TCP_AO .ao_lookup = tcp_v6_ao_lookup, .calc_ao_hash = tcp_v4_ao_hash_skb, .ao_parse = tcp_v6_parse_ao, .ao_calc_key_sk = tcp_v4_ao_calc_key_sk, #endif }; #endif /* NOTE: A lot of things set to zero explicitly by call to * sk_alloc() so need not be done here. */ static int tcp_v6_init_sock(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); tcp_init_sock(sk); icsk->icsk_af_ops = &ipv6_specific; #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific; #endif return 0; } #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, const struct request_sock *req, int i) { long ttd = req->rsk_timer.expires - jiffies; const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; if (ttd < 0) ttd = 0; seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], inet_rsk(req)->ir_num, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], ntohs(inet_rsk(req)->ir_rmt_port), TCP_SYN_RECV, 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->num_timeout, from_kuid_munged(seq_user_ns(seq), sock_i_uid(req->rsk_listener)), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, req); } static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) { const struct in6_addr *dest, *src; __u16 destp, srcp; int timer_active; unsigned long timer_expires; const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq; int rx_queue; int state; dest = &sp->sk_v6_daddr; src = &sp->sk_v6_rcv_saddr; destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); if (icsk->icsk_pending == ICSK_TIME_RETRANS || icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; timer_expires = icsk->icsk_timeout; } else if (timer_pending(&sp->sk_timer)) { timer_active = 2; timer_expires = sp->sk_timer.expires; } else { timer_active = 0; timer_expires = jiffies; } state = inet_sk_state_load(sp); if (state == TCP_LISTEN) rx_queue = READ_ONCE(sp->sk_ack_backlog); else /* Because we don't lock the socket, * we might find a transient negative value. */ rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq), 0); seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, state, READ_ONCE(tp->write_seq) - tp->snd_una, rx_queue, timer_active, jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), icsk->icsk_probes_out, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp), tcp_snd_cwnd(tp), state == TCP_LISTEN ? fastopenq->max_qlen : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) ); } static void get_timewait6_sock(struct seq_file *seq, struct inet_timewait_sock *tw, int i) { long delta = tw->tw_timer.expires - jiffies; const struct in6_addr *dest, *src; __u16 destp, srcp; dest = &tw->tw_v6_daddr; src = &tw->tw_v6_rcv_saddr; destp = ntohs(tw->tw_dport); srcp = ntohs(tw->tw_sport); seq_printf(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X " "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], srcp, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, tw->tw_substate, 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, refcount_read(&tw->tw_refcnt), tw); } static int tcp6_seq_show(struct seq_file *seq, void *v) { struct tcp_iter_state *st; struct sock *sk = v; if (v == SEQ_START_TOKEN) { seq_puts(seq, " sl " "local_address " "remote_address " "st tx_queue rx_queue tr tm->when retrnsmt" " uid timeout inode\n"); goto out; } st = seq->private; if (sk->sk_state == TCP_TIME_WAIT) get_timewait6_sock(seq, v, st->num); else if (sk->sk_state == TCP_NEW_SYN_RECV) get_openreq6(seq, v, st->num); else get_tcp6_sock(seq, v, st->num); out: return 0; } static const struct seq_operations tcp6_seq_ops = { .show = tcp6_seq_show, .start = tcp_seq_start, .next = tcp_seq_next, .stop = tcp_seq_stop, }; static struct tcp_seq_afinfo tcp6_seq_afinfo = { .family = AF_INET6, }; int __net_init tcp6_proc_init(struct net *net) { if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops, sizeof(struct tcp_iter_state), &tcp6_seq_afinfo)) return -ENOMEM; return 0; } void tcp6_proc_exit(struct net *net) { remove_proc_entry("tcp6", net->proc_net); } #endif struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, .close = tcp_close, .pre_connect = tcp_v6_pre_connect, .connect = tcp_v6_connect, .disconnect = tcp_disconnect, .accept = inet_csk_accept, .ioctl = tcp_ioctl, .init = tcp_v6_init_sock, .destroy = tcp_v4_destroy_sock, .shutdown = tcp_shutdown, .setsockopt = tcp_setsockopt, .getsockopt = tcp_getsockopt, .bpf_bypass_getsockopt = tcp_bpf_bypass_getsockopt, .keepalive = tcp_set_keepalive, .recvmsg = tcp_recvmsg, .sendmsg = tcp_sendmsg, .splice_eof = tcp_splice_eof, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, .hash = inet6_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, .put_port = inet_put_port, #ifdef CONFIG_BPF_SYSCALL .psock_update_sk_prot = tcp_bpf_update_proto, #endif .enter_memory_pressure = tcp_enter_memory_pressure, .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .memory_allocated = &tcp_memory_allocated, .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, .memory_pressure = &tcp_memory_pressure, .orphan_count = &tcp_orphan_count, .sysctl_mem = sysctl_tcp_mem, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), .ipv6_pinfo_offset = offsetof(struct tcp6_sock, inet6), .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, .h.hashinfo = NULL, .no_autobind = true, .diag_destroy = tcp_abort, }; EXPORT_SYMBOL_GPL(tcpv6_prot); static struct inet_protosw tcpv6_protosw = { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }; static int __net_init tcpv6_net_init(struct net *net) { int res; res = inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6, SOCK_RAW, IPPROTO_TCP, net); if (!res) net->ipv6.tcp_sk->sk_clockid = CLOCK_MONOTONIC; return res; } static void __net_exit tcpv6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.tcp_sk); } static struct pernet_operations tcpv6_net_ops = { .init = tcpv6_net_init, .exit = tcpv6_net_exit, }; int __init tcpv6_init(void) { int ret; net_hotdata.tcpv6_protocol = (struct inet6_protocol) { .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .flags = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL, }; ret = inet6_add_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); if (ret) goto out; /* register inet6 protocol */ ret = inet6_register_protosw(&tcpv6_protosw); if (ret) goto out_tcpv6_protocol; ret = register_pernet_subsys(&tcpv6_net_ops); if (ret) goto out_tcpv6_protosw; ret = mptcpv6_init(); if (ret) goto out_tcpv6_pernet_subsys; out: return ret; out_tcpv6_pernet_subsys: unregister_pernet_subsys(&tcpv6_net_ops); out_tcpv6_protosw: inet6_unregister_protosw(&tcpv6_protosw); out_tcpv6_protocol: inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); goto out; } void tcpv6_exit(void) { unregister_pernet_subsys(&tcpv6_net_ops); inet6_unregister_protosw(&tcpv6_protosw); inet6_del_protocol(&net_hotdata.tcpv6_protocol, IPPROTO_TCP); } |
| 26 22 23 4 8 5 8 12 14 14 1 21 4 2 25 1 22 4 1 3 3 1 4 1 4 2 15 3 2 4 3 1 3 3 21 28 27 1 13 5 1 22 2 19 10 1 11 22 19 15 6 13 13 13 1 1 12 1 26 26 29 1 22 5 22 6 28 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 | // SPDX-License-Identifier: GPL-2.0 #include "backref.h" #include "btrfs_inode.h" #include "fiemap.h" #include "file.h" #include "file-item.h" struct btrfs_fiemap_entry { u64 offset; u64 phys; u64 len; u32 flags; }; /* * Indicate the caller of emit_fiemap_extent() that it needs to unlock the file * range from the inode's io tree, unlock the subvolume tree search path, flush * the fiemap cache and relock the file range and research the subvolume tree. * The value here is something negative that can't be confused with a valid * errno value and different from 1 because that's also a return value from * fiemap_fill_next_extent() and also it's often used to mean some btree search * did not find a key, so make it some distinct negative value. */ #define BTRFS_FIEMAP_FLUSH_CACHE (-(MAX_ERRNO + 1)) /* * Used to: * * - Cache the next entry to be emitted to the fiemap buffer, so that we can * merge extents that are contiguous and can be grouped as a single one; * * - Store extents ready to be written to the fiemap buffer in an intermediary * buffer. This intermediary buffer is to ensure that in case the fiemap * buffer is memory mapped to the fiemap target file, we don't deadlock * during btrfs_page_mkwrite(). This is because during fiemap we are locking * an extent range in order to prevent races with delalloc flushing and * ordered extent completion, which is needed in order to reliably detect * delalloc in holes and prealloc extents. And this can lead to a deadlock * if the fiemap buffer is memory mapped to the file we are running fiemap * against (a silly, useless in practice scenario, but possible) because * btrfs_page_mkwrite() will try to lock the same extent range. */ struct fiemap_cache { /* An array of ready fiemap entries. */ struct btrfs_fiemap_entry *entries; /* Number of entries in the entries array. */ int entries_size; /* Index of the next entry in the entries array to write to. */ int entries_pos; /* * Once the entries array is full, this indicates what's the offset for * the next file extent item we must search for in the inode's subvolume * tree after unlocking the extent range in the inode's io tree and * releasing the search path. */ u64 next_search_offset; /* * This matches struct fiemap_extent_info::fi_mapped_extents, we use it * to count ourselves emitted extents and stop instead of relying on * fiemap_fill_next_extent() because we buffer ready fiemap entries at * the @entries array, and we want to stop as soon as we hit the max * amount of extents to map, not just to save time but also to make the * logic at extent_fiemap() simpler. */ unsigned int extents_mapped; /* Fields for the cached extent (unsubmitted, not ready, extent). */ u64 offset; u64 phys; u64 len; u32 flags; bool cached; }; static int flush_fiemap_cache(struct fiemap_extent_info *fieinfo, struct fiemap_cache *cache) { for (int i = 0; i < cache->entries_pos; i++) { struct btrfs_fiemap_entry *entry = &cache->entries[i]; int ret; ret = fiemap_fill_next_extent(fieinfo, entry->offset, entry->phys, entry->len, entry->flags); /* * Ignore 1 (reached max entries) because we keep track of that * ourselves in emit_fiemap_extent(). */ if (ret < 0) return ret; } cache->entries_pos = 0; return 0; } /* * Helper to submit fiemap extent. * * Will try to merge current fiemap extent specified by @offset, @phys, * @len and @flags with cached one. * And only when we fails to merge, cached one will be submitted as * fiemap extent. * * Return value is the same as fiemap_fill_next_extent(). */ static int emit_fiemap_extent(struct fiemap_extent_info *fieinfo, struct fiemap_cache *cache, u64 offset, u64 phys, u64 len, u32 flags) { struct btrfs_fiemap_entry *entry; u64 cache_end; /* Set at the end of extent_fiemap(). */ ASSERT((flags & FIEMAP_EXTENT_LAST) == 0); if (!cache->cached) goto assign; /* * When iterating the extents of the inode, at extent_fiemap(), we may * find an extent that starts at an offset behind the end offset of the * previous extent we processed. This happens if fiemap is called * without FIEMAP_FLAG_SYNC and there are ordered extents completing * after we had to unlock the file range, release the search path, emit * the fiemap extents stored in the buffer (cache->entries array) and * the lock the remainder of the range and re-search the btree. * * For example we are in leaf X processing its last item, which is the * file extent item for file range [512K, 1M[, and after * btrfs_next_leaf() releases the path, there's an ordered extent that * completes for the file range [768K, 2M[, and that results in trimming * the file extent item so that it now corresponds to the file range * [512K, 768K[ and a new file extent item is inserted for the file * range [768K, 2M[, which may end up as the last item of leaf X or as * the first item of the next leaf - in either case btrfs_next_leaf() * will leave us with a path pointing to the new extent item, for the * file range [768K, 2M[, since that's the first key that follows the * last one we processed. So in order not to report overlapping extents * to user space, we trim the length of the previously cached extent and * emit it. * * Upon calling btrfs_next_leaf() we may also find an extent with an * offset smaller than or equals to cache->offset, and this happens * when we had a hole or prealloc extent with several delalloc ranges in * it, but after btrfs_next_leaf() released the path, delalloc was * flushed and the resulting ordered extents were completed, so we can * now have found a file extent item for an offset that is smaller than * or equals to what we have in cache->offset. We deal with this as * described below. */ cache_end = cache->offset + cache->len; if (cache_end > offset) { if (offset == cache->offset) { /* * We cached a dealloc range (found in the io tree) for * a hole or prealloc extent and we have now found a * file extent item for the same offset. What we have * now is more recent and up to date, so discard what * we had in the cache and use what we have just found. */ goto assign; } else if (offset > cache->offset) { /* * The extent range we previously found ends after the * offset of the file extent item we found and that * offset falls somewhere in the middle of that previous * extent range. So adjust the range we previously found * to end at the offset of the file extent item we have * just found, since this extent is more up to date. * Emit that adjusted range and cache the file extent * item we have just found. This corresponds to the case * where a previously found file extent item was split * due to an ordered extent completing. */ cache->len = offset - cache->offset; goto emit; } else { const u64 range_end = offset + len; /* * The offset of the file extent item we have just found * is behind the cached offset. This means we were * processing a hole or prealloc extent for which we * have found delalloc ranges (in the io tree), so what * we have in the cache is the last delalloc range we * found while the file extent item we found can be * either for a whole delalloc range we previously * emmitted or only a part of that range. * * We have two cases here: * * 1) The file extent item's range ends at or behind the * cached extent's end. In this case just ignore the * current file extent item because we don't want to * overlap with previous ranges that may have been * emmitted already; * * 2) The file extent item starts behind the currently * cached extent but its end offset goes beyond the * end offset of the cached extent. We don't want to * overlap with a previous range that may have been * emmitted already, so we emit the currently cached * extent and then partially store the current file * extent item's range in the cache, for the subrange * going the cached extent's end to the end of the * file extent item. */ if (range_end <= cache_end) return 0; if (!(flags & (FIEMAP_EXTENT_ENCODED | FIEMAP_EXTENT_DELALLOC))) phys += cache_end - offset; offset = cache_end; len = range_end - cache_end; goto emit; } } /* * Only merges fiemap extents if * 1) Their logical addresses are continuous * * 2) Their physical addresses are continuous * So truly compressed (physical size smaller than logical size) * extents won't get merged with each other * * 3) Share same flags */ if (cache->offset + cache->len == offset && cache->phys + cache->len == phys && cache->flags == flags) { cache->len += len; return 0; } emit: /* Not mergeable, need to submit cached one */ if (cache->entries_pos == cache->entries_size) { /* * We will need to research for the end offset of the last * stored extent and not from the current offset, because after * unlocking the range and releasing the path, if there's a hole * between that end offset and this current offset, a new extent * may have been inserted due to a new write, so we don't want * to miss it. */ entry = &cache->entries[cache->entries_size - 1]; cache->next_search_offset = entry->offset + entry->len; cache->cached = false; return BTRFS_FIEMAP_FLUSH_CACHE; } entry = &cache->entries[cache->entries_pos]; entry->offset = cache->offset; entry->phys = cache->phys; entry->len = cache->len; entry->flags = cache->flags; cache->entries_pos++; cache->extents_mapped++; if (cache->extents_mapped == fieinfo->fi_extents_max) { cache->cached = false; return 1; } assign: cache->cached = true; cache->offset = offset; cache->phys = phys; cache->len = len; cache->flags = flags; return 0; } /* * Emit last fiemap cache * * The last fiemap cache may still be cached in the following case: * 0 4k 8k * |<- Fiemap range ->| * |<------------ First extent ----------->| * * In this case, the first extent range will be cached but not emitted. * So we must emit it before ending extent_fiemap(). */ static int emit_last_fiemap_cache(struct fiemap_extent_info *fieinfo, struct fiemap_cache *cache) { int ret; if (!cache->cached) return 0; ret = fiemap_fill_next_extent(fieinfo, cache->offset, cache->phys, cache->len, cache->flags); cache->cached = false; if (ret > 0) ret = 0; return ret; } static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *path) { struct extent_buffer *clone = path->nodes[0]; struct btrfs_key key; int slot; int ret; path->slots[0]++; if (path->slots[0] < btrfs_header_nritems(path->nodes[0])) return 0; /* * Add a temporary extra ref to an already cloned extent buffer to * prevent btrfs_next_leaf() freeing it, we want to reuse it to avoid * the cost of allocating a new one. */ ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED, &clone->bflags)); atomic_inc(&clone->refs); ret = btrfs_next_leaf(inode->root, path); if (ret != 0) goto out; /* * Don't bother with cloning if there are no more file extent items for * our inode. */ btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid != btrfs_ino(inode) || key.type != BTRFS_EXTENT_DATA_KEY) { ret = 1; goto out; } /* * Important to preserve the start field, for the optimizations when * checking if extents are shared (see extent_fiemap()). * * We must set ->start before calling copy_extent_buffer_full(). If we * are on sub-pagesize blocksize, we use ->start to determine the offset * into the folio where our eb exists, and if we update ->start after * the fact then any subsequent reads of the eb may read from a * different offset in the folio than where we originally copied into. */ clone->start = path->nodes[0]->start; /* See the comment at fiemap_search_slot() about why we clone. */ copy_extent_buffer_full(clone, path->nodes[0]); slot = path->slots[0]; btrfs_release_path(path); path->nodes[0] = clone; path->slots[0] = slot; out: if (ret) free_extent_buffer(clone); return ret; } /* * Search for the first file extent item that starts at a given file offset or * the one that starts immediately before that offset. * Returns: 0 on success, < 0 on error, 1 if not found. */ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path, u64 file_offset) { const u64 ino = btrfs_ino(inode); struct btrfs_root *root = inode->root; struct extent_buffer *clone; struct btrfs_key key; int slot; int ret; key.objectid = ino; key.type = BTRFS_EXTENT_DATA_KEY; key.offset = file_offset; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) return ret; if (ret > 0 && path->slots[0] > 0) { btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0] - 1); if (key.objectid == ino && key.type == BTRFS_EXTENT_DATA_KEY) path->slots[0]--; } if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { ret = btrfs_next_leaf(root, path); if (ret != 0) return ret; btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) return 1; } /* * We clone the leaf and use it during fiemap. This is because while * using the leaf we do expensive things like checking if an extent is * shared, which can take a long time. In order to prevent blocking * other tasks for too long, we use a clone of the leaf. We have locked * the file range in the inode's io tree, so we know none of our file * extent items can change. This way we avoid blocking other tasks that * want to insert items for other inodes in the same leaf or b+tree * rebalance operations (triggered for example when someone is trying * to push items into this leaf when trying to insert an item in a * neighbour leaf). * We also need the private clone because holding a read lock on an * extent buffer of the subvolume's b+tree will make lockdep unhappy * when we check if extents are shared, as backref walking may need to * lock the same leaf we are processing. */ clone = btrfs_clone_extent_buffer(path->nodes[0]); if (!clone) return -ENOMEM; slot = path->slots[0]; btrfs_release_path(path); path->nodes[0] = clone; path->slots[0] = slot; return 0; } /* * Process a range which is a hole or a prealloc extent in the inode's subvolume * btree. If @disk_bytenr is 0, we are dealing with a hole, otherwise a prealloc * extent. The end offset (@end) is inclusive. */ static int fiemap_process_hole(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, struct fiemap_cache *cache, struct extent_state **delalloc_cached_state, struct btrfs_backref_share_check_ctx *backref_ctx, u64 disk_bytenr, u64 extent_offset, u64 extent_gen, u64 start, u64 end) { const u64 i_size = i_size_read(&inode->vfs_inode); u64 cur_offset = start; u64 last_delalloc_end = 0; u32 prealloc_flags = FIEMAP_EXTENT_UNWRITTEN; bool checked_extent_shared = false; int ret; /* * There can be no delalloc past i_size, so don't waste time looking for * it beyond i_size. */ while (cur_offset < end && cur_offset < i_size) { u64 delalloc_start; u64 delalloc_end; u64 prealloc_start; u64 prealloc_len = 0; bool delalloc; delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end, delalloc_cached_state, &delalloc_start, &delalloc_end); if (!delalloc) break; /* * If this is a prealloc extent we have to report every section * of it that has no delalloc. */ if (disk_bytenr != 0) { if (last_delalloc_end == 0) { prealloc_start = start; prealloc_len = delalloc_start - start; } else { prealloc_start = last_delalloc_end + 1; prealloc_len = delalloc_start - prealloc_start; } } if (prealloc_len > 0) { if (!checked_extent_shared && fieinfo->fi_extents_max) { ret = btrfs_is_data_extent_shared(inode, disk_bytenr, extent_gen, backref_ctx); if (ret < 0) return ret; else if (ret > 0) prealloc_flags |= FIEMAP_EXTENT_SHARED; checked_extent_shared = true; } ret = emit_fiemap_extent(fieinfo, cache, prealloc_start, disk_bytenr + extent_offset, prealloc_len, prealloc_flags); if (ret) return ret; extent_offset += prealloc_len; } ret = emit_fiemap_extent(fieinfo, cache, delalloc_start, 0, delalloc_end + 1 - delalloc_start, FIEMAP_EXTENT_DELALLOC | FIEMAP_EXTENT_UNKNOWN); if (ret) return ret; last_delalloc_end = delalloc_end; cur_offset = delalloc_end + 1; extent_offset += cur_offset - delalloc_start; cond_resched(); } /* * Either we found no delalloc for the whole prealloc extent or we have * a prealloc extent that spans i_size or starts at or after i_size. */ if (disk_bytenr != 0 && last_delalloc_end < end) { u64 prealloc_start; u64 prealloc_len; if (last_delalloc_end == 0) { prealloc_start = start; prealloc_len = end + 1 - start; } else { prealloc_start = last_delalloc_end + 1; prealloc_len = end + 1 - prealloc_start; } if (!checked_extent_shared && fieinfo->fi_extents_max) { ret = btrfs_is_data_extent_shared(inode, disk_bytenr, extent_gen, backref_ctx); if (ret < 0) return ret; else if (ret > 0) prealloc_flags |= FIEMAP_EXTENT_SHARED; } ret = emit_fiemap_extent(fieinfo, cache, prealloc_start, disk_bytenr + extent_offset, prealloc_len, prealloc_flags); if (ret) return ret; } return 0; } static int fiemap_find_last_extent_offset(struct btrfs_inode *inode, struct btrfs_path *path, u64 *last_extent_end_ret) { const u64 ino = btrfs_ino(inode); struct btrfs_root *root = inode->root; struct extent_buffer *leaf; struct btrfs_file_extent_item *ei; struct btrfs_key key; u64 disk_bytenr; int ret; /* * Lookup the last file extent. We're not using i_size here because * there might be preallocation past i_size. */ ret = btrfs_lookup_file_extent(NULL, root, path, ino, (u64)-1, 0); /* There can't be a file extent item at offset (u64)-1 */ ASSERT(ret != 0); if (ret < 0) return ret; /* * For a non-existing key, btrfs_search_slot() always leaves us at a * slot > 0, except if the btree is empty, which is impossible because * at least it has the inode item for this inode and all the items for * the root inode 256. */ ASSERT(path->slots[0] > 0); path->slots[0]--; leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) { /* No file extent items in the subvolume tree. */ *last_extent_end_ret = 0; return 0; } /* * For an inline extent, the disk_bytenr is where inline data starts at, * so first check if we have an inline extent item before checking if we * have an implicit hole (disk_bytenr == 0). */ ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_INLINE) { *last_extent_end_ret = btrfs_file_extent_end(path); return 0; } /* * Find the last file extent item that is not a hole (when NO_HOLES is * not enabled). This should take at most 2 iterations in the worst * case: we have one hole file extent item at slot 0 of a leaf and * another hole file extent item as the last item in the previous leaf. * This is because we merge file extent items that represent holes. */ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei); while (disk_bytenr == 0) { ret = btrfs_previous_item(root, path, ino, BTRFS_EXTENT_DATA_KEY); if (ret < 0) { return ret; } else if (ret > 0) { /* No file extent items that are not holes. */ *last_extent_end_ret = 0; return 0; } leaf = path->nodes[0]; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei); } *last_extent_end_ret = btrfs_file_extent_end(path); return 0; } static int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { const u64 ino = btrfs_ino(inode); struct extent_state *cached_state = NULL; struct extent_state *delalloc_cached_state = NULL; struct btrfs_path *path; struct fiemap_cache cache = { 0 }; struct btrfs_backref_share_check_ctx *backref_ctx; u64 last_extent_end = 0; u64 prev_extent_end; u64 range_start; u64 range_end; const u64 sectorsize = inode->root->fs_info->sectorsize; bool stopped = false; int ret; cache.entries_size = PAGE_SIZE / sizeof(struct btrfs_fiemap_entry); cache.entries = kmalloc_array(cache.entries_size, sizeof(struct btrfs_fiemap_entry), GFP_KERNEL); backref_ctx = btrfs_alloc_backref_share_check_ctx(); path = btrfs_alloc_path(); if (!cache.entries || !backref_ctx || !path) { ret = -ENOMEM; goto out; } restart: range_start = round_down(start, sectorsize); range_end = round_up(start + len, sectorsize); prev_extent_end = range_start; lock_extent(&inode->io_tree, range_start, range_end, &cached_state); ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); if (ret < 0) goto out_unlock; btrfs_release_path(path); path->reada = READA_FORWARD; ret = fiemap_search_slot(inode, path, range_start); if (ret < 0) { goto out_unlock; } else if (ret > 0) { /* * No file extent item found, but we may have delalloc between * the current offset and i_size. So check for that. */ ret = 0; goto check_eof_delalloc; } while (prev_extent_end < range_end) { struct extent_buffer *leaf = path->nodes[0]; struct btrfs_file_extent_item *ei; struct btrfs_key key; u64 extent_end; u64 extent_len; u64 extent_offset = 0; u64 extent_gen; u64 disk_bytenr = 0; u64 flags = 0; int extent_type; u8 compression; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); if (key.objectid != ino || key.type != BTRFS_EXTENT_DATA_KEY) break; extent_end = btrfs_file_extent_end(path); /* * The first iteration can leave us at an extent item that ends * before our range's start. Move to the next item. */ if (extent_end <= range_start) goto next_item; backref_ctx->curr_leaf_bytenr = leaf->start; /* We have in implicit hole (NO_HOLES feature enabled). */ if (prev_extent_end < key.offset) { const u64 hole_end = min(key.offset, range_end) - 1; ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, 0, 0, 0, prev_extent_end, hole_end); if (ret < 0) { goto out_unlock; } else if (ret > 0) { /* fiemap_fill_next_extent() told us to stop. */ stopped = true; break; } /* We've reached the end of the fiemap range, stop. */ if (key.offset >= range_end) { stopped = true; break; } } extent_len = extent_end - key.offset; ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); compression = btrfs_file_extent_compression(leaf, ei); extent_type = btrfs_file_extent_type(leaf, ei); extent_gen = btrfs_file_extent_generation(leaf, ei); if (extent_type != BTRFS_FILE_EXTENT_INLINE) { disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei); if (compression == BTRFS_COMPRESS_NONE) extent_offset = btrfs_file_extent_offset(leaf, ei); } if (compression != BTRFS_COMPRESS_NONE) flags |= FIEMAP_EXTENT_ENCODED; if (extent_type == BTRFS_FILE_EXTENT_INLINE) { flags |= FIEMAP_EXTENT_DATA_INLINE; flags |= FIEMAP_EXTENT_NOT_ALIGNED; ret = emit_fiemap_extent(fieinfo, &cache, key.offset, 0, extent_len, flags); } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, disk_bytenr, extent_offset, extent_gen, key.offset, extent_end - 1); } else if (disk_bytenr == 0) { /* We have an explicit hole. */ ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, 0, 0, 0, key.offset, extent_end - 1); } else { /* We have a regular extent. */ if (fieinfo->fi_extents_max) { ret = btrfs_is_data_extent_shared(inode, disk_bytenr, extent_gen, backref_ctx); if (ret < 0) goto out_unlock; else if (ret > 0) flags |= FIEMAP_EXTENT_SHARED; } ret = emit_fiemap_extent(fieinfo, &cache, key.offset, disk_bytenr + extent_offset, extent_len, flags); } if (ret < 0) { goto out_unlock; } else if (ret > 0) { /* emit_fiemap_extent() told us to stop. */ stopped = true; break; } prev_extent_end = extent_end; next_item: if (fatal_signal_pending(current)) { ret = -EINTR; goto out_unlock; } ret = fiemap_next_leaf_item(inode, path); if (ret < 0) { goto out_unlock; } else if (ret > 0) { /* No more file extent items for this inode. */ break; } cond_resched(); } check_eof_delalloc: if (!stopped && prev_extent_end < range_end) { ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, 0, 0, 0, prev_extent_end, range_end - 1); if (ret < 0) goto out_unlock; prev_extent_end = range_end; } if (cache.cached && cache.offset + cache.len >= last_extent_end) { const u64 i_size = i_size_read(&inode->vfs_inode); if (prev_extent_end < i_size) { u64 delalloc_start; u64 delalloc_end; bool delalloc; delalloc = btrfs_find_delalloc_in_range(inode, prev_extent_end, i_size - 1, &delalloc_cached_state, &delalloc_start, &delalloc_end); if (!delalloc) cache.flags |= FIEMAP_EXTENT_LAST; } else { cache.flags |= FIEMAP_EXTENT_LAST; } } out_unlock: unlock_extent(&inode->io_tree, range_start, range_end, &cached_state); if (ret == BTRFS_FIEMAP_FLUSH_CACHE) { btrfs_release_path(path); ret = flush_fiemap_cache(fieinfo, &cache); if (ret) goto out; len -= cache.next_search_offset - start; start = cache.next_search_offset; goto restart; } else if (ret < 0) { goto out; } /* * Must free the path before emitting to the fiemap buffer because we * may have a non-cloned leaf and if the fiemap buffer is memory mapped * to a file, a write into it (through btrfs_page_mkwrite()) may trigger * waiting for an ordered extent that in order to complete needs to * modify that leaf, therefore leading to a deadlock. */ btrfs_free_path(path); path = NULL; ret = flush_fiemap_cache(fieinfo, &cache); if (ret) goto out; ret = emit_last_fiemap_cache(fieinfo, &cache); out: free_extent_state(delalloc_cached_state); kfree(cache.entries); btrfs_free_backref_share_ctx(backref_ctx); btrfs_free_path(path); return ret; } int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { struct btrfs_inode *btrfs_inode = BTRFS_I(inode); int ret; ret = fiemap_prep(inode, fieinfo, start, &len, 0); if (ret) return ret; /* * fiemap_prep() called filemap_write_and_wait() for the whole possible * file range (0 to LLONG_MAX), but that is not enough if we have * compression enabled. The first filemap_fdatawrite_range() only kicks * in the compression of data (in an async thread) and will return * before the compression is done and writeback is started. A second * filemap_fdatawrite_range() is needed to wait for the compression to * complete and writeback to start. We also need to wait for ordered * extents to complete, because our fiemap implementation uses mainly * file extent items to list the extents, searching for extent maps * only for file ranges with holes or prealloc extents to figure out * if we have delalloc in those ranges. */ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { ret = btrfs_wait_ordered_range(btrfs_inode, 0, LLONG_MAX); if (ret) return ret; } btrfs_inode_lock(btrfs_inode, BTRFS_ILOCK_SHARED); /* * We did an initial flush to avoid holding the inode's lock while * triggering writeback and waiting for the completion of IO and ordered * extents. Now after we locked the inode we do it again, because it's * possible a new write may have happened in between those two steps. */ if (fieinfo->fi_flags & FIEMAP_FLAG_SYNC) { ret = btrfs_wait_ordered_range(btrfs_inode, 0, LLONG_MAX); if (ret) { btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); return ret; } } ret = extent_fiemap(btrfs_inode, fieinfo, start, len); btrfs_inode_unlock(btrfs_inode, BTRFS_ILOCK_SHARED); return ret; } |
| 63 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 | /* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #ifndef __DRM_PLANE_H__ #define __DRM_PLANE_H__ #include <linux/list.h> #include <linux/ctype.h> #include <linux/kmsg_dump.h> #include <drm/drm_mode_object.h> #include <drm/drm_color_mgmt.h> #include <drm/drm_rect.h> #include <drm/drm_modeset_lock.h> #include <drm/drm_util.h> struct drm_crtc; struct drm_plane_size_hint; struct drm_printer; struct drm_modeset_acquire_ctx; enum drm_scaling_filter { DRM_SCALING_FILTER_DEFAULT, DRM_SCALING_FILTER_NEAREST_NEIGHBOR, }; /** * struct drm_plane_state - mutable plane state * * Please note that the destination coordinates @crtc_x, @crtc_y, @crtc_h and * @crtc_w and the source coordinates @src_x, @src_y, @src_h and @src_w are the * raw coordinates provided by userspace. Drivers should use * drm_atomic_helper_check_plane_state() and only use the derived rectangles in * @src and @dst to program the hardware. */ struct drm_plane_state { /** @plane: backpointer to the plane */ struct drm_plane *plane; /** * @crtc: * * Currently bound CRTC, NULL if disabled. Do not write this directly, * use drm_atomic_set_crtc_for_plane() */ struct drm_crtc *crtc; /** * @fb: * * Currently bound framebuffer. Do not write this directly, use * drm_atomic_set_fb_for_plane() */ struct drm_framebuffer *fb; /** * @fence: * * Optional fence to wait for before scanning out @fb. The core atomic * code will set this when userspace is using explicit fencing. Do not * write this field directly for a driver's implicit fence. * * Drivers should store any implicit fence in this from their * &drm_plane_helper_funcs.prepare_fb callback. See * drm_gem_plane_helper_prepare_fb() for a suitable helper. */ struct dma_fence *fence; /** * @crtc_x: * * Left position of visible portion of plane on crtc, signed dest * location allows it to be partially off screen. */ int32_t crtc_x; /** * @crtc_y: * * Upper position of visible portion of plane on crtc, signed dest * location allows it to be partially off screen. */ int32_t crtc_y; /** @crtc_w: width of visible portion of plane on crtc */ /** @crtc_h: height of visible portion of plane on crtc */ uint32_t crtc_w, crtc_h; /** * @src_x: left position of visible portion of plane within plane (in * 16.16 fixed point). */ uint32_t src_x; /** * @src_y: upper position of visible portion of plane within plane (in * 16.16 fixed point). */ uint32_t src_y; /** @src_w: width of visible portion of plane (in 16.16) */ /** @src_h: height of visible portion of plane (in 16.16) */ uint32_t src_h, src_w; /** @hotspot_x: x offset to mouse cursor hotspot */ /** @hotspot_y: y offset to mouse cursor hotspot */ int32_t hotspot_x, hotspot_y; /** * @alpha: * Opacity of the plane with 0 as completely transparent and 0xffff as * completely opaque. See drm_plane_create_alpha_property() for more * details. */ u16 alpha; /** * @pixel_blend_mode: * The alpha blending equation selection, describing how the pixels from * the current plane are composited with the background. Value can be * one of DRM_MODE_BLEND_* */ uint16_t pixel_blend_mode; /** * @rotation: * Rotation of the plane. See drm_plane_create_rotation_property() for * more details. */ unsigned int rotation; /** * @zpos: * Priority of the given plane on crtc (optional). * * User-space may set mutable zpos properties so that multiple active * planes on the same CRTC have identical zpos values. This is a * user-space bug, but drivers can solve the conflict by comparing the * plane object IDs; the plane with a higher ID is stacked on top of a * plane with a lower ID. * * See drm_plane_create_zpos_property() and * drm_plane_create_zpos_immutable_property() for more details. */ unsigned int zpos; /** * @normalized_zpos: * Normalized value of zpos: unique, range from 0 to N-1 where N is the * number of active planes for given crtc. Note that the driver must set * &drm_mode_config.normalize_zpos or call drm_atomic_normalize_zpos() to * update this before it can be trusted. */ unsigned int normalized_zpos; /** * @color_encoding: * * Color encoding for non RGB formats */ enum drm_color_encoding color_encoding; /** * @color_range: * * Color range for non RGB formats */ enum drm_color_range color_range; /** * @fb_damage_clips: * * Blob representing damage (area in plane framebuffer that changed * since last plane update) as an array of &drm_mode_rect in framebuffer * coodinates of the attached framebuffer. Note that unlike plane src, * damage clips are not in 16.16 fixed point. * * See drm_plane_get_damage_clips() and * drm_plane_get_damage_clips_count() for accessing these. */ struct drm_property_blob *fb_damage_clips; /** * @ignore_damage_clips: * * Set by drivers to indicate the drm_atomic_helper_damage_iter_init() * helper that the @fb_damage_clips blob property should be ignored. * * See :ref:`damage_tracking_properties` for more information. */ bool ignore_damage_clips; /** * @src: * * source coordinates of the plane (in 16.16). * * When using drm_atomic_helper_check_plane_state(), * the coordinates are clipped, but the driver may choose * to use unclipped coordinates instead when the hardware * performs the clipping automatically. */ /** * @dst: * * clipped destination coordinates of the plane. * * When using drm_atomic_helper_check_plane_state(), * the coordinates are clipped, but the driver may choose * to use unclipped coordinates instead when the hardware * performs the clipping automatically. */ struct drm_rect src, dst; /** * @visible: * * Visibility of the plane. This can be false even if fb!=NULL and * crtc!=NULL, due to clipping. */ bool visible; /** * @scaling_filter: * * Scaling filter to be applied */ enum drm_scaling_filter scaling_filter; /** * @commit: Tracks the pending commit to prevent use-after-free conditions, * and for async plane updates. * * May be NULL. */ struct drm_crtc_commit *commit; /** @state: backpointer to global drm_atomic_state */ struct drm_atomic_state *state; /** * @color_mgmt_changed: Color management properties have changed. Used * by the atomic helpers and drivers to steer the atomic commit control * flow. */ bool color_mgmt_changed : 1; }; static inline struct drm_rect drm_plane_state_src(const struct drm_plane_state *state) { struct drm_rect src = { .x1 = state->src_x, .y1 = state->src_y, .x2 = state->src_x + state->src_w, .y2 = state->src_y + state->src_h, }; return src; } static inline struct drm_rect drm_plane_state_dest(const struct drm_plane_state *state) { struct drm_rect dest = { .x1 = state->crtc_x, .y1 = state->crtc_y, .x2 = state->crtc_x + state->crtc_w, .y2 = state->crtc_y + state->crtc_h, }; return dest; } /** * struct drm_plane_funcs - driver plane control functions */ struct drm_plane_funcs { /** * @update_plane: * * This is the legacy entry point to enable and configure the plane for * the given CRTC and framebuffer. It is never called to disable the * plane, i.e. the passed-in crtc and fb paramters are never NULL. * * The source rectangle in frame buffer memory coordinates is given by * the src_x, src_y, src_w and src_h parameters (as 16.16 fixed point * values). Devices that don't support subpixel plane coordinates can * ignore the fractional part. * * The destination rectangle in CRTC coordinates is given by the * crtc_x, crtc_y, crtc_w and crtc_h parameters (as integer values). * Devices scale the source rectangle to the destination rectangle. If * scaling is not supported, and the source rectangle size doesn't match * the destination rectangle size, the driver must return a * -<errorname>EINVAL</errorname> error. * * Drivers implementing atomic modeset should use * drm_atomic_helper_update_plane() to implement this hook. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*update_plane)(struct drm_plane *plane, struct drm_crtc *crtc, struct drm_framebuffer *fb, int crtc_x, int crtc_y, unsigned int crtc_w, unsigned int crtc_h, uint32_t src_x, uint32_t src_y, uint32_t src_w, uint32_t src_h, struct drm_modeset_acquire_ctx *ctx); /** * @disable_plane: * * This is the legacy entry point to disable the plane. The DRM core * calls this method in response to a DRM_IOCTL_MODE_SETPLANE IOCTL call * with the frame buffer ID set to 0. Disabled planes must not be * processed by the CRTC. * * Drivers implementing atomic modeset should use * drm_atomic_helper_disable_plane() to implement this hook. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*disable_plane)(struct drm_plane *plane, struct drm_modeset_acquire_ctx *ctx); /** * @destroy: * * Clean up plane resources. This is only called at driver unload time * through drm_mode_config_cleanup() since a plane cannot be hotplugged * in DRM. */ void (*destroy)(struct drm_plane *plane); /** * @reset: * * Reset plane hardware and software state to off. This function isn't * called by the core directly, only through drm_mode_config_reset(). * It's not a helper hook only for historical reasons. * * Atomic drivers can use drm_atomic_helper_plane_reset() to reset * atomic state using this hook. */ void (*reset)(struct drm_plane *plane); /** * @set_property: * * This is the legacy entry point to update a property attached to the * plane. * * This callback is optional if the driver does not support any legacy * driver-private properties. For atomic drivers it is not used because * property handling is done entirely in the DRM core. * * RETURNS: * * 0 on success or a negative error code on failure. */ int (*set_property)(struct drm_plane *plane, struct drm_property *property, uint64_t val); /** * @atomic_duplicate_state: * * Duplicate the current atomic state for this plane and return it. * The core and helpers guarantee that any atomic state duplicated with * this hook and still owned by the caller (i.e. not transferred to the * driver by calling &drm_mode_config_funcs.atomic_commit) will be * cleaned up by calling the @atomic_destroy_state hook in this * structure. * * This callback is mandatory for atomic drivers. * * Atomic drivers which don't subclass &struct drm_plane_state should use * drm_atomic_helper_plane_duplicate_state(). Drivers that subclass the * state structure to extend it with driver-private state should use * __drm_atomic_helper_plane_duplicate_state() to make sure shared state is * duplicated in a consistent fashion across drivers. * * It is an error to call this hook before &drm_plane.state has been * initialized correctly. * * NOTE: * * If the duplicate state references refcounted resources this hook must * acquire a reference for each of them. The driver must release these * references again in @atomic_destroy_state. * * RETURNS: * * Duplicated atomic state or NULL when the allocation failed. */ struct drm_plane_state *(*atomic_duplicate_state)(struct drm_plane *plane); /** * @atomic_destroy_state: * * Destroy a state duplicated with @atomic_duplicate_state and release * or unreference all resources it references * * This callback is mandatory for atomic drivers. */ void (*atomic_destroy_state)(struct drm_plane *plane, struct drm_plane_state *state); /** * @atomic_set_property: * * Decode a driver-private property value and store the decoded value * into the passed-in state structure. Since the atomic core decodes all * standardized properties (even for extensions beyond the core set of * properties which might not be implemented by all drivers) this * requires drivers to subclass the state structure. * * Such driver-private properties should really only be implemented for * truly hardware/vendor specific state. Instead it is preferred to * standardize atomic extension and decode the properties used to expose * such an extension in the core. * * Do not call this function directly, use * drm_atomic_plane_set_property() instead. * * This callback is optional if the driver does not support any * driver-private atomic properties. * * NOTE: * * This function is called in the state assembly phase of atomic * modesets, which can be aborted for any reason (including on * userspace's request to just check whether a configuration would be * possible). Drivers MUST NOT touch any persistent state (hardware or * software) or data structures except the passed in @state parameter. * * Also since userspace controls in which order properties are set this * function must not do any input validation (since the state update is * incomplete and hence likely inconsistent). Instead any such input * validation must be done in the various atomic_check callbacks. * * RETURNS: * * 0 if the property has been found, -EINVAL if the property isn't * implemented by the driver (which shouldn't ever happen, the core only * asks for properties attached to this plane). No other validation is * allowed by the driver. The core already checks that the property * value is within the range (integer, valid enum value, ...) the driver * set when registering the property. */ int (*atomic_set_property)(struct drm_plane *plane, struct drm_plane_state *state, struct drm_property *property, uint64_t val); /** * @atomic_get_property: * * Reads out the decoded driver-private property. This is used to * implement the GETPLANE IOCTL. * * Do not call this function directly, use * drm_atomic_plane_get_property() instead. * * This callback is optional if the driver does not support any * driver-private atomic properties. * * RETURNS: * * 0 on success, -EINVAL if the property isn't implemented by the * driver (which should never happen, the core only asks for * properties attached to this plane). */ int (*atomic_get_property)(struct drm_plane *plane, const struct drm_plane_state *state, struct drm_property *property, uint64_t *val); /** * @late_register: * * This optional hook can be used to register additional userspace * interfaces attached to the plane like debugfs interfaces. * It is called late in the driver load sequence from drm_dev_register(). * Everything added from this callback should be unregistered in * the early_unregister callback. * * Returns: * * 0 on success, or a negative error code on failure. */ int (*late_register)(struct drm_plane *plane); /** * @early_unregister: * * This optional hook should be used to unregister the additional * userspace interfaces attached to the plane from * @late_register. It is called from drm_dev_unregister(), * early in the driver unload sequence to disable userspace access * before data structures are torndown. */ void (*early_unregister)(struct drm_plane *plane); /** * @atomic_print_state: * * If driver subclasses &struct drm_plane_state, it should implement * this optional hook for printing additional driver specific state. * * Do not call this directly, use drm_atomic_plane_print_state() * instead. */ void (*atomic_print_state)(struct drm_printer *p, const struct drm_plane_state *state); /** * @format_mod_supported: * * This optional hook is used for the DRM to determine if the given * format/modifier combination is valid for the plane. This allows the * DRM to generate the correct format bitmask (which formats apply to * which modifier), and to validate modifiers at atomic_check time. * * If not present, then any modifier in the plane's modifier * list is allowed with any of the plane's formats. * * Returns: * * True if the given modifier is valid for that format on the plane. * False otherwise. */ bool (*format_mod_supported)(struct drm_plane *plane, uint32_t format, uint64_t modifier); }; /** * enum drm_plane_type - uapi plane type enumeration * * For historical reasons not all planes are made the same. This enumeration is * used to tell the different types of planes apart to implement the different * uapi semantics for them. For userspace which is universal plane aware and * which is using that atomic IOCTL there's no difference between these planes * (beyong what the driver and hardware can support of course). * * For compatibility with legacy userspace, only overlay planes are made * available to userspace by default. Userspace clients may set the * &DRM_CLIENT_CAP_UNIVERSAL_PLANES client capability bit to indicate that they * wish to receive a universal plane list containing all plane types. See also * drm_for_each_legacy_plane(). * * In addition to setting each plane's type, drivers need to setup the * &drm_crtc.primary and optionally &drm_crtc.cursor pointers for legacy * IOCTLs. See drm_crtc_init_with_planes(). * * WARNING: The values of this enum is UABI since they're exposed in the "type" * property. */ enum drm_plane_type { /** * @DRM_PLANE_TYPE_OVERLAY: * * Overlay planes represent all non-primary, non-cursor planes. Some * drivers refer to these types of planes as "sprites" internally. */ DRM_PLANE_TYPE_OVERLAY, /** * @DRM_PLANE_TYPE_PRIMARY: * * A primary plane attached to a CRTC is the most likely to be able to * light up the CRTC when no scaling/cropping is used and the plane * covers the whole CRTC. */ DRM_PLANE_TYPE_PRIMARY, /** * @DRM_PLANE_TYPE_CURSOR: * * A cursor plane attached to a CRTC is more likely to be able to be * enabled when no scaling/cropping is used and the framebuffer has the * size indicated by &drm_mode_config.cursor_width and * &drm_mode_config.cursor_height. Additionally, if the driver doesn't * support modifiers, the framebuffer should have a linear layout. */ DRM_PLANE_TYPE_CURSOR, }; /** * struct drm_plane - central DRM plane control structure * * Planes represent the scanout hardware of a display block. They receive their * input data from a &drm_framebuffer and feed it to a &drm_crtc. Planes control * the color conversion, see `Plane Composition Properties`_ for more details, * and are also involved in the color conversion of input pixels, see `Color * Management Properties`_ for details on that. */ struct drm_plane { /** @dev: DRM device this plane belongs to */ struct drm_device *dev; /** * @head: * * List of all planes on @dev, linked from &drm_mode_config.plane_list. * Invariant over the lifetime of @dev and therefore does not need * locking. */ struct list_head head; /** @name: human readable name, can be overwritten by the driver */ char *name; /** * @mutex: * * Protects modeset plane state, together with the &drm_crtc.mutex of * CRTC this plane is linked to (when active, getting activated or * getting disabled). * * For atomic drivers specifically this protects @state. */ struct drm_modeset_lock mutex; /** @base: base mode object */ struct drm_mode_object base; /** * @possible_crtcs: pipes this plane can be bound to constructed from * drm_crtc_mask() */ uint32_t possible_crtcs; /** @format_types: array of formats supported by this plane */ uint32_t *format_types; /** @format_count: Size of the array pointed at by @format_types. */ unsigned int format_count; /** * @format_default: driver hasn't supplied supported formats for the * plane. Used by the non-atomic driver compatibility wrapper only. */ bool format_default; /** @modifiers: array of modifiers supported by this plane */ uint64_t *modifiers; /** @modifier_count: Size of the array pointed at by @modifier_count. */ unsigned int modifier_count; /** * @crtc: * * Currently bound CRTC, only meaningful for non-atomic drivers. For * atomic drivers this is forced to be NULL, atomic drivers should * instead check &drm_plane_state.crtc. */ struct drm_crtc *crtc; /** * @fb: * * Currently bound framebuffer, only meaningful for non-atomic drivers. * For atomic drivers this is forced to be NULL, atomic drivers should * instead check &drm_plane_state.fb. */ struct drm_framebuffer *fb; /** * @old_fb: * * Temporary tracking of the old fb while a modeset is ongoing. Only * used by non-atomic drivers, forced to be NULL for atomic drivers. */ struct drm_framebuffer *old_fb; /** @funcs: plane control functions */ const struct drm_plane_funcs *funcs; /** @properties: property tracking for this plane */ struct drm_object_properties properties; /** @type: Type of plane, see &enum drm_plane_type for details. */ enum drm_plane_type type; /** * @index: Position inside the mode_config.list, can be used as an array * index. It is invariant over the lifetime of the plane. */ unsigned index; /** @helper_private: mid-layer private data */ const struct drm_plane_helper_funcs *helper_private; /** * @state: * * Current atomic state for this plane. * * This is protected by @mutex. Note that nonblocking atomic commits * access the current plane state without taking locks. Either by going * through the &struct drm_atomic_state pointers, see * for_each_oldnew_plane_in_state(), for_each_old_plane_in_state() and * for_each_new_plane_in_state(). Or through careful ordering of atomic * commit operations as implemented in the atomic helpers, see * &struct drm_crtc_commit. */ struct drm_plane_state *state; /** * @alpha_property: * Optional alpha property for this plane. See * drm_plane_create_alpha_property(). */ struct drm_property *alpha_property; /** * @zpos_property: * Optional zpos property for this plane. See * drm_plane_create_zpos_property(). */ struct drm_property *zpos_property; /** * @rotation_property: * Optional rotation property for this plane. See * drm_plane_create_rotation_property(). */ struct drm_property *rotation_property; /** * @blend_mode_property: * Optional "pixel blend mode" enum property for this plane. * Blend mode property represents the alpha blending equation selection, * describing how the pixels from the current plane are composited with * the background. */ struct drm_property *blend_mode_property; /** * @color_encoding_property: * * Optional "COLOR_ENCODING" enum property for specifying * color encoding for non RGB formats. * See drm_plane_create_color_properties(). */ struct drm_property *color_encoding_property; /** * @color_range_property: * * Optional "COLOR_RANGE" enum property for specifying * color range for non RGB formats. * See drm_plane_create_color_properties(). */ struct drm_property *color_range_property; /** * @scaling_filter_property: property to apply a particular filter while * scaling. */ struct drm_property *scaling_filter_property; /** * @hotspot_x_property: property to set mouse hotspot x offset. */ struct drm_property *hotspot_x_property; /** * @hotspot_y_property: property to set mouse hotspot y offset. */ struct drm_property *hotspot_y_property; /** * @kmsg_panic: Used to register a panic notifier for this plane */ struct kmsg_dumper kmsg_panic; }; #define obj_to_plane(x) container_of(x, struct drm_plane, base) __printf(9, 10) int drm_universal_plane_init(struct drm_device *dev, struct drm_plane *plane, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type type, const char *name, ...); void drm_plane_cleanup(struct drm_plane *plane); __printf(10, 11) void *__drmm_universal_plane_alloc(struct drm_device *dev, size_t size, size_t offset, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type plane_type, const char *name, ...); /** * drmm_universal_plane_alloc - Allocate and initialize an universal plane object * @dev: DRM device * @type: the type of the struct which contains struct &drm_plane * @member: the name of the &drm_plane within @type * @possible_crtcs: bitmask of possible CRTCs * @funcs: callbacks for the new plane * @formats: array of supported formats (DRM_FORMAT\_\*) * @format_count: number of elements in @formats * @format_modifiers: array of struct drm_format modifiers terminated by * DRM_FORMAT_MOD_INVALID * @plane_type: type of plane (overlay, primary, cursor) * @name: printf style format string for the plane name, or NULL for default name * * Allocates and initializes a plane object of type @type. Cleanup is * automatically handled through registering drm_plane_cleanup() with * drmm_add_action(). * * The @drm_plane_funcs.destroy hook must be NULL. * * Drivers that only support the DRM_FORMAT_MOD_LINEAR modifier support may set * @format_modifiers to NULL. The plane will advertise the linear modifier. * * Returns: * Pointer to new plane, or ERR_PTR on failure. */ #define drmm_universal_plane_alloc(dev, type, member, possible_crtcs, funcs, formats, \ format_count, format_modifiers, plane_type, name, ...) \ ((type *)__drmm_universal_plane_alloc(dev, sizeof(type), \ offsetof(type, member), \ possible_crtcs, funcs, formats, \ format_count, format_modifiers, \ plane_type, name, ##__VA_ARGS__)) __printf(10, 11) void *__drm_universal_plane_alloc(struct drm_device *dev, size_t size, size_t offset, uint32_t possible_crtcs, const struct drm_plane_funcs *funcs, const uint32_t *formats, unsigned int format_count, const uint64_t *format_modifiers, enum drm_plane_type plane_type, const char *name, ...); /** * drm_universal_plane_alloc() - Allocate and initialize an universal plane object * @dev: DRM device * @type: the type of the struct which contains struct &drm_plane * @member: the name of the &drm_plane within @type * @possible_crtcs: bitmask of possible CRTCs * @funcs: callbacks for the new plane * @formats: array of supported formats (DRM_FORMAT\_\*) * @format_count: number of elements in @formats * @format_modifiers: array of struct drm_format modifiers terminated by * DRM_FORMAT_MOD_INVALID * @plane_type: type of plane (overlay, primary, cursor) * @name: printf style format string for the plane name, or NULL for default name * * Allocates and initializes a plane object of type @type. The caller * is responsible for releasing the allocated memory with kfree(). * * Drivers are encouraged to use drmm_universal_plane_alloc() instead. * * Drivers that only support the DRM_FORMAT_MOD_LINEAR modifier support may set * @format_modifiers to NULL. The plane will advertise the linear modifier. * * Returns: * Pointer to new plane, or ERR_PTR on failure. */ #define drm_universal_plane_alloc(dev, type, member, possible_crtcs, funcs, formats, \ format_count, format_modifiers, plane_type, name, ...) \ ((type *)__drm_universal_plane_alloc(dev, sizeof(type), \ offsetof(type, member), \ possible_crtcs, funcs, formats, \ format_count, format_modifiers, \ plane_type, name, ##__VA_ARGS__)) /** * drm_plane_index - find the index of a registered plane * @plane: plane to find index for * * Given a registered plane, return the index of that plane within a DRM * device's list of planes. */ static inline unsigned int drm_plane_index(const struct drm_plane *plane) { return plane->index; } /** * drm_plane_mask - find the mask of a registered plane * @plane: plane to find mask for */ static inline u32 drm_plane_mask(const struct drm_plane *plane) { return 1 << drm_plane_index(plane); } struct drm_plane * drm_plane_from_index(struct drm_device *dev, int idx); void drm_plane_force_disable(struct drm_plane *plane); int drm_mode_plane_set_obj_prop(struct drm_plane *plane, struct drm_property *property, uint64_t value); /** * drm_plane_find - find a &drm_plane * @dev: DRM device * @file_priv: drm file to check for lease against. * @id: plane id * * Returns the plane with @id, NULL if it doesn't exist. Simple wrapper around * drm_mode_object_find(). */ static inline struct drm_plane *drm_plane_find(struct drm_device *dev, struct drm_file *file_priv, uint32_t id) { struct drm_mode_object *mo; mo = drm_mode_object_find(dev, file_priv, id, DRM_MODE_OBJECT_PLANE); return mo ? obj_to_plane(mo) : NULL; } /** * drm_for_each_plane_mask - iterate over planes specified by bitmask * @plane: the loop cursor * @dev: the DRM device * @plane_mask: bitmask of plane indices * * Iterate over all planes specified by bitmask. */ #define drm_for_each_plane_mask(plane, dev, plane_mask) \ list_for_each_entry((plane), &(dev)->mode_config.plane_list, head) \ for_each_if ((plane_mask) & drm_plane_mask(plane)) /** * drm_for_each_legacy_plane - iterate over all planes for legacy userspace * @plane: the loop cursor * @dev: the DRM device * * Iterate over all legacy planes of @dev, excluding primary and cursor planes. * This is useful for implementing userspace apis when userspace is not * universal plane aware. See also &enum drm_plane_type. */ #define drm_for_each_legacy_plane(plane, dev) \ list_for_each_entry(plane, &(dev)->mode_config.plane_list, head) \ for_each_if (plane->type == DRM_PLANE_TYPE_OVERLAY) /** * drm_for_each_plane - iterate over all planes * @plane: the loop cursor * @dev: the DRM device * * Iterate over all planes of @dev, include primary and cursor planes. */ #define drm_for_each_plane(plane, dev) \ list_for_each_entry(plane, &(dev)->mode_config.plane_list, head) bool drm_plane_has_format(struct drm_plane *plane, u32 format, u64 modifier); bool drm_any_plane_has_format(struct drm_device *dev, u32 format, u64 modifier); void drm_plane_enable_fb_damage_clips(struct drm_plane *plane); unsigned int drm_plane_get_damage_clips_count(const struct drm_plane_state *state); struct drm_mode_rect * drm_plane_get_damage_clips(const struct drm_plane_state *state); int drm_plane_create_scaling_filter_property(struct drm_plane *plane, unsigned int supported_filters); int drm_plane_add_size_hints_property(struct drm_plane *plane, const struct drm_plane_size_hint *hints, int num_hints); #endif |
| 28 1 10 24 24 5 24 11 4 9 28 25 4 27 11 11 2 9 29 29 29 29 27 29 2 2 9 9 9 28 60 19 8 2 19 9 9 7 7 27 27 27 67 6 67 37 33 29 9 27 9 31 1 1 29 29 11 10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 | // SPDX-License-Identifier: GPL-2.0 /* * uprobes-based tracing events * * Copyright (C) IBM Corporation, 2010-2012 * Author: Srikar Dronamraju <srikar@linux.vnet.ibm.com> */ #define pr_fmt(fmt) "trace_uprobe: " fmt #include <linux/bpf-cgroup.h> #include <linux/security.h> #include <linux/ctype.h> #include <linux/module.h> #include <linux/uaccess.h> #include <linux/uprobes.h> #include <linux/namei.h> #include <linux/string.h> #include <linux/rculist.h> #include <linux/filter.h> #include "trace_dynevent.h" #include "trace_probe.h" #include "trace_probe_tmpl.h" #define UPROBE_EVENT_SYSTEM "uprobes" struct uprobe_trace_entry_head { struct trace_entry ent; unsigned long vaddr[]; }; #define SIZEOF_TRACE_ENTRY(is_return) \ (sizeof(struct uprobe_trace_entry_head) + \ sizeof(unsigned long) * (is_return ? 2 : 1)) #define DATAOF_TRACE_ENTRY(entry, is_return) \ ((void*)(entry) + SIZEOF_TRACE_ENTRY(is_return)) static int trace_uprobe_create(const char *raw_command); static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev); static int trace_uprobe_release(struct dyn_event *ev); static bool trace_uprobe_is_busy(struct dyn_event *ev); static bool trace_uprobe_match(const char *system, const char *event, int argc, const char **argv, struct dyn_event *ev); static struct dyn_event_operations trace_uprobe_ops = { .create = trace_uprobe_create, .show = trace_uprobe_show, .is_busy = trace_uprobe_is_busy, .free = trace_uprobe_release, .match = trace_uprobe_match, }; /* * uprobe event core functions */ struct trace_uprobe { struct dyn_event devent; struct uprobe_consumer consumer; struct path path; struct inode *inode; char *filename; unsigned long offset; unsigned long ref_ctr_offset; unsigned long nhit; struct trace_probe tp; }; static bool is_trace_uprobe(struct dyn_event *ev) { return ev->ops == &trace_uprobe_ops; } static struct trace_uprobe *to_trace_uprobe(struct dyn_event *ev) { return container_of(ev, struct trace_uprobe, devent); } /** * for_each_trace_uprobe - iterate over the trace_uprobe list * @pos: the struct trace_uprobe * for each entry * @dpos: the struct dyn_event * to use as a loop cursor */ #define for_each_trace_uprobe(pos, dpos) \ for_each_dyn_event(dpos) \ if (is_trace_uprobe(dpos) && (pos = to_trace_uprobe(dpos))) static int register_uprobe_event(struct trace_uprobe *tu); static int unregister_uprobe_event(struct trace_uprobe *tu); static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs); static int uretprobe_dispatcher(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs); #ifdef CONFIG_STACK_GROWSUP static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n) { return addr - (n * sizeof(long)); } #else static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n) { return addr + (n * sizeof(long)); } #endif static unsigned long get_user_stack_nth(struct pt_regs *regs, unsigned int n) { unsigned long ret; unsigned long addr = user_stack_pointer(regs); addr = adjust_stack_addr(addr, n); if (copy_from_user(&ret, (void __force __user *) addr, sizeof(ret))) return 0; return ret; } /* * Uprobes-specific fetch functions */ static nokprobe_inline int probe_mem_read(void *dest, void *src, size_t size) { void __user *vaddr = (void __force __user *)src; return copy_from_user(dest, vaddr, size) ? -EFAULT : 0; } static nokprobe_inline int probe_mem_read_user(void *dest, void *src, size_t size) { return probe_mem_read(dest, src, size); } /* * Fetch a null-terminated string. Caller MUST set *(u32 *)dest with max * length and relative data location. */ static nokprobe_inline int fetch_store_string(unsigned long addr, void *dest, void *base) { long ret; u32 loc = *(u32 *)dest; int maxlen = get_loc_len(loc); u8 *dst = get_loc_data(dest, base); void __user *src = (void __force __user *) addr; if (unlikely(!maxlen)) return -ENOMEM; if (addr == FETCH_TOKEN_COMM) ret = strscpy(dst, current->comm, maxlen); else ret = strncpy_from_user(dst, src, maxlen); if (ret >= 0) { if (ret == maxlen) dst[ret - 1] = '\0'; else /* * Include the terminating null byte. In this case it * was copied by strncpy_from_user but not accounted * for in ret. */ ret++; *(u32 *)dest = make_data_loc(ret, (void *)dst - base); } else *(u32 *)dest = make_data_loc(0, (void *)dst - base); return ret; } static nokprobe_inline int fetch_store_string_user(unsigned long addr, void *dest, void *base) { return fetch_store_string(addr, dest, base); } /* Return the length of string -- including null terminal byte */ static nokprobe_inline int fetch_store_strlen(unsigned long addr) { int len; void __user *vaddr = (void __force __user *) addr; if (addr == FETCH_TOKEN_COMM) len = strlen(current->comm) + 1; else len = strnlen_user(vaddr, MAX_STRING_SIZE); return (len > MAX_STRING_SIZE) ? 0 : len; } static nokprobe_inline int fetch_store_strlen_user(unsigned long addr) { return fetch_store_strlen(addr); } static unsigned long translate_user_vaddr(unsigned long file_offset) { unsigned long base_addr; struct uprobe_dispatch_data *udd; udd = (void *) current->utask->vaddr; base_addr = udd->bp_addr - udd->tu->offset; return base_addr + file_offset; } /* Note that we don't verify it, since the code does not come from user space */ static int process_fetch_insn(struct fetch_insn *code, void *rec, void *edata, void *dest, void *base) { struct pt_regs *regs = rec; unsigned long val; int ret; /* 1st stage: get value from context */ switch (code->op) { case FETCH_OP_REG: val = regs_get_register(regs, code->param); break; case FETCH_OP_STACK: val = get_user_stack_nth(regs, code->param); break; case FETCH_OP_STACKP: val = user_stack_pointer(regs); break; case FETCH_OP_RETVAL: val = regs_return_value(regs); break; case FETCH_OP_COMM: val = FETCH_TOKEN_COMM; break; case FETCH_OP_FOFFS: val = translate_user_vaddr(code->immediate); break; default: ret = process_common_fetch_insn(code, &val); if (ret < 0) return ret; } code++; return process_fetch_insn_bottom(code, val, dest, base); } NOKPROBE_SYMBOL(process_fetch_insn) static inline void init_trace_uprobe_filter(struct trace_uprobe_filter *filter) { rwlock_init(&filter->rwlock); filter->nr_systemwide = 0; INIT_LIST_HEAD(&filter->perf_events); } static inline bool uprobe_filter_is_empty(struct trace_uprobe_filter *filter) { return !filter->nr_systemwide && list_empty(&filter->perf_events); } static inline bool is_ret_probe(struct trace_uprobe *tu) { return tu->consumer.ret_handler != NULL; } static bool trace_uprobe_is_busy(struct dyn_event *ev) { struct trace_uprobe *tu = to_trace_uprobe(ev); return trace_probe_is_enabled(&tu->tp); } static bool trace_uprobe_match_command_head(struct trace_uprobe *tu, int argc, const char **argv) { char buf[MAX_ARGSTR_LEN + 1]; int len; if (!argc) return true; len = strlen(tu->filename); if (strncmp(tu->filename, argv[0], len) || argv[0][len] != ':') return false; if (tu->ref_ctr_offset == 0) snprintf(buf, sizeof(buf), "0x%0*lx", (int)(sizeof(void *) * 2), tu->offset); else snprintf(buf, sizeof(buf), "0x%0*lx(0x%lx)", (int)(sizeof(void *) * 2), tu->offset, tu->ref_ctr_offset); if (strcmp(buf, &argv[0][len + 1])) return false; argc--; argv++; return trace_probe_match_command_args(&tu->tp, argc, argv); } static bool trace_uprobe_match(const char *system, const char *event, int argc, const char **argv, struct dyn_event *ev) { struct trace_uprobe *tu = to_trace_uprobe(ev); return (event[0] == '\0' || strcmp(trace_probe_name(&tu->tp), event) == 0) && (!system || strcmp(trace_probe_group_name(&tu->tp), system) == 0) && trace_uprobe_match_command_head(tu, argc, argv); } static nokprobe_inline struct trace_uprobe * trace_uprobe_primary_from_call(struct trace_event_call *call) { struct trace_probe *tp; tp = trace_probe_primary_from_call(call); if (WARN_ON_ONCE(!tp)) return NULL; return container_of(tp, struct trace_uprobe, tp); } /* * Allocate new trace_uprobe and initialize it (including uprobes). */ static struct trace_uprobe * alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret) { struct trace_uprobe *tu; int ret; tu = kzalloc(struct_size(tu, tp.args, nargs), GFP_KERNEL); if (!tu) return ERR_PTR(-ENOMEM); ret = trace_probe_init(&tu->tp, event, group, true, nargs); if (ret < 0) goto error; dyn_event_init(&tu->devent, &trace_uprobe_ops); tu->consumer.handler = uprobe_dispatcher; if (is_ret) tu->consumer.ret_handler = uretprobe_dispatcher; init_trace_uprobe_filter(tu->tp.event->filter); return tu; error: kfree(tu); return ERR_PTR(ret); } static void free_trace_uprobe(struct trace_uprobe *tu) { if (!tu) return; path_put(&tu->path); trace_probe_cleanup(&tu->tp); kfree(tu->filename); kfree(tu); } static struct trace_uprobe *find_probe_event(const char *event, const char *group) { struct dyn_event *pos; struct trace_uprobe *tu; for_each_trace_uprobe(tu, pos) if (strcmp(trace_probe_name(&tu->tp), event) == 0 && strcmp(trace_probe_group_name(&tu->tp), group) == 0) return tu; return NULL; } /* Unregister a trace_uprobe and probe_event */ static int unregister_trace_uprobe(struct trace_uprobe *tu) { int ret; if (trace_probe_has_sibling(&tu->tp)) goto unreg; /* If there's a reference to the dynamic event */ if (trace_event_dyn_busy(trace_probe_event_call(&tu->tp))) return -EBUSY; ret = unregister_uprobe_event(tu); if (ret) return ret; unreg: dyn_event_remove(&tu->devent); trace_probe_unlink(&tu->tp); free_trace_uprobe(tu); return 0; } static bool trace_uprobe_has_same_uprobe(struct trace_uprobe *orig, struct trace_uprobe *comp) { struct trace_probe_event *tpe = orig->tp.event; struct inode *comp_inode = d_real_inode(comp->path.dentry); int i; list_for_each_entry(orig, &tpe->probes, tp.list) { if (comp_inode != d_real_inode(orig->path.dentry) || comp->offset != orig->offset) continue; /* * trace_probe_compare_arg_type() ensured that nr_args and * each argument name and type are same. Let's compare comm. */ for (i = 0; i < orig->tp.nr_args; i++) { if (strcmp(orig->tp.args[i].comm, comp->tp.args[i].comm)) break; } if (i == orig->tp.nr_args) return true; } return false; } static int append_trace_uprobe(struct trace_uprobe *tu, struct trace_uprobe *to) { int ret; ret = trace_probe_compare_arg_type(&tu->tp, &to->tp); if (ret) { /* Note that argument starts index = 2 */ trace_probe_log_set_index(ret + 1); trace_probe_log_err(0, DIFF_ARG_TYPE); return -EEXIST; } if (trace_uprobe_has_same_uprobe(to, tu)) { trace_probe_log_set_index(0); trace_probe_log_err(0, SAME_PROBE); return -EEXIST; } /* Append to existing event */ ret = trace_probe_append(&tu->tp, &to->tp); if (!ret) dyn_event_add(&tu->devent, trace_probe_event_call(&tu->tp)); return ret; } /* * Uprobe with multiple reference counter is not allowed. i.e. * If inode and offset matches, reference counter offset *must* * match as well. Though, there is one exception: If user is * replacing old trace_uprobe with new one(same group/event), * then we allow same uprobe with new reference counter as far * as the new one does not conflict with any other existing * ones. */ static int validate_ref_ctr_offset(struct trace_uprobe *new) { struct dyn_event *pos; struct trace_uprobe *tmp; struct inode *new_inode = d_real_inode(new->path.dentry); for_each_trace_uprobe(tmp, pos) { if (new_inode == d_real_inode(tmp->path.dentry) && new->offset == tmp->offset && new->ref_ctr_offset != tmp->ref_ctr_offset) { pr_warn("Reference counter offset mismatch."); return -EINVAL; } } return 0; } /* Register a trace_uprobe and probe_event */ static int register_trace_uprobe(struct trace_uprobe *tu) { struct trace_uprobe *old_tu; int ret; mutex_lock(&event_mutex); ret = validate_ref_ctr_offset(tu); if (ret) goto end; /* register as an event */ old_tu = find_probe_event(trace_probe_name(&tu->tp), trace_probe_group_name(&tu->tp)); if (old_tu) { if (is_ret_probe(tu) != is_ret_probe(old_tu)) { trace_probe_log_set_index(0); trace_probe_log_err(0, DIFF_PROBE_TYPE); ret = -EEXIST; } else { ret = append_trace_uprobe(tu, old_tu); } goto end; } ret = register_uprobe_event(tu); if (ret) { if (ret == -EEXIST) { trace_probe_log_set_index(0); trace_probe_log_err(0, EVENT_EXIST); } else pr_warn("Failed to register probe event(%d)\n", ret); goto end; } dyn_event_add(&tu->devent, trace_probe_event_call(&tu->tp)); end: mutex_unlock(&event_mutex); return ret; } /* * Argument syntax: * - Add uprobe: p|r[:[GRP/][EVENT]] PATH:OFFSET[%return][(REF)] [FETCHARGS] */ static int __trace_uprobe_create(int argc, const char **argv) { struct trace_uprobe *tu; const char *event = NULL, *group = UPROBE_EVENT_SYSTEM; char *arg, *filename, *rctr, *rctr_end, *tmp; char buf[MAX_EVENT_NAME_LEN]; char gbuf[MAX_EVENT_NAME_LEN]; enum probe_print_type ptype; struct path path; unsigned long offset, ref_ctr_offset; bool is_return = false; int i, ret; ref_ctr_offset = 0; switch (argv[0][0]) { case 'r': is_return = true; break; case 'p': break; default: return -ECANCELED; } if (argc < 2) return -ECANCELED; if (argv[0][1] == ':') event = &argv[0][2]; if (!strchr(argv[1], '/')) return -ECANCELED; filename = kstrdup(argv[1], GFP_KERNEL); if (!filename) return -ENOMEM; /* Find the last occurrence, in case the path contains ':' too. */ arg = strrchr(filename, ':'); if (!arg || !isdigit(arg[1])) { kfree(filename); return -ECANCELED; } trace_probe_log_init("trace_uprobe", argc, argv); trace_probe_log_set_index(1); /* filename is the 2nd argument */ *arg++ = '\0'; ret = kern_path(filename, LOOKUP_FOLLOW, &path); if (ret) { trace_probe_log_err(0, FILE_NOT_FOUND); kfree(filename); trace_probe_log_clear(); return ret; } if (!d_is_reg(path.dentry)) { trace_probe_log_err(0, NO_REGULAR_FILE); ret = -EINVAL; goto fail_address_parse; } /* Parse reference counter offset if specified. */ rctr = strchr(arg, '('); if (rctr) { rctr_end = strchr(rctr, ')'); if (!rctr_end) { ret = -EINVAL; rctr_end = rctr + strlen(rctr); trace_probe_log_err(rctr_end - filename, REFCNT_OPEN_BRACE); goto fail_address_parse; } else if (rctr_end[1] != '\0') { ret = -EINVAL; trace_probe_log_err(rctr_end + 1 - filename, BAD_REFCNT_SUFFIX); goto fail_address_parse; } *rctr++ = '\0'; *rctr_end = '\0'; ret = kstrtoul(rctr, 0, &ref_ctr_offset); if (ret) { trace_probe_log_err(rctr - filename, BAD_REFCNT); goto fail_address_parse; } } /* Check if there is %return suffix */ tmp = strchr(arg, '%'); if (tmp) { if (!strcmp(tmp, "%return")) { *tmp = '\0'; is_return = true; } else { trace_probe_log_err(tmp - filename, BAD_ADDR_SUFFIX); ret = -EINVAL; goto fail_address_parse; } } /* Parse uprobe offset. */ ret = kstrtoul(arg, 0, &offset); if (ret) { trace_probe_log_err(arg - filename, BAD_UPROBE_OFFS); goto fail_address_parse; } /* setup a probe */ trace_probe_log_set_index(0); if (event) { ret = traceprobe_parse_event_name(&event, &group, gbuf, event - argv[0]); if (ret) goto fail_address_parse; } if (!event) { char *tail; char *ptr; tail = kstrdup(kbasename(filename), GFP_KERNEL); if (!tail) { ret = -ENOMEM; goto fail_address_parse; } ptr = strpbrk(tail, ".-_"); if (ptr) *ptr = '\0'; snprintf(buf, MAX_EVENT_NAME_LEN, "%c_%s_0x%lx", 'p', tail, offset); event = buf; kfree(tail); } argc -= 2; argv += 2; tu = alloc_trace_uprobe(group, event, argc, is_return); if (IS_ERR(tu)) { ret = PTR_ERR(tu); /* This must return -ENOMEM otherwise there is a bug */ WARN_ON_ONCE(ret != -ENOMEM); goto fail_address_parse; } tu->offset = offset; tu->ref_ctr_offset = ref_ctr_offset; tu->path = path; tu->filename = filename; /* parse arguments */ for (i = 0; i < argc && i < MAX_TRACE_ARGS; i++) { struct traceprobe_parse_context ctx = { .flags = (is_return ? TPARG_FL_RETURN : 0) | TPARG_FL_USER, }; trace_probe_log_set_index(i + 2); ret = traceprobe_parse_probe_arg(&tu->tp, i, argv[i], &ctx); traceprobe_finish_parse(&ctx); if (ret) goto error; } ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL; ret = traceprobe_set_print_fmt(&tu->tp, ptype); if (ret < 0) goto error; ret = register_trace_uprobe(tu); if (!ret) goto out; error: free_trace_uprobe(tu); out: trace_probe_log_clear(); return ret; fail_address_parse: trace_probe_log_clear(); path_put(&path); kfree(filename); return ret; } int trace_uprobe_create(const char *raw_command) { return trace_probe_create(raw_command, __trace_uprobe_create); } static int create_or_delete_trace_uprobe(const char *raw_command) { int ret; if (raw_command[0] == '-') return dyn_event_release(raw_command, &trace_uprobe_ops); ret = trace_uprobe_create(raw_command); return ret == -ECANCELED ? -EINVAL : ret; } static int trace_uprobe_release(struct dyn_event *ev) { struct trace_uprobe *tu = to_trace_uprobe(ev); return unregister_trace_uprobe(tu); } /* Probes listing interfaces */ static int trace_uprobe_show(struct seq_file *m, struct dyn_event *ev) { struct trace_uprobe *tu = to_trace_uprobe(ev); char c = is_ret_probe(tu) ? 'r' : 'p'; int i; seq_printf(m, "%c:%s/%s %s:0x%0*lx", c, trace_probe_group_name(&tu->tp), trace_probe_name(&tu->tp), tu->filename, (int)(sizeof(void *) * 2), tu->offset); if (tu->ref_ctr_offset) seq_printf(m, "(0x%lx)", tu->ref_ctr_offset); for (i = 0; i < tu->tp.nr_args; i++) seq_printf(m, " %s=%s", tu->tp.args[i].name, tu->tp.args[i].comm); seq_putc(m, '\n'); return 0; } static int probes_seq_show(struct seq_file *m, void *v) { struct dyn_event *ev = v; if (!is_trace_uprobe(ev)) return 0; return trace_uprobe_show(m, ev); } static const struct seq_operations probes_seq_op = { .start = dyn_event_seq_start, .next = dyn_event_seq_next, .stop = dyn_event_seq_stop, .show = probes_seq_show }; static int probes_open(struct inode *inode, struct file *file) { int ret; ret = security_locked_down(LOCKDOWN_TRACEFS); if (ret) return ret; if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) { ret = dyn_events_release_all(&trace_uprobe_ops); if (ret) return ret; } return seq_open(file, &probes_seq_op); } static ssize_t probes_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { return trace_parse_run_command(file, buffer, count, ppos, create_or_delete_trace_uprobe); } static const struct file_operations uprobe_events_ops = { .owner = THIS_MODULE, .open = probes_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, .write = probes_write, }; /* Probes profiling interfaces */ static int probes_profile_seq_show(struct seq_file *m, void *v) { struct dyn_event *ev = v; struct trace_uprobe *tu; if (!is_trace_uprobe(ev)) return 0; tu = to_trace_uprobe(ev); seq_printf(m, " %s %-44s %15lu\n", tu->filename, trace_probe_name(&tu->tp), tu->nhit); return 0; } static const struct seq_operations profile_seq_op = { .start = dyn_event_seq_start, .next = dyn_event_seq_next, .stop = dyn_event_seq_stop, .show = probes_profile_seq_show }; static int profile_open(struct inode *inode, struct file *file) { int ret; ret = security_locked_down(LOCKDOWN_TRACEFS); if (ret) return ret; return seq_open(file, &profile_seq_op); } static const struct file_operations uprobe_profile_ops = { .owner = THIS_MODULE, .open = profile_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release, }; struct uprobe_cpu_buffer { struct mutex mutex; void *buf; int dsize; }; static struct uprobe_cpu_buffer __percpu *uprobe_cpu_buffer; static int uprobe_buffer_refcnt; static int uprobe_buffer_init(void) { int cpu, err_cpu; uprobe_cpu_buffer = alloc_percpu(struct uprobe_cpu_buffer); if (uprobe_cpu_buffer == NULL) return -ENOMEM; for_each_possible_cpu(cpu) { struct page *p = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, 0); if (p == NULL) { err_cpu = cpu; goto err; } per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf = page_address(p); mutex_init(&per_cpu_ptr(uprobe_cpu_buffer, cpu)->mutex); } return 0; err: for_each_possible_cpu(cpu) { if (cpu == err_cpu) break; free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf); } free_percpu(uprobe_cpu_buffer); return -ENOMEM; } static int uprobe_buffer_enable(void) { int ret = 0; BUG_ON(!mutex_is_locked(&event_mutex)); if (uprobe_buffer_refcnt++ == 0) { ret = uprobe_buffer_init(); if (ret < 0) uprobe_buffer_refcnt--; } return ret; } static void uprobe_buffer_disable(void) { int cpu; BUG_ON(!mutex_is_locked(&event_mutex)); if (--uprobe_buffer_refcnt == 0) { for_each_possible_cpu(cpu) free_page((unsigned long)per_cpu_ptr(uprobe_cpu_buffer, cpu)->buf); free_percpu(uprobe_cpu_buffer); uprobe_cpu_buffer = NULL; } } static struct uprobe_cpu_buffer *uprobe_buffer_get(void) { struct uprobe_cpu_buffer *ucb; int cpu; cpu = raw_smp_processor_id(); ucb = per_cpu_ptr(uprobe_cpu_buffer, cpu); /* * Use per-cpu buffers for fastest access, but we might migrate * so the mutex makes sure we have sole access to it. */ mutex_lock(&ucb->mutex); return ucb; } static void uprobe_buffer_put(struct uprobe_cpu_buffer *ucb) { if (!ucb) return; mutex_unlock(&ucb->mutex); } static struct uprobe_cpu_buffer *prepare_uprobe_buffer(struct trace_uprobe *tu, struct pt_regs *regs, struct uprobe_cpu_buffer **ucbp) { struct uprobe_cpu_buffer *ucb; int dsize, esize; if (*ucbp) return *ucbp; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); dsize = __get_data_size(&tu->tp, regs, NULL); ucb = uprobe_buffer_get(); ucb->dsize = tu->tp.size + dsize; store_trace_args(ucb->buf, &tu->tp, regs, NULL, esize, dsize); *ucbp = ucb; return ucb; } static void __uprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, struct uprobe_cpu_buffer *ucb, struct trace_event_file *trace_file) { struct uprobe_trace_entry_head *entry; struct trace_event_buffer fbuffer; void *data; int size, esize; struct trace_event_call *call = trace_probe_event_call(&tu->tp); WARN_ON(call != trace_file->event_call); if (WARN_ON_ONCE(ucb->dsize > PAGE_SIZE)) return; if (trace_trigger_soft_disabled(trace_file)) return; esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); size = esize + ucb->dsize; entry = trace_event_buffer_reserve(&fbuffer, trace_file, size); if (!entry) return; if (is_ret_probe(tu)) { entry->vaddr[0] = func; entry->vaddr[1] = instruction_pointer(regs); data = DATAOF_TRACE_ENTRY(entry, true); } else { entry->vaddr[0] = instruction_pointer(regs); data = DATAOF_TRACE_ENTRY(entry, false); } memcpy(data, ucb->buf, ucb->dsize); trace_event_buffer_commit(&fbuffer); } /* uprobe handler */ static int uprobe_trace_func(struct trace_uprobe *tu, struct pt_regs *regs, struct uprobe_cpu_buffer **ucbp) { struct event_file_link *link; struct uprobe_cpu_buffer *ucb; if (is_ret_probe(tu)) return 0; ucb = prepare_uprobe_buffer(tu, regs, ucbp); rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) __uprobe_trace_func(tu, 0, regs, ucb, link->file); rcu_read_unlock(); return 0; } static void uretprobe_trace_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, struct uprobe_cpu_buffer **ucbp) { struct event_file_link *link; struct uprobe_cpu_buffer *ucb; ucb = prepare_uprobe_buffer(tu, regs, ucbp); rcu_read_lock(); trace_probe_for_each_link_rcu(link, &tu->tp) __uprobe_trace_func(tu, func, regs, ucb, link->file); rcu_read_unlock(); } /* Event entry printers */ static enum print_line_t print_uprobe_event(struct trace_iterator *iter, int flags, struct trace_event *event) { struct uprobe_trace_entry_head *entry; struct trace_seq *s = &iter->seq; struct trace_uprobe *tu; u8 *data; entry = (struct uprobe_trace_entry_head *)iter->ent; tu = trace_uprobe_primary_from_call( container_of(event, struct trace_event_call, event)); if (unlikely(!tu)) goto out; if (is_ret_probe(tu)) { trace_seq_printf(s, "%s: (0x%lx <- 0x%lx)", trace_probe_name(&tu->tp), entry->vaddr[1], entry->vaddr[0]); data = DATAOF_TRACE_ENTRY(entry, true); } else { trace_seq_printf(s, "%s: (0x%lx)", trace_probe_name(&tu->tp), entry->vaddr[0]); data = DATAOF_TRACE_ENTRY(entry, false); } if (trace_probe_print_args(s, tu->tp.args, tu->tp.nr_args, data, entry) < 0) goto out; trace_seq_putc(s, '\n'); out: return trace_handle_return(s); } typedef bool (*filter_func_t)(struct uprobe_consumer *self, enum uprobe_filter_ctx ctx, struct mm_struct *mm); static int trace_uprobe_enable(struct trace_uprobe *tu, filter_func_t filter) { int ret; tu->consumer.filter = filter; tu->inode = d_real_inode(tu->path.dentry); if (tu->ref_ctr_offset) ret = uprobe_register_refctr(tu->inode, tu->offset, tu->ref_ctr_offset, &tu->consumer); else ret = uprobe_register(tu->inode, tu->offset, &tu->consumer); if (ret) tu->inode = NULL; return ret; } static void __probe_event_disable(struct trace_probe *tp) { struct trace_uprobe *tu; tu = container_of(tp, struct trace_uprobe, tp); WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { if (!tu->inode) continue; uprobe_unregister(tu->inode, tu->offset, &tu->consumer); tu->inode = NULL; } } static int probe_event_enable(struct trace_event_call *call, struct trace_event_file *file, filter_func_t filter) { struct trace_probe *tp; struct trace_uprobe *tu; bool enabled; int ret; tp = trace_probe_primary_from_call(call); if (WARN_ON_ONCE(!tp)) return -ENODEV; enabled = trace_probe_is_enabled(tp); /* This may also change "enabled" state */ if (file) { if (trace_probe_test_flag(tp, TP_FLAG_PROFILE)) return -EINTR; ret = trace_probe_add_file(tp, file); if (ret < 0) return ret; } else { if (trace_probe_test_flag(tp, TP_FLAG_TRACE)) return -EINTR; trace_probe_set_flag(tp, TP_FLAG_PROFILE); } tu = container_of(tp, struct trace_uprobe, tp); WARN_ON(!uprobe_filter_is_empty(tu->tp.event->filter)); if (enabled) return 0; ret = uprobe_buffer_enable(); if (ret) goto err_flags; list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { ret = trace_uprobe_enable(tu, filter); if (ret) { __probe_event_disable(tp); goto err_buffer; } } return 0; err_buffer: uprobe_buffer_disable(); err_flags: if (file) trace_probe_remove_file(tp, file); else trace_probe_clear_flag(tp, TP_FLAG_PROFILE); return ret; } static void probe_event_disable(struct trace_event_call *call, struct trace_event_file *file) { struct trace_probe *tp; tp = trace_probe_primary_from_call(call); if (WARN_ON_ONCE(!tp)) return; if (!trace_probe_is_enabled(tp)) return; if (file) { if (trace_probe_remove_file(tp, file) < 0) return; if (trace_probe_is_enabled(tp)) return; } else trace_probe_clear_flag(tp, TP_FLAG_PROFILE); __probe_event_disable(tp); uprobe_buffer_disable(); } static int uprobe_event_define_fields(struct trace_event_call *event_call) { int ret, size; struct uprobe_trace_entry_head field; struct trace_uprobe *tu; tu = trace_uprobe_primary_from_call(event_call); if (unlikely(!tu)) return -ENODEV; if (is_ret_probe(tu)) { DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_FUNC, 0); DEFINE_FIELD(unsigned long, vaddr[1], FIELD_STRING_RETIP, 0); size = SIZEOF_TRACE_ENTRY(true); } else { DEFINE_FIELD(unsigned long, vaddr[0], FIELD_STRING_IP, 0); size = SIZEOF_TRACE_ENTRY(false); } return traceprobe_define_arg_fields(event_call, size, &tu->tp); } #ifdef CONFIG_PERF_EVENTS static bool __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm) { struct perf_event *event; list_for_each_entry(event, &filter->perf_events, hw.tp_list) { if (event->hw.target->mm == mm) return true; } return false; } static inline bool trace_uprobe_filter_event(struct trace_uprobe_filter *filter, struct perf_event *event) { return __uprobe_perf_filter(filter, event->hw.target->mm); } static bool trace_uprobe_filter_remove(struct trace_uprobe_filter *filter, struct perf_event *event) { bool done; write_lock(&filter->rwlock); if (event->hw.target) { list_del(&event->hw.tp_list); done = filter->nr_systemwide || (event->hw.target->flags & PF_EXITING) || trace_uprobe_filter_event(filter, event); } else { filter->nr_systemwide--; done = filter->nr_systemwide; } write_unlock(&filter->rwlock); return done; } /* This returns true if the filter always covers target mm */ static bool trace_uprobe_filter_add(struct trace_uprobe_filter *filter, struct perf_event *event) { bool done; write_lock(&filter->rwlock); if (event->hw.target) { /* * event->parent != NULL means copy_process(), we can avoid * uprobe_apply(). current->mm must be probed and we can rely * on dup_mmap() which preserves the already installed bp's. * * attr.enable_on_exec means that exec/mmap will install the * breakpoints we need. */ done = filter->nr_systemwide || event->parent || event->attr.enable_on_exec || trace_uprobe_filter_event(filter, event); list_add(&event->hw.tp_list, &filter->perf_events); } else { done = filter->nr_systemwide; filter->nr_systemwide++; } write_unlock(&filter->rwlock); return done; } static int uprobe_perf_close(struct trace_event_call *call, struct perf_event *event) { struct trace_probe *tp; struct trace_uprobe *tu; int ret = 0; tp = trace_probe_primary_from_call(call); if (WARN_ON_ONCE(!tp)) return -ENODEV; tu = container_of(tp, struct trace_uprobe, tp); if (trace_uprobe_filter_remove(tu->tp.event->filter, event)) return 0; list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { ret = uprobe_apply(tu->inode, tu->offset, &tu->consumer, false); if (ret) break; } return ret; } static int uprobe_perf_open(struct trace_event_call *call, struct perf_event *event) { struct trace_probe *tp; struct trace_uprobe *tu; int err = 0; tp = trace_probe_primary_from_call(call); if (WARN_ON_ONCE(!tp)) return -ENODEV; tu = container_of(tp, struct trace_uprobe, tp); if (trace_uprobe_filter_add(tu->tp.event->filter, event)) return 0; list_for_each_entry(tu, trace_probe_probe_list(tp), tp.list) { err = uprobe_apply(tu->inode, tu->offset, &tu->consumer, true); if (err) { uprobe_perf_close(call, event); break; } } return err; } static bool uprobe_perf_filter(struct uprobe_consumer *uc, enum uprobe_filter_ctx ctx, struct mm_struct *mm) { struct trace_uprobe_filter *filter; struct trace_uprobe *tu; int ret; tu = container_of(uc, struct trace_uprobe, consumer); filter = tu->tp.event->filter; /* * speculative short-circuiting check to avoid unnecessarily taking * filter->rwlock below, if the uprobe has system-wide consumer */ if (READ_ONCE(filter->nr_systemwide)) return true; read_lock(&filter->rwlock); ret = __uprobe_perf_filter(filter, mm); read_unlock(&filter->rwlock); return ret; } static void __uprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, struct uprobe_cpu_buffer **ucbp) { struct trace_event_call *call = trace_probe_event_call(&tu->tp); struct uprobe_trace_entry_head *entry; struct uprobe_cpu_buffer *ucb; struct hlist_head *head; void *data; int size, esize; int rctx; #ifdef CONFIG_BPF_EVENTS if (bpf_prog_array_valid(call)) { u32 ret; ret = bpf_prog_run_array_uprobe(call->prog_array, regs, bpf_prog_run); if (!ret) return; } #endif /* CONFIG_BPF_EVENTS */ esize = SIZEOF_TRACE_ENTRY(is_ret_probe(tu)); ucb = prepare_uprobe_buffer(tu, regs, ucbp); size = esize + ucb->dsize; size = ALIGN(size + sizeof(u32), sizeof(u64)) - sizeof(u32); if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE, "profile buffer not large enough")) return; preempt_disable(); head = this_cpu_ptr(call->perf_events); if (hlist_empty(head)) goto out; entry = perf_trace_buf_alloc(size, NULL, &rctx); if (!entry) goto out; if (is_ret_probe(tu)) { entry->vaddr[0] = func; entry->vaddr[1] = instruction_pointer(regs); data = DATAOF_TRACE_ENTRY(entry, true); } else { entry->vaddr[0] = instruction_pointer(regs); data = DATAOF_TRACE_ENTRY(entry, false); } memcpy(data, ucb->buf, ucb->dsize); if (size - esize > ucb->dsize) memset(data + ucb->dsize, 0, size - esize - ucb->dsize); perf_trace_buf_submit(entry, size, rctx, call->event.type, 1, regs, head, NULL); out: preempt_enable(); } /* uprobe profile handler */ static int uprobe_perf_func(struct trace_uprobe *tu, struct pt_regs *regs, struct uprobe_cpu_buffer **ucbp) { if (!uprobe_perf_filter(&tu->consumer, 0, current->mm)) return UPROBE_HANDLER_REMOVE; if (!is_ret_probe(tu)) __uprobe_perf_func(tu, 0, regs, ucbp); return 0; } static void uretprobe_perf_func(struct trace_uprobe *tu, unsigned long func, struct pt_regs *regs, struct uprobe_cpu_buffer **ucbp) { __uprobe_perf_func(tu, func, regs, ucbp); } int bpf_get_uprobe_info(const struct perf_event *event, u32 *fd_type, const char **filename, u64 *probe_offset, u64 *probe_addr, bool perf_type_tracepoint) { const char *pevent = trace_event_name(event->tp_event); const char *group = event->tp_event->class->system; struct trace_uprobe *tu; if (perf_type_tracepoint) tu = find_probe_event(pevent, group); else tu = trace_uprobe_primary_from_call(event->tp_event); if (!tu) return -EINVAL; *fd_type = is_ret_probe(tu) ? BPF_FD_TYPE_URETPROBE : BPF_FD_TYPE_UPROBE; *filename = tu->filename; *probe_offset = tu->offset; *probe_addr = 0; return 0; } #endif /* CONFIG_PERF_EVENTS */ static int trace_uprobe_register(struct trace_event_call *event, enum trace_reg type, void *data) { struct trace_event_file *file = data; switch (type) { case TRACE_REG_REGISTER: return probe_event_enable(event, file, NULL); case TRACE_REG_UNREGISTER: probe_event_disable(event, file); return 0; #ifdef CONFIG_PERF_EVENTS case TRACE_REG_PERF_REGISTER: return probe_event_enable(event, NULL, uprobe_perf_filter); case TRACE_REG_PERF_UNREGISTER: probe_event_disable(event, NULL); return 0; case TRACE_REG_PERF_OPEN: return uprobe_perf_open(event, data); case TRACE_REG_PERF_CLOSE: return uprobe_perf_close(event, data); #endif default: return 0; } } static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs) { struct trace_uprobe *tu; struct uprobe_dispatch_data udd; struct uprobe_cpu_buffer *ucb = NULL; int ret = 0; tu = container_of(con, struct trace_uprobe, consumer); tu->nhit++; udd.tu = tu; udd.bp_addr = instruction_pointer(regs); current->utask->vaddr = (unsigned long) &udd; if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) ret |= uprobe_trace_func(tu, regs, &ucb); #ifdef CONFIG_PERF_EVENTS if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) ret |= uprobe_perf_func(tu, regs, &ucb); #endif uprobe_buffer_put(ucb); return ret; } static int uretprobe_dispatcher(struct uprobe_consumer *con, unsigned long func, struct pt_regs *regs) { struct trace_uprobe *tu; struct uprobe_dispatch_data udd; struct uprobe_cpu_buffer *ucb = NULL; tu = container_of(con, struct trace_uprobe, consumer); udd.tu = tu; udd.bp_addr = func; current->utask->vaddr = (unsigned long) &udd; if (WARN_ON_ONCE(!uprobe_cpu_buffer)) return 0; if (trace_probe_test_flag(&tu->tp, TP_FLAG_TRACE)) uretprobe_trace_func(tu, func, regs, &ucb); #ifdef CONFIG_PERF_EVENTS if (trace_probe_test_flag(&tu->tp, TP_FLAG_PROFILE)) uretprobe_perf_func(tu, func, regs, &ucb); #endif uprobe_buffer_put(ucb); return 0; } static struct trace_event_functions uprobe_funcs = { .trace = print_uprobe_event }; static struct trace_event_fields uprobe_fields_array[] = { { .type = TRACE_FUNCTION_TYPE, .define_fields = uprobe_event_define_fields }, {} }; static inline void init_trace_event_call(struct trace_uprobe *tu) { struct trace_event_call *call = trace_probe_event_call(&tu->tp); call->event.funcs = &uprobe_funcs; call->class->fields_array = uprobe_fields_array; call->flags = TRACE_EVENT_FL_UPROBE | TRACE_EVENT_FL_CAP_ANY; call->class->reg = trace_uprobe_register; } static int register_uprobe_event(struct trace_uprobe *tu) { init_trace_event_call(tu); return trace_probe_register_event_call(&tu->tp); } static int unregister_uprobe_event(struct trace_uprobe *tu) { return trace_probe_unregister_event_call(&tu->tp); } #ifdef CONFIG_PERF_EVENTS struct trace_event_call * create_local_trace_uprobe(char *name, unsigned long offs, unsigned long ref_ctr_offset, bool is_return) { enum probe_print_type ptype; struct trace_uprobe *tu; struct path path; int ret; ret = kern_path(name, LOOKUP_FOLLOW, &path); if (ret) return ERR_PTR(ret); if (!d_is_reg(path.dentry)) { path_put(&path); return ERR_PTR(-EINVAL); } /* * local trace_kprobes are not added to dyn_event, so they are never * searched in find_trace_kprobe(). Therefore, there is no concern of * duplicated name "DUMMY_EVENT" here. */ tu = alloc_trace_uprobe(UPROBE_EVENT_SYSTEM, "DUMMY_EVENT", 0, is_return); if (IS_ERR(tu)) { pr_info("Failed to allocate trace_uprobe.(%d)\n", (int)PTR_ERR(tu)); path_put(&path); return ERR_CAST(tu); } tu->offset = offs; tu->path = path; tu->ref_ctr_offset = ref_ctr_offset; tu->filename = kstrdup(name, GFP_KERNEL); if (!tu->filename) { ret = -ENOMEM; goto error; } init_trace_event_call(tu); ptype = is_ret_probe(tu) ? PROBE_PRINT_RETURN : PROBE_PRINT_NORMAL; if (traceprobe_set_print_fmt(&tu->tp, ptype) < 0) { ret = -ENOMEM; goto error; } return trace_probe_event_call(&tu->tp); error: free_trace_uprobe(tu); return ERR_PTR(ret); } void destroy_local_trace_uprobe(struct trace_event_call *event_call) { struct trace_uprobe *tu; tu = trace_uprobe_primary_from_call(event_call); free_trace_uprobe(tu); } #endif /* CONFIG_PERF_EVENTS */ /* Make a trace interface for controlling probe points */ static __init int init_uprobe_trace(void) { int ret; ret = dyn_event_register(&trace_uprobe_ops); if (ret) return ret; ret = tracing_init_dentry(); if (ret) return 0; trace_create_file("uprobe_events", TRACE_MODE_WRITE, NULL, NULL, &uprobe_events_ops); /* Profile interface */ trace_create_file("uprobe_profile", TRACE_MODE_READ, NULL, NULL, &uprobe_profile_ops); return 0; } fs_initcall(init_uprobe_trace); |
| 371 539 373 612 637 553 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2007 Oracle. All rights reserved. */ #include <asm/unaligned.h> #include "messages.h" #include "extent_io.h" #include "fs.h" #include "accessors.h" static bool check_setget_bounds(const struct extent_buffer *eb, const void *ptr, unsigned off, int size) { const unsigned long member_offset = (unsigned long)ptr + off; if (unlikely(member_offset + size > eb->len)) { btrfs_warn(eb->fs_info, "bad eb member %s: ptr 0x%lx start %llu member offset %lu size %d", (member_offset > eb->len ? "start" : "end"), (unsigned long)ptr, eb->start, member_offset, size); return false; } return true; } void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb) { token->eb = eb; token->kaddr = folio_address(eb->folios[0]); token->offset = 0; } /* * Macro templates that define helpers to read/write extent buffer data of a * given size, that are also used via ctree.h for access to item members by * specialized helpers. * * Generic helpers: * - btrfs_set_8 (for 8/16/32/64) * - btrfs_get_8 (for 8/16/32/64) * * Generic helpers with a token (cached address of the most recently accessed * page): * - btrfs_set_token_8 (for 8/16/32/64) * - btrfs_get_token_8 (for 8/16/32/64) * * The set/get functions handle data spanning two pages transparently, in case * metadata block size is larger than page. Every pointer to metadata items is * an offset into the extent buffer page array, cast to a specific type. This * gives us all the type checking. * * The extent buffer pages stored in the array folios may not form a contiguous * phyusical range, but the API functions assume the linear offset to the range * from 0 to metadata node size. */ #define DEFINE_BTRFS_SETGET_BITS(bits) \ u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \ const void *ptr, unsigned long off) \ { \ const unsigned long member_offset = (unsigned long)ptr + off; \ const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \ const unsigned long oil = get_eb_offset_in_folio(token->eb, \ member_offset);\ const int unit_size = token->eb->folio_size; \ const int unit_shift = token->eb->folio_shift; \ const int size = sizeof(u##bits); \ u8 lebytes[sizeof(u##bits)]; \ const int part = unit_size - oil; \ \ ASSERT(token); \ ASSERT(token->kaddr); \ ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \ if (token->offset <= member_offset && \ member_offset + size <= token->offset + unit_size) { \ return get_unaligned_le##bits(token->kaddr + oil); \ } \ token->kaddr = folio_address(token->eb->folios[idx]); \ token->offset = idx << unit_shift; \ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oil + size <= unit_size) \ return get_unaligned_le##bits(token->kaddr + oil); \ \ memcpy(lebytes, token->kaddr + oil, part); \ token->kaddr = folio_address(token->eb->folios[idx + 1]); \ token->offset = (idx + 1) << unit_shift; \ memcpy(lebytes + part, token->kaddr, size - part); \ return get_unaligned_le##bits(lebytes); \ } \ u##bits btrfs_get_##bits(const struct extent_buffer *eb, \ const void *ptr, unsigned long off) \ { \ const unsigned long member_offset = (unsigned long)ptr + off; \ const unsigned long idx = get_eb_folio_index(eb, member_offset);\ const unsigned long oil = get_eb_offset_in_folio(eb, \ member_offset);\ const int unit_size = eb->folio_size; \ char *kaddr = folio_address(eb->folios[idx]); \ const int size = sizeof(u##bits); \ const int part = unit_size - oil; \ u8 lebytes[sizeof(u##bits)]; \ \ ASSERT(check_setget_bounds(eb, ptr, off, size)); \ if (INLINE_EXTENT_BUFFER_PAGES == 1 || oil + size <= unit_size) \ return get_unaligned_le##bits(kaddr + oil); \ \ memcpy(lebytes, kaddr + oil, part); \ kaddr = folio_address(eb->folios[idx + 1]); \ memcpy(lebytes + part, kaddr, size - part); \ return get_unaligned_le##bits(lebytes); \ } \ void btrfs_set_token_##bits(struct btrfs_map_token *token, \ const void *ptr, unsigned long off, \ u##bits val) \ { \ const unsigned long member_offset = (unsigned long)ptr + off; \ const unsigned long idx = get_eb_folio_index(token->eb, member_offset); \ const unsigned long oil = get_eb_offset_in_folio(token->eb, \ member_offset);\ const int unit_size = token->eb->folio_size; \ const int unit_shift = token->eb->folio_shift; \ const int size = sizeof(u##bits); \ u8 lebytes[sizeof(u##bits)]; \ const int part = unit_size - oil; \ \ ASSERT(token); \ ASSERT(token->kaddr); \ ASSERT(check_setget_bounds(token->eb, ptr, off, size)); \ if (token->offset <= member_offset && \ member_offset + size <= token->offset + unit_size) { \ put_unaligned_le##bits(val, token->kaddr + oil); \ return; \ } \ token->kaddr = folio_address(token->eb->folios[idx]); \ token->offset = idx << unit_shift; \ if (INLINE_EXTENT_BUFFER_PAGES == 1 || \ oil + size <= unit_size) { \ put_unaligned_le##bits(val, token->kaddr + oil); \ return; \ } \ put_unaligned_le##bits(val, lebytes); \ memcpy(token->kaddr + oil, lebytes, part); \ token->kaddr = folio_address(token->eb->folios[idx + 1]); \ token->offset = (idx + 1) << unit_shift; \ memcpy(token->kaddr, lebytes + part, size - part); \ } \ void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \ unsigned long off, u##bits val) \ { \ const unsigned long member_offset = (unsigned long)ptr + off; \ const unsigned long idx = get_eb_folio_index(eb, member_offset);\ const unsigned long oil = get_eb_offset_in_folio(eb, \ member_offset);\ const int unit_size = eb->folio_size; \ char *kaddr = folio_address(eb->folios[idx]); \ const int size = sizeof(u##bits); \ const int part = unit_size - oil; \ u8 lebytes[sizeof(u##bits)]; \ \ ASSERT(check_setget_bounds(eb, ptr, off, size)); \ if (INLINE_EXTENT_BUFFER_PAGES == 1 || \ oil + size <= unit_size) { \ put_unaligned_le##bits(val, kaddr + oil); \ return; \ } \ \ put_unaligned_le##bits(val, lebytes); \ memcpy(kaddr + oil, lebytes, part); \ kaddr = folio_address(eb->folios[idx + 1]); \ memcpy(kaddr, lebytes + part, size - part); \ } DEFINE_BTRFS_SETGET_BITS(8) DEFINE_BTRFS_SETGET_BITS(16) DEFINE_BTRFS_SETGET_BITS(32) DEFINE_BTRFS_SETGET_BITS(64) void btrfs_node_key(const struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { unsigned long ptr = btrfs_node_key_ptr_offset(eb, nr); read_eb_member(eb, (struct btrfs_key_ptr *)ptr, struct btrfs_key_ptr, key, disk_key); } |
| 1 1 1 7 8 1 1 15 1 2 10 1 1 1 3 1 25 4 1 24 22 1 1 1 1 5 8 1 1 9 3 5 1 1 1 1 2 21 1 4 1 11 6 9 9 1 1 1 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 | // SPDX-License-Identifier: GPL-2.0-only /* * Landlock LSM - System call implementations and user space interfaces * * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2018-2020 ANSSI */ #include <asm/current.h> #include <linux/anon_inodes.h> #include <linux/build_bug.h> #include <linux/capability.h> #include <linux/compiler_types.h> #include <linux/dcache.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/limits.h> #include <linux/mount.h> #include <linux/path.h> #include <linux/sched.h> #include <linux/security.h> #include <linux/stddef.h> #include <linux/syscalls.h> #include <linux/types.h> #include <linux/uaccess.h> #include <uapi/linux/landlock.h> #include "cred.h" #include "fs.h" #include "limits.h" #include "net.h" #include "ruleset.h" #include "setup.h" static bool is_initialized(void) { if (likely(landlock_initialized)) return true; pr_warn_once( "Disabled but requested by user space. " "You should enable Landlock at boot time: " "https://docs.kernel.org/userspace-api/landlock.html#boot-time-configuration\n"); return false; } /** * copy_min_struct_from_user - Safe future-proof argument copying * * Extend copy_struct_from_user() to check for consistent user buffer. * * @dst: Kernel space pointer or NULL. * @ksize: Actual size of the data pointed to by @dst. * @ksize_min: Minimal required size to be copied. * @src: User space pointer or NULL. * @usize: (Alleged) size of the data pointed to by @src. */ static __always_inline int copy_min_struct_from_user(void *const dst, const size_t ksize, const size_t ksize_min, const void __user *const src, const size_t usize) { /* Checks buffer inconsistencies. */ BUILD_BUG_ON(!dst); if (!src) return -EFAULT; /* Checks size ranges. */ BUILD_BUG_ON(ksize <= 0); BUILD_BUG_ON(ksize < ksize_min); if (usize < ksize_min) return -EINVAL; if (usize > PAGE_SIZE) return -E2BIG; /* Copies user buffer and fills with zeros. */ return copy_struct_from_user(dst, ksize, src, usize); } /* * This function only contains arithmetic operations with constants, leading to * BUILD_BUG_ON(). The related code is evaluated and checked at build time, * but it is then ignored thanks to compiler optimizations. */ static void build_check_abi(void) { struct landlock_ruleset_attr ruleset_attr; struct landlock_path_beneath_attr path_beneath_attr; struct landlock_net_port_attr net_port_attr; size_t ruleset_size, path_beneath_size, net_port_size; /* * For each user space ABI structures, first checks that there is no * hole in them, then checks that all architectures have the same * struct size. */ ruleset_size = sizeof(ruleset_attr.handled_access_fs); ruleset_size += sizeof(ruleset_attr.handled_access_net); BUILD_BUG_ON(sizeof(ruleset_attr) != ruleset_size); BUILD_BUG_ON(sizeof(ruleset_attr) != 16); path_beneath_size = sizeof(path_beneath_attr.allowed_access); path_beneath_size += sizeof(path_beneath_attr.parent_fd); BUILD_BUG_ON(sizeof(path_beneath_attr) != path_beneath_size); BUILD_BUG_ON(sizeof(path_beneath_attr) != 12); net_port_size = sizeof(net_port_attr.allowed_access); net_port_size += sizeof(net_port_attr.port); BUILD_BUG_ON(sizeof(net_port_attr) != net_port_size); BUILD_BUG_ON(sizeof(net_port_attr) != 16); } /* Ruleset handling */ static int fop_ruleset_release(struct inode *const inode, struct file *const filp) { struct landlock_ruleset *ruleset = filp->private_data; landlock_put_ruleset(ruleset); return 0; } static ssize_t fop_dummy_read(struct file *const filp, char __user *const buf, const size_t size, loff_t *const ppos) { /* Dummy handler to enable FMODE_CAN_READ. */ return -EINVAL; } static ssize_t fop_dummy_write(struct file *const filp, const char __user *const buf, const size_t size, loff_t *const ppos) { /* Dummy handler to enable FMODE_CAN_WRITE. */ return -EINVAL; } /* * A ruleset file descriptor enables to build a ruleset by adding (i.e. * writing) rule after rule, without relying on the task's context. This * reentrant design is also used in a read way to enforce the ruleset on the * current task. */ static const struct file_operations ruleset_fops = { .release = fop_ruleset_release, .read = fop_dummy_read, .write = fop_dummy_write, }; #define LANDLOCK_ABI_VERSION 5 /** * sys_landlock_create_ruleset - Create a new ruleset * * @attr: Pointer to a &struct landlock_ruleset_attr identifying the scope of * the new ruleset. * @size: Size of the pointed &struct landlock_ruleset_attr (needed for * backward and forward compatibility). * @flags: Supported value: %LANDLOCK_CREATE_RULESET_VERSION. * * This system call enables to create a new Landlock ruleset, and returns the * related file descriptor on success. * * If @flags is %LANDLOCK_CREATE_RULESET_VERSION and @attr is NULL and @size is * 0, then the returned value is the highest supported Landlock ABI version * (starting at 1). * * Possible returned errors are: * * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; * - %EINVAL: unknown @flags, or unknown access, or too small @size; * - %E2BIG or %EFAULT: @attr or @size inconsistencies; * - %ENOMSG: empty &landlock_ruleset_attr.handled_access_fs. */ SYSCALL_DEFINE3(landlock_create_ruleset, const struct landlock_ruleset_attr __user *const, attr, const size_t, size, const __u32, flags) { struct landlock_ruleset_attr ruleset_attr; struct landlock_ruleset *ruleset; int err, ruleset_fd; /* Build-time checks. */ build_check_abi(); if (!is_initialized()) return -EOPNOTSUPP; if (flags) { if ((flags == LANDLOCK_CREATE_RULESET_VERSION) && !attr && !size) return LANDLOCK_ABI_VERSION; return -EINVAL; } /* Copies raw user space buffer. */ err = copy_min_struct_from_user(&ruleset_attr, sizeof(ruleset_attr), offsetofend(typeof(ruleset_attr), handled_access_fs), attr, size); if (err) return err; /* Checks content (and 32-bits cast). */ if ((ruleset_attr.handled_access_fs | LANDLOCK_MASK_ACCESS_FS) != LANDLOCK_MASK_ACCESS_FS) return -EINVAL; /* Checks network content (and 32-bits cast). */ if ((ruleset_attr.handled_access_net | LANDLOCK_MASK_ACCESS_NET) != LANDLOCK_MASK_ACCESS_NET) return -EINVAL; /* Checks arguments and transforms to kernel struct. */ ruleset = landlock_create_ruleset(ruleset_attr.handled_access_fs, ruleset_attr.handled_access_net); if (IS_ERR(ruleset)) return PTR_ERR(ruleset); /* Creates anonymous FD referring to the ruleset. */ ruleset_fd = anon_inode_getfd("[landlock-ruleset]", &ruleset_fops, ruleset, O_RDWR | O_CLOEXEC); if (ruleset_fd < 0) landlock_put_ruleset(ruleset); return ruleset_fd; } /* * Returns an owned ruleset from a FD. It is thus needed to call * landlock_put_ruleset() on the return value. */ static struct landlock_ruleset *get_ruleset_from_fd(const int fd, const fmode_t mode) { struct fd ruleset_f; struct landlock_ruleset *ruleset; ruleset_f = fdget(fd); if (!ruleset_f.file) return ERR_PTR(-EBADF); /* Checks FD type and access right. */ if (ruleset_f.file->f_op != &ruleset_fops) { ruleset = ERR_PTR(-EBADFD); goto out_fdput; } if (!(ruleset_f.file->f_mode & mode)) { ruleset = ERR_PTR(-EPERM); goto out_fdput; } ruleset = ruleset_f.file->private_data; if (WARN_ON_ONCE(ruleset->num_layers != 1)) { ruleset = ERR_PTR(-EINVAL); goto out_fdput; } landlock_get_ruleset(ruleset); out_fdput: fdput(ruleset_f); return ruleset; } /* Path handling */ /* * @path: Must call put_path(@path) after the call if it succeeded. */ static int get_path_from_fd(const s32 fd, struct path *const path) { struct fd f; int err = 0; BUILD_BUG_ON(!__same_type( fd, ((struct landlock_path_beneath_attr *)NULL)->parent_fd)); /* Handles O_PATH. */ f = fdget_raw(fd); if (!f.file) return -EBADF; /* * Forbids ruleset FDs, internal filesystems (e.g. nsfs), including * pseudo filesystems that will never be mountable (e.g. sockfs, * pipefs). */ if ((f.file->f_op == &ruleset_fops) || (f.file->f_path.mnt->mnt_flags & MNT_INTERNAL) || (f.file->f_path.dentry->d_sb->s_flags & SB_NOUSER) || d_is_negative(f.file->f_path.dentry) || IS_PRIVATE(d_backing_inode(f.file->f_path.dentry))) { err = -EBADFD; goto out_fdput; } *path = f.file->f_path; path_get(path); out_fdput: fdput(f); return err; } static int add_rule_path_beneath(struct landlock_ruleset *const ruleset, const void __user *const rule_attr) { struct landlock_path_beneath_attr path_beneath_attr; struct path path; int res, err; access_mask_t mask; /* Copies raw user space buffer. */ res = copy_from_user(&path_beneath_attr, rule_attr, sizeof(path_beneath_attr)); if (res) return -EFAULT; /* * Informs about useless rule: empty allowed_access (i.e. deny rules) * are ignored in path walks. */ if (!path_beneath_attr.allowed_access) return -ENOMSG; /* Checks that allowed_access matches the @ruleset constraints. */ mask = landlock_get_raw_fs_access_mask(ruleset, 0); if ((path_beneath_attr.allowed_access | mask) != mask) return -EINVAL; /* Gets and checks the new rule. */ err = get_path_from_fd(path_beneath_attr.parent_fd, &path); if (err) return err; /* Imports the new rule. */ err = landlock_append_fs_rule(ruleset, &path, path_beneath_attr.allowed_access); path_put(&path); return err; } static int add_rule_net_port(struct landlock_ruleset *ruleset, const void __user *const rule_attr) { struct landlock_net_port_attr net_port_attr; int res; access_mask_t mask; /* Copies raw user space buffer. */ res = copy_from_user(&net_port_attr, rule_attr, sizeof(net_port_attr)); if (res) return -EFAULT; /* * Informs about useless rule: empty allowed_access (i.e. deny rules) * are ignored by network actions. */ if (!net_port_attr.allowed_access) return -ENOMSG; /* Checks that allowed_access matches the @ruleset constraints. */ mask = landlock_get_net_access_mask(ruleset, 0); if ((net_port_attr.allowed_access | mask) != mask) return -EINVAL; /* Denies inserting a rule with port greater than 65535. */ if (net_port_attr.port > U16_MAX) return -EINVAL; /* Imports the new rule. */ return landlock_append_net_rule(ruleset, net_port_attr.port, net_port_attr.allowed_access); } /** * sys_landlock_add_rule - Add a new rule to a ruleset * * @ruleset_fd: File descriptor tied to the ruleset that should be extended * with the new rule. * @rule_type: Identify the structure type pointed to by @rule_attr: * %LANDLOCK_RULE_PATH_BENEATH or %LANDLOCK_RULE_NET_PORT. * @rule_attr: Pointer to a rule (matching the @rule_type). * @flags: Must be 0. * * This system call enables to define a new rule and add it to an existing * ruleset. * * Possible returned errors are: * * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; * - %EAFNOSUPPORT: @rule_type is %LANDLOCK_RULE_NET_PORT but TCP/IP is not * supported by the running kernel; * - %EINVAL: @flags is not 0; * - %EINVAL: The rule accesses are inconsistent (i.e. * &landlock_path_beneath_attr.allowed_access or * &landlock_net_port_attr.allowed_access is not a subset of the ruleset * handled accesses) * - %EINVAL: &landlock_net_port_attr.port is greater than 65535; * - %ENOMSG: Empty accesses (e.g. &landlock_path_beneath_attr.allowed_access is * 0); * - %EBADF: @ruleset_fd is not a file descriptor for the current thread, or a * member of @rule_attr is not a file descriptor as expected; * - %EBADFD: @ruleset_fd is not a ruleset file descriptor, or a member of * @rule_attr is not the expected file descriptor type; * - %EPERM: @ruleset_fd has no write access to the underlying ruleset; * - %EFAULT: @rule_attr was not a valid address. */ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, const enum landlock_rule_type, rule_type, const void __user *const, rule_attr, const __u32, flags) { struct landlock_ruleset *ruleset; int err; if (!is_initialized()) return -EOPNOTSUPP; /* No flag for now. */ if (flags) return -EINVAL; /* Gets and checks the ruleset. */ ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_WRITE); if (IS_ERR(ruleset)) return PTR_ERR(ruleset); switch (rule_type) { case LANDLOCK_RULE_PATH_BENEATH: err = add_rule_path_beneath(ruleset, rule_attr); break; case LANDLOCK_RULE_NET_PORT: err = add_rule_net_port(ruleset, rule_attr); break; default: err = -EINVAL; break; } landlock_put_ruleset(ruleset); return err; } /* Enforcement */ /** * sys_landlock_restrict_self - Enforce a ruleset on the calling thread * * @ruleset_fd: File descriptor tied to the ruleset to merge with the target. * @flags: Must be 0. * * This system call enables to enforce a Landlock ruleset on the current * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its * namespace or is running with no_new_privs. This avoids scenarios where * unprivileged tasks can affect the behavior of privileged children. * * Possible returned errors are: * * - %EOPNOTSUPP: Landlock is supported by the kernel but disabled at boot time; * - %EINVAL: @flags is not 0. * - %EBADF: @ruleset_fd is not a file descriptor for the current thread; * - %EBADFD: @ruleset_fd is not a ruleset file descriptor; * - %EPERM: @ruleset_fd has no read access to the underlying ruleset, or the * current thread is not running with no_new_privs, or it doesn't have * %CAP_SYS_ADMIN in its namespace. * - %E2BIG: The maximum number of stacked rulesets is reached for the current * thread. */ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, flags) { struct landlock_ruleset *new_dom, *ruleset; struct cred *new_cred; struct landlock_cred_security *new_llcred; int err; if (!is_initialized()) return -EOPNOTSUPP; /* * Similar checks as for seccomp(2), except that an -EPERM may be * returned. */ if (!task_no_new_privs(current) && !ns_capable_noaudit(current_user_ns(), CAP_SYS_ADMIN)) return -EPERM; /* No flag for now. */ if (flags) return -EINVAL; /* Gets and checks the ruleset. */ ruleset = get_ruleset_from_fd(ruleset_fd, FMODE_CAN_READ); if (IS_ERR(ruleset)) return PTR_ERR(ruleset); /* Prepares new credentials. */ new_cred = prepare_creds(); if (!new_cred) { err = -ENOMEM; goto out_put_ruleset; } new_llcred = landlock_cred(new_cred); /* * There is no possible race condition while copying and manipulating * the current credentials because they are dedicated per thread. */ new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset); if (IS_ERR(new_dom)) { err = PTR_ERR(new_dom); goto out_put_creds; } /* Replaces the old (prepared) domain. */ landlock_put_ruleset(new_llcred->domain); new_llcred->domain = new_dom; landlock_put_ruleset(ruleset); return commit_creds(new_cred); out_put_creds: abort_creds(new_cred); out_put_ruleset: landlock_put_ruleset(ruleset); return err; } |
| 1 5 3 69 69 65 1 5 3 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 | /* * Copyright (C) 2015 Red Hat, Inc. * All Rights Reserved. * * Authors: * Dave Airlie * Alon Levy * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ #include <drm/drm_atomic_helper.h> #include <drm/drm_damage_helper.h> #include <drm/drm_edid.h> #include <drm/drm_fourcc.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_probe_helper.h> #include <drm/drm_simple_kms_helper.h> #include "virtgpu_drv.h" #define XRES_MIN 32 #define YRES_MIN 32 #define XRES_DEF 1024 #define YRES_DEF 768 #define XRES_MAX 8192 #define YRES_MAX 8192 #define drm_connector_to_virtio_gpu_output(x) \ container_of(x, struct virtio_gpu_output, conn) static const struct drm_crtc_funcs virtio_gpu_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .destroy = drm_crtc_cleanup, .page_flip = drm_atomic_helper_page_flip, .reset = drm_atomic_helper_crtc_reset, .atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state, .atomic_destroy_state = drm_atomic_helper_crtc_destroy_state, }; static const struct drm_framebuffer_funcs virtio_gpu_fb_funcs = { .create_handle = drm_gem_fb_create_handle, .destroy = drm_gem_fb_destroy, .dirty = drm_atomic_helper_dirtyfb, }; static int virtio_gpu_framebuffer_init(struct drm_device *dev, struct virtio_gpu_framebuffer *vgfb, const struct drm_mode_fb_cmd2 *mode_cmd, struct drm_gem_object *obj) { int ret; vgfb->base.obj[0] = obj; drm_helper_mode_fill_fb_struct(dev, &vgfb->base, mode_cmd); ret = drm_framebuffer_init(dev, &vgfb->base, &virtio_gpu_fb_funcs); if (ret) { vgfb->base.obj[0] = NULL; return ret; } return 0; } static void virtio_gpu_crtc_mode_set_nofb(struct drm_crtc *crtc) { struct drm_device *dev = crtc->dev; struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(crtc); virtio_gpu_cmd_set_scanout(vgdev, output->index, 0, crtc->mode.hdisplay, crtc->mode.vdisplay, 0, 0); virtio_gpu_notify(vgdev); } static void virtio_gpu_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { } static void virtio_gpu_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_device *dev = crtc->dev; struct virtio_gpu_device *vgdev = dev->dev_private; struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(crtc); virtio_gpu_cmd_set_scanout(vgdev, output->index, 0, 0, 0, 0, 0); virtio_gpu_notify(vgdev); } static int virtio_gpu_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { return 0; } static void virtio_gpu_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(crtc); /* * virtio-gpu can't do modeset and plane update operations * independent from each other. So the actual modeset happens * in the plane update callback, and here we just check * whenever we must force the modeset. */ if (drm_atomic_crtc_needs_modeset(crtc_state)) { output->needs_modeset = true; } } static const struct drm_crtc_helper_funcs virtio_gpu_crtc_helper_funcs = { .mode_set_nofb = virtio_gpu_crtc_mode_set_nofb, .atomic_check = virtio_gpu_crtc_atomic_check, .atomic_flush = virtio_gpu_crtc_atomic_flush, .atomic_enable = virtio_gpu_crtc_atomic_enable, .atomic_disable = virtio_gpu_crtc_atomic_disable, }; static void virtio_gpu_enc_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { } static void virtio_gpu_enc_enable(struct drm_encoder *encoder) { } static void virtio_gpu_enc_disable(struct drm_encoder *encoder) { } static int virtio_gpu_conn_get_modes(struct drm_connector *connector) { struct virtio_gpu_output *output = drm_connector_to_virtio_gpu_output(connector); struct drm_display_mode *mode = NULL; int count, width, height; count = drm_edid_connector_add_modes(connector); if (count) return count; width = le32_to_cpu(output->info.r.width); height = le32_to_cpu(output->info.r.height); count = drm_add_modes_noedid(connector, XRES_MAX, YRES_MAX); if (width == 0 || height == 0) { drm_set_preferred_mode(connector, XRES_DEF, YRES_DEF); } else { DRM_DEBUG("add mode: %dx%d\n", width, height); mode = drm_cvt_mode(connector->dev, width, height, 60, false, false, false); if (!mode) return count; mode->type |= DRM_MODE_TYPE_PREFERRED; drm_mode_probed_add(connector, mode); count++; } return count; } static enum drm_mode_status virtio_gpu_conn_mode_valid(struct drm_connector *connector, struct drm_display_mode *mode) { struct virtio_gpu_output *output = drm_connector_to_virtio_gpu_output(connector); int width, height; width = le32_to_cpu(output->info.r.width); height = le32_to_cpu(output->info.r.height); if (!(mode->type & DRM_MODE_TYPE_PREFERRED)) return MODE_OK; if (mode->hdisplay == XRES_DEF && mode->vdisplay == YRES_DEF) return MODE_OK; if (mode->hdisplay <= width && mode->hdisplay >= width - 16 && mode->vdisplay <= height && mode->vdisplay >= height - 16) return MODE_OK; DRM_DEBUG("del mode: %dx%d\n", mode->hdisplay, mode->vdisplay); return MODE_BAD; } static const struct drm_encoder_helper_funcs virtio_gpu_enc_helper_funcs = { .mode_set = virtio_gpu_enc_mode_set, .enable = virtio_gpu_enc_enable, .disable = virtio_gpu_enc_disable, }; static const struct drm_connector_helper_funcs virtio_gpu_conn_helper_funcs = { .get_modes = virtio_gpu_conn_get_modes, .mode_valid = virtio_gpu_conn_mode_valid, }; static enum drm_connector_status virtio_gpu_conn_detect( struct drm_connector *connector, bool force) { struct virtio_gpu_output *output = drm_connector_to_virtio_gpu_output(connector); if (output->info.enabled) return connector_status_connected; else return connector_status_disconnected; } static void virtio_gpu_conn_destroy(struct drm_connector *connector) { drm_connector_unregister(connector); drm_connector_cleanup(connector); } static const struct drm_connector_funcs virtio_gpu_connector_funcs = { .detect = virtio_gpu_conn_detect, .fill_modes = drm_helper_probe_single_connector_modes, .destroy = virtio_gpu_conn_destroy, .reset = drm_atomic_helper_connector_reset, .atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state, .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, }; static int vgdev_output_init(struct virtio_gpu_device *vgdev, int index) { struct drm_device *dev = vgdev->ddev; struct virtio_gpu_output *output = vgdev->outputs + index; struct drm_connector *connector = &output->conn; struct drm_encoder *encoder = &output->enc; struct drm_crtc *crtc = &output->crtc; struct drm_plane *primary, *cursor; output->index = index; if (index == 0) { output->info.enabled = cpu_to_le32(true); output->info.r.width = cpu_to_le32(XRES_DEF); output->info.r.height = cpu_to_le32(YRES_DEF); } primary = virtio_gpu_plane_init(vgdev, DRM_PLANE_TYPE_PRIMARY, index); if (IS_ERR(primary)) return PTR_ERR(primary); cursor = virtio_gpu_plane_init(vgdev, DRM_PLANE_TYPE_CURSOR, index); if (IS_ERR(cursor)) return PTR_ERR(cursor); drm_crtc_init_with_planes(dev, crtc, primary, cursor, &virtio_gpu_crtc_funcs, NULL); drm_crtc_helper_add(crtc, &virtio_gpu_crtc_helper_funcs); drm_connector_init(dev, connector, &virtio_gpu_connector_funcs, DRM_MODE_CONNECTOR_VIRTUAL); drm_connector_helper_add(connector, &virtio_gpu_conn_helper_funcs); if (vgdev->has_edid) drm_connector_attach_edid_property(connector); drm_simple_encoder_init(dev, encoder, DRM_MODE_ENCODER_VIRTUAL); drm_encoder_helper_add(encoder, &virtio_gpu_enc_helper_funcs); encoder->possible_crtcs = 1 << index; drm_connector_attach_encoder(connector, encoder); drm_connector_register(connector); return 0; } static struct drm_framebuffer * virtio_gpu_user_framebuffer_create(struct drm_device *dev, struct drm_file *file_priv, const struct drm_mode_fb_cmd2 *mode_cmd) { struct drm_gem_object *obj = NULL; struct virtio_gpu_framebuffer *virtio_gpu_fb; int ret; if (mode_cmd->pixel_format != DRM_FORMAT_HOST_XRGB8888 && mode_cmd->pixel_format != DRM_FORMAT_HOST_ARGB8888) return ERR_PTR(-ENOENT); /* lookup object associated with res handle */ obj = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]); if (!obj) return ERR_PTR(-EINVAL); virtio_gpu_fb = kzalloc(sizeof(*virtio_gpu_fb), GFP_KERNEL); if (virtio_gpu_fb == NULL) { drm_gem_object_put(obj); return ERR_PTR(-ENOMEM); } ret = virtio_gpu_framebuffer_init(dev, virtio_gpu_fb, mode_cmd, obj); if (ret) { kfree(virtio_gpu_fb); drm_gem_object_put(obj); return NULL; } return &virtio_gpu_fb->base; } static const struct drm_mode_config_funcs virtio_gpu_mode_funcs = { .fb_create = virtio_gpu_user_framebuffer_create, .atomic_check = drm_atomic_helper_check, .atomic_commit = drm_atomic_helper_commit, }; int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev) { int i, ret; if (!vgdev->num_scanouts) return 0; ret = drmm_mode_config_init(vgdev->ddev); if (ret) return ret; vgdev->ddev->mode_config.quirk_addfb_prefer_host_byte_order = true; vgdev->ddev->mode_config.funcs = &virtio_gpu_mode_funcs; /* modes will be validated against the framebuffer size */ vgdev->ddev->mode_config.min_width = XRES_MIN; vgdev->ddev->mode_config.min_height = YRES_MIN; vgdev->ddev->mode_config.max_width = XRES_MAX; vgdev->ddev->mode_config.max_height = YRES_MAX; vgdev->ddev->mode_config.fb_modifiers_not_supported = true; for (i = 0 ; i < vgdev->num_scanouts; ++i) vgdev_output_init(vgdev, i); drm_mode_config_reset(vgdev->ddev); return 0; } void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev) { int i; if (!vgdev->num_scanouts) return; for (i = 0 ; i < vgdev->num_scanouts; ++i) drm_edid_free(vgdev->outputs[i].drm_edid); } |
| 946 98 19 19 53 21 21 21 21 37 37 38 34 37 6 6 6 6 6 802 799 5 5 5 799 796 5 5 222 212 21 21 21 21 19 19 19 18 19 19 19 19 19 18 5 21 985 991 19 19 19 19 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 | // SPDX-License-Identifier: GPL-2.0 /* * bus.c - bus driver management * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2007 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2007 Novell Inc. * Copyright (c) 2023 Greg Kroah-Hartman <gregkh@linuxfoundation.org> */ #include <linux/async.h> #include <linux/device/bus.h> #include <linux/device.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/string.h> #include <linux/mutex.h> #include <linux/sysfs.h> #include "base.h" #include "power/power.h" /* /sys/devices/system */ static struct kset *system_kset; /* /sys/bus */ static struct kset *bus_kset; #define to_bus_attr(_attr) container_of(_attr, struct bus_attribute, attr) /* * sysfs bindings for drivers */ #define to_drv_attr(_attr) container_of(_attr, struct driver_attribute, attr) #define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) \ struct driver_attribute driver_attr_##_name = \ __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store) static int __must_check bus_rescan_devices_helper(struct device *dev, void *data); /** * bus_to_subsys - Turn a struct bus_type into a struct subsys_private * * @bus: pointer to the struct bus_type to look up * * The driver core internals needs to work on the subsys_private structure, not * the external struct bus_type pointer. This function walks the list of * registered busses in the system and finds the matching one and returns the * internal struct subsys_private that relates to that bus. * * Note, the reference count of the return value is INCREMENTED if it is not * NULL. A call to subsys_put() must be done when finished with the pointer in * order for it to be properly freed. */ static struct subsys_private *bus_to_subsys(const struct bus_type *bus) { struct subsys_private *sp = NULL; struct kobject *kobj; if (!bus || !bus_kset) return NULL; spin_lock(&bus_kset->list_lock); if (list_empty(&bus_kset->list)) goto done; list_for_each_entry(kobj, &bus_kset->list, entry) { struct kset *kset = container_of(kobj, struct kset, kobj); sp = container_of_const(kset, struct subsys_private, subsys); if (sp->bus == bus) goto done; } sp = NULL; done: sp = subsys_get(sp); spin_unlock(&bus_kset->list_lock); return sp; } static const struct bus_type *bus_get(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); if (sp) return bus; return NULL; } static void bus_put(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); /* two puts are required as the call to bus_to_subsys incremented it again */ subsys_put(sp); subsys_put(sp); } static ssize_t drv_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct driver_attribute *drv_attr = to_drv_attr(attr); struct driver_private *drv_priv = to_driver(kobj); ssize_t ret = -EIO; if (drv_attr->show) ret = drv_attr->show(drv_priv->driver, buf); return ret; } static ssize_t drv_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct driver_attribute *drv_attr = to_drv_attr(attr); struct driver_private *drv_priv = to_driver(kobj); ssize_t ret = -EIO; if (drv_attr->store) ret = drv_attr->store(drv_priv->driver, buf, count); return ret; } static const struct sysfs_ops driver_sysfs_ops = { .show = drv_attr_show, .store = drv_attr_store, }; static void driver_release(struct kobject *kobj) { struct driver_private *drv_priv = to_driver(kobj); pr_debug("driver: '%s': %s\n", kobject_name(kobj), __func__); kfree(drv_priv); } static const struct kobj_type driver_ktype = { .sysfs_ops = &driver_sysfs_ops, .release = driver_release, }; /* * sysfs bindings for buses */ static ssize_t bus_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); ssize_t ret = 0; if (bus_attr->show) ret = bus_attr->show(subsys_priv->bus, buf); return ret; } static ssize_t bus_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct bus_attribute *bus_attr = to_bus_attr(attr); struct subsys_private *subsys_priv = to_subsys_private(kobj); ssize_t ret = 0; if (bus_attr->store) ret = bus_attr->store(subsys_priv->bus, buf, count); return ret; } static const struct sysfs_ops bus_sysfs_ops = { .show = bus_attr_show, .store = bus_attr_store, }; int bus_create_file(const struct bus_type *bus, struct bus_attribute *attr) { struct subsys_private *sp = bus_to_subsys(bus); int error; if (!sp) return -EINVAL; error = sysfs_create_file(&sp->subsys.kobj, &attr->attr); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_create_file); void bus_remove_file(const struct bus_type *bus, struct bus_attribute *attr) { struct subsys_private *sp = bus_to_subsys(bus); if (!sp) return; sysfs_remove_file(&sp->subsys.kobj, &attr->attr); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_remove_file); static void bus_release(struct kobject *kobj) { struct subsys_private *priv = to_subsys_private(kobj); lockdep_unregister_key(&priv->lock_key); kfree(priv); } static const struct kobj_type bus_ktype = { .sysfs_ops = &bus_sysfs_ops, .release = bus_release, }; static int bus_uevent_filter(const struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &bus_ktype) return 1; return 0; } static const struct kset_uevent_ops bus_uevent_ops = { .filter = bus_uevent_filter, }; /* Manually detach a device from its associated driver. */ static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count) { const struct bus_type *bus = bus_get(drv->bus); struct device *dev; int err = -ENODEV; dev = bus_find_device_by_name(bus, NULL, buf); if (dev && dev->driver == drv) { device_driver_detach(dev); err = count; } put_device(dev); bus_put(bus); return err; } static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store); /* * Manually attach a device to a driver. * Note: the driver must want to bind to the device, * it is not possible to override the driver's id table. */ static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count) { const struct bus_type *bus = bus_get(drv->bus); struct device *dev; int err = -ENODEV; dev = bus_find_device_by_name(bus, NULL, buf); if (dev && driver_match_device(drv, dev)) { err = device_driver_attach(drv, dev); if (!err) { /* success */ err = count; } } put_device(dev); bus_put(bus); return err; } static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store); static ssize_t drivers_autoprobe_show(const struct bus_type *bus, char *buf) { struct subsys_private *sp = bus_to_subsys(bus); int ret; if (!sp) return -EINVAL; ret = sysfs_emit(buf, "%d\n", sp->drivers_autoprobe); subsys_put(sp); return ret; } static ssize_t drivers_autoprobe_store(const struct bus_type *bus, const char *buf, size_t count) { struct subsys_private *sp = bus_to_subsys(bus); if (!sp) return -EINVAL; if (buf[0] == '0') sp->drivers_autoprobe = 0; else sp->drivers_autoprobe = 1; subsys_put(sp); return count; } static ssize_t drivers_probe_store(const struct bus_type *bus, const char *buf, size_t count) { struct device *dev; int err = -EINVAL; dev = bus_find_device_by_name(bus, NULL, buf); if (!dev) return -ENODEV; if (bus_rescan_devices_helper(dev, NULL) == 0) err = count; put_device(dev); return err; } static struct device *next_device(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct device *dev = NULL; struct device_private *dev_prv; if (n) { dev_prv = to_device_private_bus(n); dev = dev_prv->device; } return dev; } /** * bus_for_each_dev - device iterator. * @bus: bus type. * @start: device to start iterating from. * @data: data for the callback. * @fn: function to be called for each device. * * Iterate over @bus's list of devices, and call @fn for each, * passing it @data. If @start is not NULL, we use that device to * begin iterating from. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. * * NOTE: The device that returns a non-zero value is not retained * in any way, nor is its refcount incremented. If the caller needs * to retain this data, it should do so, and increment the reference * count in the supplied callback. */ int bus_for_each_dev(const struct bus_type *bus, struct device *start, void *data, int (*fn)(struct device *, void *)) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; int error = 0; if (!sp) return -EINVAL; klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); while (!error && (dev = next_device(&i))) error = fn(dev, data); klist_iter_exit(&i); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_for_each_dev); /** * bus_find_device - device iterator for locating a particular device. * @bus: bus type * @start: Device to begin with * @data: Data to pass to match function * @match: Callback function to check device * * This is similar to the bus_for_each_dev() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero, this function will * return to the caller and not iterate over any more devices. */ struct device *bus_find_device(const struct bus_type *bus, struct device *start, const void *data, int (*match)(struct device *dev, const void *data)) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device *dev; if (!sp) return NULL; klist_iter_init_node(&sp->klist_devices, &i, (start ? &start->p->knode_bus : NULL)); while ((dev = next_device(&i))) if (match(dev, data) && get_device(dev)) break; klist_iter_exit(&i); subsys_put(sp); return dev; } EXPORT_SYMBOL_GPL(bus_find_device); static struct device_driver *next_driver(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct driver_private *drv_priv; if (n) { drv_priv = container_of(n, struct driver_private, knode_bus); return drv_priv->driver; } return NULL; } /** * bus_for_each_drv - driver iterator * @bus: bus we're dealing with. * @start: driver to start iterating on. * @data: data to pass to the callback. * @fn: function to call for each driver. * * This is nearly identical to the device iterator above. * We iterate over each driver that belongs to @bus, and call * @fn for each. If @fn returns anything but 0, we break out * and return it. If @start is not NULL, we use it as the head * of the list. * * NOTE: we don't return the driver that returns a non-zero * value, nor do we leave the reference count incremented for that * driver. If the caller needs to know that info, it must set it * in the callback. It must also be sure to increment the refcount * so it doesn't disappear before returning to the caller. */ int bus_for_each_drv(const struct bus_type *bus, struct device_driver *start, void *data, int (*fn)(struct device_driver *, void *)) { struct subsys_private *sp = bus_to_subsys(bus); struct klist_iter i; struct device_driver *drv; int error = 0; if (!sp) return -EINVAL; klist_iter_init_node(&sp->klist_drivers, &i, start ? &start->p->knode_bus : NULL); while ((drv = next_driver(&i)) && !error) error = fn(drv, data); klist_iter_exit(&i); subsys_put(sp); return error; } EXPORT_SYMBOL_GPL(bus_for_each_drv); /** * bus_add_device - add device to bus * @dev: device being added * * - Add device's bus attributes. * - Create links to device's bus. * - Add the device to its bus's list of devices. */ int bus_add_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); int error; if (!sp) { /* * This is a normal operation for many devices that do not * have a bus assigned to them, just say that all went * well. */ return 0; } /* * Reference in sp is now incremented and will be dropped when * the device is removed from the bus */ pr_debug("bus: '%s': add device %s\n", sp->bus->name, dev_name(dev)); error = device_add_groups(dev, sp->bus->dev_groups); if (error) goto out_put; error = sysfs_create_link(&sp->devices_kset->kobj, &dev->kobj, dev_name(dev)); if (error) goto out_groups; error = sysfs_create_link(&dev->kobj, &sp->subsys.kobj, "subsystem"); if (error) goto out_subsys; klist_add_tail(&dev->p->knode_bus, &sp->klist_devices); return 0; out_subsys: sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); out_groups: device_remove_groups(dev, sp->bus->dev_groups); out_put: subsys_put(sp); return error; } /** * bus_probe_device - probe drivers for a new device * @dev: device to probe * * - Automatically probe for a driver if the bus allows it. */ void bus_probe_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); struct subsys_interface *sif; if (!sp) return; if (sp->drivers_autoprobe) device_initial_probe(dev); mutex_lock(&sp->mutex); list_for_each_entry(sif, &sp->interfaces, node) if (sif->add_dev) sif->add_dev(dev, sif); mutex_unlock(&sp->mutex); subsys_put(sp); } /** * bus_remove_device - remove device from bus * @dev: device to be removed * * - Remove device from all interfaces. * - Remove symlink from bus' directory. * - Delete device from bus's list. * - Detach from its driver. * - Drop reference taken in bus_add_device(). */ void bus_remove_device(struct device *dev) { struct subsys_private *sp = bus_to_subsys(dev->bus); struct subsys_interface *sif; if (!sp) return; mutex_lock(&sp->mutex); list_for_each_entry(sif, &sp->interfaces, node) if (sif->remove_dev) sif->remove_dev(dev, sif); mutex_unlock(&sp->mutex); sysfs_remove_link(&dev->kobj, "subsystem"); sysfs_remove_link(&sp->devices_kset->kobj, dev_name(dev)); device_remove_groups(dev, dev->bus->dev_groups); if (klist_node_attached(&dev->p->knode_bus)) klist_del(&dev->p->knode_bus); pr_debug("bus: '%s': remove device %s\n", dev->bus->name, dev_name(dev)); device_release_driver(dev); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in bus_add_device() */ subsys_put(sp); subsys_put(sp); } static int __must_check add_bind_files(struct device_driver *drv) { int ret; ret = driver_create_file(drv, &driver_attr_unbind); if (ret == 0) { ret = driver_create_file(drv, &driver_attr_bind); if (ret) driver_remove_file(drv, &driver_attr_unbind); } return ret; } static void remove_bind_files(struct device_driver *drv) { driver_remove_file(drv, &driver_attr_bind); driver_remove_file(drv, &driver_attr_unbind); } static BUS_ATTR_WO(drivers_probe); static BUS_ATTR_RW(drivers_autoprobe); static int add_probe_files(const struct bus_type *bus) { int retval; retval = bus_create_file(bus, &bus_attr_drivers_probe); if (retval) goto out; retval = bus_create_file(bus, &bus_attr_drivers_autoprobe); if (retval) bus_remove_file(bus, &bus_attr_drivers_probe); out: return retval; } static void remove_probe_files(const struct bus_type *bus) { bus_remove_file(bus, &bus_attr_drivers_autoprobe); bus_remove_file(bus, &bus_attr_drivers_probe); } static ssize_t uevent_store(struct device_driver *drv, const char *buf, size_t count) { int rc; rc = kobject_synth_uevent(&drv->p->kobj, buf, count); return rc ? rc : count; } static DRIVER_ATTR_WO(uevent); /** * bus_add_driver - Add a driver to the bus. * @drv: driver. */ int bus_add_driver(struct device_driver *drv) { struct subsys_private *sp = bus_to_subsys(drv->bus); struct driver_private *priv; int error = 0; if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the driver is removed from the bus */ pr_debug("bus: '%s': add driver %s\n", sp->bus->name, drv->name); priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) { error = -ENOMEM; goto out_put_bus; } klist_init(&priv->klist_devices, NULL, NULL); priv->driver = drv; drv->p = priv; priv->kobj.kset = sp->drivers_kset; error = kobject_init_and_add(&priv->kobj, &driver_ktype, NULL, "%s", drv->name); if (error) goto out_unregister; klist_add_tail(&priv->knode_bus, &sp->klist_drivers); if (sp->drivers_autoprobe) { error = driver_attach(drv); if (error) goto out_del_list; } error = module_add_driver(drv->owner, drv); if (error) { printk(KERN_ERR "%s: failed to create module links for %s\n", __func__, drv->name); goto out_detach; } error = driver_create_file(drv, &driver_attr_uevent); if (error) { printk(KERN_ERR "%s: uevent attr (%s) failed\n", __func__, drv->name); } error = driver_add_groups(drv, sp->bus->drv_groups); if (error) { /* How the hell do we get out of this pickle? Give up */ printk(KERN_ERR "%s: driver_add_groups(%s) failed\n", __func__, drv->name); } if (!drv->suppress_bind_attrs) { error = add_bind_files(drv); if (error) { /* Ditto */ printk(KERN_ERR "%s: add_bind_files(%s) failed\n", __func__, drv->name); } } return 0; out_detach: driver_detach(drv); out_del_list: klist_del(&priv->knode_bus); out_unregister: kobject_put(&priv->kobj); /* drv->p is freed in driver_release() */ drv->p = NULL; out_put_bus: subsys_put(sp); return error; } /** * bus_remove_driver - delete driver from bus's knowledge. * @drv: driver. * * Detach the driver from the devices it controls, and remove * it from its bus's list of drivers. Finally, we drop the reference * to the bus we took in bus_add_driver(). */ void bus_remove_driver(struct device_driver *drv) { struct subsys_private *sp = bus_to_subsys(drv->bus); if (!sp) return; pr_debug("bus: '%s': remove driver %s\n", sp->bus->name, drv->name); if (!drv->suppress_bind_attrs) remove_bind_files(drv); driver_remove_groups(drv, sp->bus->drv_groups); driver_remove_file(drv, &driver_attr_uevent); klist_remove(&drv->p->knode_bus); driver_detach(drv); module_remove_driver(drv); kobject_put(&drv->p->kobj); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in bus_add_driver() */ subsys_put(sp); subsys_put(sp); } /* Helper for bus_rescan_devices's iter */ static int __must_check bus_rescan_devices_helper(struct device *dev, void *data) { int ret = 0; if (!dev->driver) { if (dev->parent && dev->bus->need_parent_lock) device_lock(dev->parent); ret = device_attach(dev); if (dev->parent && dev->bus->need_parent_lock) device_unlock(dev->parent); } return ret < 0 ? ret : 0; } /** * bus_rescan_devices - rescan devices on the bus for possible drivers * @bus: the bus to scan. * * This function will look for devices on the bus with no driver * attached and rescan it against existing drivers to see if it matches * any by calling device_attach() for the unbound devices. */ int bus_rescan_devices(const struct bus_type *bus) { return bus_for_each_dev(bus, NULL, NULL, bus_rescan_devices_helper); } EXPORT_SYMBOL_GPL(bus_rescan_devices); /** * device_reprobe - remove driver for a device and probe for a new driver * @dev: the device to reprobe * * This function detaches the attached driver (if any) for the given * device and restarts the driver probing process. It is intended * to use if probing criteria changed during a devices lifetime and * driver attachment should change accordingly. */ int device_reprobe(struct device *dev) { if (dev->driver) device_driver_detach(dev); return bus_rescan_devices_helper(dev, NULL); } EXPORT_SYMBOL_GPL(device_reprobe); static void klist_devices_get(struct klist_node *n) { struct device_private *dev_prv = to_device_private_bus(n); struct device *dev = dev_prv->device; get_device(dev); } static void klist_devices_put(struct klist_node *n) { struct device_private *dev_prv = to_device_private_bus(n); struct device *dev = dev_prv->device; put_device(dev); } static ssize_t bus_uevent_store(const struct bus_type *bus, const char *buf, size_t count) { struct subsys_private *sp = bus_to_subsys(bus); int ret; if (!sp) return -EINVAL; ret = kobject_synth_uevent(&sp->subsys.kobj, buf, count); subsys_put(sp); if (ret) return ret; return count; } /* * "open code" the old BUS_ATTR() macro here. We want to use BUS_ATTR_WO() * here, but can not use it as earlier in the file we have * DEVICE_ATTR_WO(uevent), which would cause a clash with the with the store * function name. */ static struct bus_attribute bus_attr_uevent = __ATTR(uevent, 0200, NULL, bus_uevent_store); /** * bus_register - register a driver-core subsystem * @bus: bus to register * * Once we have that, we register the bus with the kobject * infrastructure, then register the children subsystems it has: * the devices and drivers that belong to the subsystem. */ int bus_register(const struct bus_type *bus) { int retval; struct subsys_private *priv; struct kobject *bus_kobj; struct lock_class_key *key; priv = kzalloc(sizeof(struct subsys_private), GFP_KERNEL); if (!priv) return -ENOMEM; priv->bus = bus; BLOCKING_INIT_NOTIFIER_HEAD(&priv->bus_notifier); bus_kobj = &priv->subsys.kobj; retval = kobject_set_name(bus_kobj, "%s", bus->name); if (retval) goto out; bus_kobj->kset = bus_kset; bus_kobj->ktype = &bus_ktype; priv->drivers_autoprobe = 1; retval = kset_register(&priv->subsys); if (retval) goto out; retval = bus_create_file(bus, &bus_attr_uevent); if (retval) goto bus_uevent_fail; priv->devices_kset = kset_create_and_add("devices", NULL, bus_kobj); if (!priv->devices_kset) { retval = -ENOMEM; goto bus_devices_fail; } priv->drivers_kset = kset_create_and_add("drivers", NULL, bus_kobj); if (!priv->drivers_kset) { retval = -ENOMEM; goto bus_drivers_fail; } INIT_LIST_HEAD(&priv->interfaces); key = &priv->lock_key; lockdep_register_key(key); __mutex_init(&priv->mutex, "subsys mutex", key); klist_init(&priv->klist_devices, klist_devices_get, klist_devices_put); klist_init(&priv->klist_drivers, NULL, NULL); retval = add_probe_files(bus); if (retval) goto bus_probe_files_fail; retval = sysfs_create_groups(bus_kobj, bus->bus_groups); if (retval) goto bus_groups_fail; pr_debug("bus: '%s': registered\n", bus->name); return 0; bus_groups_fail: remove_probe_files(bus); bus_probe_files_fail: kset_unregister(priv->drivers_kset); bus_drivers_fail: kset_unregister(priv->devices_kset); bus_devices_fail: bus_remove_file(bus, &bus_attr_uevent); bus_uevent_fail: kset_unregister(&priv->subsys); out: kfree(priv); return retval; } EXPORT_SYMBOL_GPL(bus_register); /** * bus_unregister - remove a bus from the system * @bus: bus. * * Unregister the child subsystems and the bus itself. * Finally, we call bus_put() to release the refcount */ void bus_unregister(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kobject *bus_kobj; if (!sp) return; pr_debug("bus: '%s': unregistering\n", bus->name); if (sp->dev_root) device_unregister(sp->dev_root); bus_kobj = &sp->subsys.kobj; sysfs_remove_groups(bus_kobj, bus->bus_groups); remove_probe_files(bus); bus_remove_file(bus, &bus_attr_uevent); kset_unregister(sp->drivers_kset); kset_unregister(sp->devices_kset); kset_unregister(&sp->subsys); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_unregister); int bus_register_notifier(const struct bus_type *bus, struct notifier_block *nb) { struct subsys_private *sp = bus_to_subsys(bus); int retval; if (!sp) return -EINVAL; retval = blocking_notifier_chain_register(&sp->bus_notifier, nb); subsys_put(sp); return retval; } EXPORT_SYMBOL_GPL(bus_register_notifier); int bus_unregister_notifier(const struct bus_type *bus, struct notifier_block *nb) { struct subsys_private *sp = bus_to_subsys(bus); int retval; if (!sp) return -EINVAL; retval = blocking_notifier_chain_unregister(&sp->bus_notifier, nb); subsys_put(sp); return retval; } EXPORT_SYMBOL_GPL(bus_unregister_notifier); void bus_notify(struct device *dev, enum bus_notifier_event value) { struct subsys_private *sp = bus_to_subsys(dev->bus); if (!sp) return; blocking_notifier_call_chain(&sp->bus_notifier, value, dev); subsys_put(sp); } struct kset *bus_get_kset(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kset *kset; if (!sp) return NULL; kset = &sp->subsys; subsys_put(sp); return kset; } EXPORT_SYMBOL_GPL(bus_get_kset); /* * Yes, this forcibly breaks the klist abstraction temporarily. It * just wants to sort the klist, not change reference counts and * take/drop locks rapidly in the process. It does all this while * holding the lock for the list, so objects can't otherwise be * added/removed while we're swizzling. */ static void device_insertion_sort_klist(struct device *a, struct list_head *list, int (*compare)(const struct device *a, const struct device *b)) { struct klist_node *n; struct device_private *dev_prv; struct device *b; list_for_each_entry(n, list, n_node) { dev_prv = to_device_private_bus(n); b = dev_prv->device; if (compare(a, b) <= 0) { list_move_tail(&a->p->knode_bus.n_node, &b->p->knode_bus.n_node); return; } } list_move_tail(&a->p->knode_bus.n_node, list); } void bus_sort_breadthfirst(const struct bus_type *bus, int (*compare)(const struct device *a, const struct device *b)) { struct subsys_private *sp = bus_to_subsys(bus); LIST_HEAD(sorted_devices); struct klist_node *n, *tmp; struct device_private *dev_prv; struct device *dev; struct klist *device_klist; if (!sp) return; device_klist = &sp->klist_devices; spin_lock(&device_klist->k_lock); list_for_each_entry_safe(n, tmp, &device_klist->k_list, n_node) { dev_prv = to_device_private_bus(n); dev = dev_prv->device; device_insertion_sort_klist(dev, &sorted_devices, compare); } list_splice(&sorted_devices, &device_klist->k_list); spin_unlock(&device_klist->k_lock); subsys_put(sp); } EXPORT_SYMBOL_GPL(bus_sort_breadthfirst); struct subsys_dev_iter { struct klist_iter ki; const struct device_type *type; }; /** * subsys_dev_iter_init - initialize subsys device iterator * @iter: subsys iterator to initialize * @sp: the subsys private (i.e. bus) we wanna iterate over * @start: the device to start iterating from, if any * @type: device_type of the devices to iterate over, NULL for all * * Initialize subsys iterator @iter such that it iterates over devices * of @subsys. If @start is set, the list iteration will start there, * otherwise if it is NULL, the iteration starts at the beginning of * the list. */ static void subsys_dev_iter_init(struct subsys_dev_iter *iter, struct subsys_private *sp, struct device *start, const struct device_type *type) { struct klist_node *start_knode = NULL; if (start) start_knode = &start->p->knode_bus; klist_iter_init_node(&sp->klist_devices, &iter->ki, start_knode); iter->type = type; } /** * subsys_dev_iter_next - iterate to the next device * @iter: subsys iterator to proceed * * Proceed @iter to the next device and return it. Returns NULL if * iteration is complete. * * The returned device is referenced and won't be released till * iterator is proceed to the next device or exited. The caller is * free to do whatever it wants to do with the device including * calling back into subsys code. */ static struct device *subsys_dev_iter_next(struct subsys_dev_iter *iter) { struct klist_node *knode; struct device *dev; for (;;) { knode = klist_next(&iter->ki); if (!knode) return NULL; dev = to_device_private_bus(knode)->device; if (!iter->type || iter->type == dev->type) return dev; } } /** * subsys_dev_iter_exit - finish iteration * @iter: subsys iterator to finish * * Finish an iteration. Always call this function after iteration is * complete whether the iteration ran till the end or not. */ static void subsys_dev_iter_exit(struct subsys_dev_iter *iter) { klist_iter_exit(&iter->ki); } int subsys_interface_register(struct subsys_interface *sif) { struct subsys_private *sp; struct subsys_dev_iter iter; struct device *dev; if (!sif || !sif->subsys) return -ENODEV; sp = bus_to_subsys(sif->subsys); if (!sp) return -EINVAL; /* * Reference in sp is now incremented and will be dropped when * the interface is removed from the bus */ mutex_lock(&sp->mutex); list_add_tail(&sif->node, &sp->interfaces); if (sif->add_dev) { subsys_dev_iter_init(&iter, sp, NULL, NULL); while ((dev = subsys_dev_iter_next(&iter))) sif->add_dev(dev, sif); subsys_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); return 0; } EXPORT_SYMBOL_GPL(subsys_interface_register); void subsys_interface_unregister(struct subsys_interface *sif) { struct subsys_private *sp; struct subsys_dev_iter iter; struct device *dev; if (!sif || !sif->subsys) return; sp = bus_to_subsys(sif->subsys); if (!sp) return; mutex_lock(&sp->mutex); list_del_init(&sif->node); if (sif->remove_dev) { subsys_dev_iter_init(&iter, sp, NULL, NULL); while ((dev = subsys_dev_iter_next(&iter))) sif->remove_dev(dev, sif); subsys_dev_iter_exit(&iter); } mutex_unlock(&sp->mutex); /* * Decrement the reference count twice, once for the bus_to_subsys() * call in the start of this function, and the second one from the * reference increment in subsys_interface_register() */ subsys_put(sp); subsys_put(sp); } EXPORT_SYMBOL_GPL(subsys_interface_unregister); static void system_root_device_release(struct device *dev) { kfree(dev); } static int subsys_register(const struct bus_type *subsys, const struct attribute_group **groups, struct kobject *parent_of_root) { struct subsys_private *sp; struct device *dev; int err; err = bus_register(subsys); if (err < 0) return err; sp = bus_to_subsys(subsys); if (!sp) { err = -EINVAL; goto err_sp; } dev = kzalloc(sizeof(struct device), GFP_KERNEL); if (!dev) { err = -ENOMEM; goto err_dev; } err = dev_set_name(dev, "%s", subsys->name); if (err < 0) goto err_name; dev->kobj.parent = parent_of_root; dev->groups = groups; dev->release = system_root_device_release; err = device_register(dev); if (err < 0) goto err_dev_reg; sp->dev_root = dev; subsys_put(sp); return 0; err_dev_reg: put_device(dev); dev = NULL; err_name: kfree(dev); err_dev: subsys_put(sp); err_sp: bus_unregister(subsys); return err; } /** * subsys_system_register - register a subsystem at /sys/devices/system/ * @subsys: system subsystem * @groups: default attributes for the root device * * All 'system' subsystems have a /sys/devices/system/<name> root device * with the name of the subsystem. The root device can carry subsystem- * wide attributes. All registered devices are below this single root * device and are named after the subsystem with a simple enumeration * number appended. The registered devices are not explicitly named; * only 'id' in the device needs to be set. * * Do not use this interface for anything new, it exists for compatibility * with bad ideas only. New subsystems should use plain subsystems; and * add the subsystem-wide attributes should be added to the subsystem * directory itself and not some create fake root-device placed in * /sys/devices/system/<name>. */ int subsys_system_register(const struct bus_type *subsys, const struct attribute_group **groups) { return subsys_register(subsys, groups, &system_kset->kobj); } EXPORT_SYMBOL_GPL(subsys_system_register); /** * subsys_virtual_register - register a subsystem at /sys/devices/virtual/ * @subsys: virtual subsystem * @groups: default attributes for the root device * * All 'virtual' subsystems have a /sys/devices/system/<name> root device * with the name of the subystem. The root device can carry subsystem-wide * attributes. All registered devices are below this single root device. * There's no restriction on device naming. This is for kernel software * constructs which need sysfs interface. */ int subsys_virtual_register(const struct bus_type *subsys, const struct attribute_group **groups) { struct kobject *virtual_dir; virtual_dir = virtual_device_parent(NULL); if (!virtual_dir) return -ENOMEM; return subsys_register(subsys, groups, virtual_dir); } EXPORT_SYMBOL_GPL(subsys_virtual_register); /** * driver_find - locate driver on a bus by its name. * @name: name of the driver. * @bus: bus to scan for the driver. * * Call kset_find_obj() to iterate over list of drivers on * a bus to find driver by name. Return driver if found. * * This routine provides no locking to prevent the driver it returns * from being unregistered or unloaded while the caller is using it. * The caller is responsible for preventing this. */ struct device_driver *driver_find(const char *name, const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct kobject *k; struct driver_private *priv; if (!sp) return NULL; k = kset_find_obj(sp->drivers_kset, name); subsys_put(sp); if (!k) return NULL; priv = to_driver(k); /* Drop reference added by kset_find_obj() */ kobject_put(k); return priv->driver; } EXPORT_SYMBOL_GPL(driver_find); /* * Warning, the value could go to "removed" instantly after calling this function, so be very * careful when calling it... */ bool bus_is_registered(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); bool is_initialized = false; if (sp) { is_initialized = true; subsys_put(sp); } return is_initialized; } /** * bus_get_dev_root - return a pointer to the "device root" of a bus * @bus: bus to return the device root of. * * If a bus has a "device root" structure, return it, WITH THE REFERENCE * COUNT INCREMENTED. * * Note, when finished with the device, a call to put_device() is required. * * If the device root is not present (or bus is not a valid pointer), NULL * will be returned. */ struct device *bus_get_dev_root(const struct bus_type *bus) { struct subsys_private *sp = bus_to_subsys(bus); struct device *dev_root; if (!sp) return NULL; dev_root = get_device(sp->dev_root); subsys_put(sp); return dev_root; } EXPORT_SYMBOL_GPL(bus_get_dev_root); int __init buses_init(void) { bus_kset = kset_create_and_add("bus", &bus_uevent_ops, NULL); if (!bus_kset) return -ENOMEM; system_kset = kset_create_and_add("system", NULL, &devices_kset->kobj); if (!system_kset) return -ENOMEM; return 0; } |
| 1 4 4 3 3 3 3 2 2 2 2 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Glue Code for assembler optimized version of Blowfish * * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> * * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by: * Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au> */ #include <crypto/algapi.h> #include <crypto/blowfish.h> #include <crypto/internal/skcipher.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> #include "ecb_cbc_helpers.h" /* regular block cipher functions */ asmlinkage void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src); /* 4-way parallel cipher functions */ asmlinkage void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src); asmlinkage void __blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src, bool cbc); static inline void blowfish_dec_ecb_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src) { return __blowfish_dec_blk_4way(ctx, dst, src, false); } static inline void blowfish_dec_cbc_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src) { return __blowfish_dec_blk_4way(ctx, dst, src, true); } static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src); } static void blowfish_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { blowfish_dec_blk(crypto_tfm_ctx(tfm), dst, src); } static int blowfish_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { return blowfish_setkey(&tfm->base, key, keylen); } static int ecb_encrypt(struct skcipher_request *req) { ECB_WALK_START(req, BF_BLOCK_SIZE, -1); ECB_BLOCK(4, blowfish_enc_blk_4way); ECB_BLOCK(1, blowfish_enc_blk); ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { ECB_WALK_START(req, BF_BLOCK_SIZE, -1); ECB_BLOCK(4, blowfish_dec_ecb_4way); ECB_BLOCK(1, blowfish_dec_blk); ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { CBC_WALK_START(req, BF_BLOCK_SIZE, -1); CBC_ENC_BLOCK(blowfish_enc_blk); CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { CBC_WALK_START(req, BF_BLOCK_SIZE, -1); CBC_DEC_BLOCK(4, blowfish_dec_cbc_4way); CBC_DEC_BLOCK(1, blowfish_dec_blk); CBC_WALK_END(); } static struct crypto_alg bf_cipher_alg = { .cra_name = "blowfish", .cra_driver_name = "blowfish-asm", .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = BF_BLOCK_SIZE, .cra_ctxsize = sizeof(struct bf_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = BF_MIN_KEY_SIZE, .cia_max_keysize = BF_MAX_KEY_SIZE, .cia_setkey = blowfish_setkey, .cia_encrypt = blowfish_encrypt, .cia_decrypt = blowfish_decrypt, } } }; static struct skcipher_alg bf_skcipher_algs[] = { { .base.cra_name = "ecb(blowfish)", .base.cra_driver_name = "ecb-blowfish-asm", .base.cra_priority = 300, .base.cra_blocksize = BF_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct bf_ctx), .base.cra_module = THIS_MODULE, .min_keysize = BF_MIN_KEY_SIZE, .max_keysize = BF_MAX_KEY_SIZE, .setkey = blowfish_setkey_skcipher, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, { .base.cra_name = "cbc(blowfish)", .base.cra_driver_name = "cbc-blowfish-asm", .base.cra_priority = 300, .base.cra_blocksize = BF_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct bf_ctx), .base.cra_module = THIS_MODULE, .min_keysize = BF_MIN_KEY_SIZE, .max_keysize = BF_MAX_KEY_SIZE, .ivsize = BF_BLOCK_SIZE, .setkey = blowfish_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, }, }; static bool is_blacklisted_cpu(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return false; if (boot_cpu_data.x86 == 0x0f) { /* * On Pentium 4, blowfish-x86_64 is slower than generic C * implementation because use of 64bit rotates (which are really * slow on P4). Therefore blacklist P4s. */ return true; } return false; } static int force; module_param(force, int, 0); MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init blowfish_init(void) { int err; if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "blowfish-x86_64: performance on this CPU " "would be suboptimal: disabling " "blowfish-x86_64.\n"); return -ENODEV; } err = crypto_register_alg(&bf_cipher_alg); if (err) return err; err = crypto_register_skciphers(bf_skcipher_algs, ARRAY_SIZE(bf_skcipher_algs)); if (err) crypto_unregister_alg(&bf_cipher_alg); return err; } static void __exit blowfish_fini(void) { crypto_unregister_alg(&bf_cipher_alg); crypto_unregister_skciphers(bf_skcipher_algs, ARRAY_SIZE(bf_skcipher_algs)); } module_init(blowfish_init); module_exit(blowfish_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Blowfish Cipher Algorithm, asm optimized"); MODULE_ALIAS_CRYPTO("blowfish"); MODULE_ALIAS_CRYPTO("blowfish-asm"); |
| 14 14 216 88 147 25 2 113 124 125 3 2 86 1 1 20 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 | /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef __SOUND_PCM_H #define __SOUND_PCM_H /* * Digital Audio (PCM) abstract layer * Copyright (c) by Jaroslav Kysela <perex@perex.cz> * Abramo Bagnara <abramo@alsa-project.org> */ #include <sound/asound.h> #include <sound/memalloc.h> #include <sound/minors.h> #include <linux/poll.h> #include <linux/mm.h> #include <linux/bitops.h> #include <linux/pm_qos.h> #include <linux/refcount.h> #include <linux/uio.h> #define snd_pcm_substream_chip(substream) ((substream)->private_data) #define snd_pcm_chip(pcm) ((pcm)->private_data) #if IS_ENABLED(CONFIG_SND_PCM_OSS) #include <sound/pcm_oss.h> #endif /* * Hardware (lowlevel) section */ struct snd_pcm_hardware { unsigned int info; /* SNDRV_PCM_INFO_* */ u64 formats; /* SNDRV_PCM_FMTBIT_* */ u32 subformats; /* for S32_LE, SNDRV_PCM_SUBFMTBIT_* */ unsigned int rates; /* SNDRV_PCM_RATE_* */ unsigned int rate_min; /* min rate */ unsigned int rate_max; /* max rate */ unsigned int channels_min; /* min channels */ unsigned int channels_max; /* max channels */ size_t buffer_bytes_max; /* max buffer size */ size_t period_bytes_min; /* min period size */ size_t period_bytes_max; /* max period size */ unsigned int periods_min; /* min # of periods */ unsigned int periods_max; /* max # of periods */ size_t fifo_size; /* fifo size in bytes */ }; struct snd_pcm_status64; struct snd_pcm_substream; struct snd_pcm_audio_tstamp_config; /* definitions further down */ struct snd_pcm_audio_tstamp_report; struct snd_pcm_ops { int (*open)(struct snd_pcm_substream *substream); int (*close)(struct snd_pcm_substream *substream); int (*ioctl)(struct snd_pcm_substream * substream, unsigned int cmd, void *arg); int (*hw_params)(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params); int (*hw_free)(struct snd_pcm_substream *substream); int (*prepare)(struct snd_pcm_substream *substream); int (*trigger)(struct snd_pcm_substream *substream, int cmd); int (*sync_stop)(struct snd_pcm_substream *substream); snd_pcm_uframes_t (*pointer)(struct snd_pcm_substream *substream); int (*get_time_info)(struct snd_pcm_substream *substream, struct timespec64 *system_ts, struct timespec64 *audio_ts, struct snd_pcm_audio_tstamp_config *audio_tstamp_config, struct snd_pcm_audio_tstamp_report *audio_tstamp_report); int (*fill_silence)(struct snd_pcm_substream *substream, int channel, unsigned long pos, unsigned long bytes); int (*copy)(struct snd_pcm_substream *substream, int channel, unsigned long pos, struct iov_iter *iter, unsigned long bytes); struct page *(*page)(struct snd_pcm_substream *substream, unsigned long offset); int (*mmap)(struct snd_pcm_substream *substream, struct vm_area_struct *vma); int (*ack)(struct snd_pcm_substream *substream); }; /* * */ #if defined(CONFIG_SND_DYNAMIC_MINORS) #define SNDRV_PCM_DEVICES (SNDRV_OS_MINORS-2) #else #define SNDRV_PCM_DEVICES 8 #endif #define SNDRV_PCM_IOCTL1_RESET 0 /* 1 is absent slot. */ #define SNDRV_PCM_IOCTL1_CHANNEL_INFO 2 /* 3 is absent slot. */ #define SNDRV_PCM_IOCTL1_FIFO_SIZE 4 #define SNDRV_PCM_IOCTL1_SYNC_ID 5 #define SNDRV_PCM_TRIGGER_STOP 0 #define SNDRV_PCM_TRIGGER_START 1 #define SNDRV_PCM_TRIGGER_PAUSE_PUSH 3 #define SNDRV_PCM_TRIGGER_PAUSE_RELEASE 4 #define SNDRV_PCM_TRIGGER_SUSPEND 5 #define SNDRV_PCM_TRIGGER_RESUME 6 #define SNDRV_PCM_TRIGGER_DRAIN 7 #define SNDRV_PCM_POS_XRUN ((snd_pcm_uframes_t)-1) /* If you change this don't forget to change rates[] table in pcm_native.c */ #define SNDRV_PCM_RATE_5512 (1U<<0) /* 5512Hz */ #define SNDRV_PCM_RATE_8000 (1U<<1) /* 8000Hz */ #define SNDRV_PCM_RATE_11025 (1U<<2) /* 11025Hz */ #define SNDRV_PCM_RATE_16000 (1U<<3) /* 16000Hz */ #define SNDRV_PCM_RATE_22050 (1U<<4) /* 22050Hz */ #define SNDRV_PCM_RATE_32000 (1U<<5) /* 32000Hz */ #define SNDRV_PCM_RATE_44100 (1U<<6) /* 44100Hz */ #define SNDRV_PCM_RATE_48000 (1U<<7) /* 48000Hz */ #define SNDRV_PCM_RATE_64000 (1U<<8) /* 64000Hz */ #define SNDRV_PCM_RATE_88200 (1U<<9) /* 88200Hz */ #define SNDRV_PCM_RATE_96000 (1U<<10) /* 96000Hz */ #define SNDRV_PCM_RATE_176400 (1U<<11) /* 176400Hz */ #define SNDRV_PCM_RATE_192000 (1U<<12) /* 192000Hz */ #define SNDRV_PCM_RATE_352800 (1U<<13) /* 352800Hz */ #define SNDRV_PCM_RATE_384000 (1U<<14) /* 384000Hz */ #define SNDRV_PCM_RATE_705600 (1U<<15) /* 705600Hz */ #define SNDRV_PCM_RATE_768000 (1U<<16) /* 768000Hz */ #define SNDRV_PCM_RATE_CONTINUOUS (1U<<30) /* continuous range */ #define SNDRV_PCM_RATE_KNOT (1U<<31) /* supports more non-continuous rates */ #define SNDRV_PCM_RATE_8000_44100 (SNDRV_PCM_RATE_8000|SNDRV_PCM_RATE_11025|\ SNDRV_PCM_RATE_16000|SNDRV_PCM_RATE_22050|\ SNDRV_PCM_RATE_32000|SNDRV_PCM_RATE_44100) #define SNDRV_PCM_RATE_8000_48000 (SNDRV_PCM_RATE_8000_44100|SNDRV_PCM_RATE_48000) #define SNDRV_PCM_RATE_8000_96000 (SNDRV_PCM_RATE_8000_48000|SNDRV_PCM_RATE_64000|\ SNDRV_PCM_RATE_88200|SNDRV_PCM_RATE_96000) #define SNDRV_PCM_RATE_8000_192000 (SNDRV_PCM_RATE_8000_96000|SNDRV_PCM_RATE_176400|\ SNDRV_PCM_RATE_192000) #define SNDRV_PCM_RATE_8000_384000 (SNDRV_PCM_RATE_8000_192000|\ SNDRV_PCM_RATE_352800|\ SNDRV_PCM_RATE_384000) #define SNDRV_PCM_RATE_8000_768000 (SNDRV_PCM_RATE_8000_384000|\ SNDRV_PCM_RATE_705600|\ SNDRV_PCM_RATE_768000) #define _SNDRV_PCM_FMTBIT(fmt) (1ULL << (__force int)SNDRV_PCM_FORMAT_##fmt) #define SNDRV_PCM_FMTBIT_S8 _SNDRV_PCM_FMTBIT(S8) #define SNDRV_PCM_FMTBIT_U8 _SNDRV_PCM_FMTBIT(U8) #define SNDRV_PCM_FMTBIT_S16_LE _SNDRV_PCM_FMTBIT(S16_LE) #define SNDRV_PCM_FMTBIT_S16_BE _SNDRV_PCM_FMTBIT(S16_BE) #define SNDRV_PCM_FMTBIT_U16_LE _SNDRV_PCM_FMTBIT(U16_LE) #define SNDRV_PCM_FMTBIT_U16_BE _SNDRV_PCM_FMTBIT(U16_BE) #define SNDRV_PCM_FMTBIT_S24_LE _SNDRV_PCM_FMTBIT(S24_LE) #define SNDRV_PCM_FMTBIT_S24_BE _SNDRV_PCM_FMTBIT(S24_BE) #define SNDRV_PCM_FMTBIT_U24_LE _SNDRV_PCM_FMTBIT(U24_LE) #define SNDRV_PCM_FMTBIT_U24_BE _SNDRV_PCM_FMTBIT(U24_BE) // For S32/U32 formats, 'msbits' hardware parameter is often used to deliver information about the // available bit count in most significant bit. It's for the case of so-called 'left-justified' or // `right-padding` sample which has less width than 32 bit. #define SNDRV_PCM_FMTBIT_S32_LE _SNDRV_PCM_FMTBIT(S32_LE) #define SNDRV_PCM_FMTBIT_S32_BE _SNDRV_PCM_FMTBIT(S32_BE) #define SNDRV_PCM_FMTBIT_U32_LE _SNDRV_PCM_FMTBIT(U32_LE) #define SNDRV_PCM_FMTBIT_U32_BE _SNDRV_PCM_FMTBIT(U32_BE) #define SNDRV_PCM_FMTBIT_FLOAT_LE _SNDRV_PCM_FMTBIT(FLOAT_LE) #define SNDRV_PCM_FMTBIT_FLOAT_BE _SNDRV_PCM_FMTBIT(FLOAT_BE) #define SNDRV_PCM_FMTBIT_FLOAT64_LE _SNDRV_PCM_FMTBIT(FLOAT64_LE) #define SNDRV_PCM_FMTBIT_FLOAT64_BE _SNDRV_PCM_FMTBIT(FLOAT64_BE) #define SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE _SNDRV_PCM_FMTBIT(IEC958_SUBFRAME_LE) #define SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_BE _SNDRV_PCM_FMTBIT(IEC958_SUBFRAME_BE) #define SNDRV_PCM_FMTBIT_MU_LAW _SNDRV_PCM_FMTBIT(MU_LAW) #define SNDRV_PCM_FMTBIT_A_LAW _SNDRV_PCM_FMTBIT(A_LAW) #define SNDRV_PCM_FMTBIT_IMA_ADPCM _SNDRV_PCM_FMTBIT(IMA_ADPCM) #define SNDRV_PCM_FMTBIT_MPEG _SNDRV_PCM_FMTBIT(MPEG) #define SNDRV_PCM_FMTBIT_GSM _SNDRV_PCM_FMTBIT(GSM) #define SNDRV_PCM_FMTBIT_S20_LE _SNDRV_PCM_FMTBIT(S20_LE) #define SNDRV_PCM_FMTBIT_U20_LE _SNDRV_PCM_FMTBIT(U20_LE) #define SNDRV_PCM_FMTBIT_S20_BE _SNDRV_PCM_FMTBIT(S20_BE) #define SNDRV_PCM_FMTBIT_U20_BE _SNDRV_PCM_FMTBIT(U20_BE) #define SNDRV_PCM_FMTBIT_SPECIAL _SNDRV_PCM_FMTBIT(SPECIAL) #define SNDRV_PCM_FMTBIT_S24_3LE _SNDRV_PCM_FMTBIT(S24_3LE) #define SNDRV_PCM_FMTBIT_U24_3LE _SNDRV_PCM_FMTBIT(U24_3LE) #define SNDRV_PCM_FMTBIT_S24_3BE _SNDRV_PCM_FMTBIT(S24_3BE) #define SNDRV_PCM_FMTBIT_U24_3BE _SNDRV_PCM_FMTBIT(U24_3BE) #define SNDRV_PCM_FMTBIT_S20_3LE _SNDRV_PCM_FMTBIT(S20_3LE) #define SNDRV_PCM_FMTBIT_U20_3LE _SNDRV_PCM_FMTBIT(U20_3LE) #define SNDRV_PCM_FMTBIT_S20_3BE _SNDRV_PCM_FMTBIT(S20_3BE) #define SNDRV_PCM_FMTBIT_U20_3BE _SNDRV_PCM_FMTBIT(U20_3BE) #define SNDRV_PCM_FMTBIT_S18_3LE _SNDRV_PCM_FMTBIT(S18_3LE) #define SNDRV_PCM_FMTBIT_U18_3LE _SNDRV_PCM_FMTBIT(U18_3LE) #define SNDRV_PCM_FMTBIT_S18_3BE _SNDRV_PCM_FMTBIT(S18_3BE) #define SNDRV_PCM_FMTBIT_U18_3BE _SNDRV_PCM_FMTBIT(U18_3BE) #define SNDRV_PCM_FMTBIT_G723_24 _SNDRV_PCM_FMTBIT(G723_24) #define SNDRV_PCM_FMTBIT_G723_24_1B _SNDRV_PCM_FMTBIT(G723_24_1B) #define SNDRV_PCM_FMTBIT_G723_40 _SNDRV_PCM_FMTBIT(G723_40) #define SNDRV_PCM_FMTBIT_G723_40_1B _SNDRV_PCM_FMTBIT(G723_40_1B) #define SNDRV_PCM_FMTBIT_DSD_U8 _SNDRV_PCM_FMTBIT(DSD_U8) #define SNDRV_PCM_FMTBIT_DSD_U16_LE _SNDRV_PCM_FMTBIT(DSD_U16_LE) #define SNDRV_PCM_FMTBIT_DSD_U32_LE _SNDRV_PCM_FMTBIT(DSD_U32_LE) #define SNDRV_PCM_FMTBIT_DSD_U16_BE _SNDRV_PCM_FMTBIT(DSD_U16_BE) #define SNDRV_PCM_FMTBIT_DSD_U32_BE _SNDRV_PCM_FMTBIT(DSD_U32_BE) #ifdef SNDRV_LITTLE_ENDIAN #define SNDRV_PCM_FMTBIT_S16 SNDRV_PCM_FMTBIT_S16_LE #define SNDRV_PCM_FMTBIT_U16 SNDRV_PCM_FMTBIT_U16_LE #define SNDRV_PCM_FMTBIT_S24 SNDRV_PCM_FMTBIT_S24_LE #define SNDRV_PCM_FMTBIT_U24 SNDRV_PCM_FMTBIT_U24_LE #define SNDRV_PCM_FMTBIT_S32 SNDRV_PCM_FMTBIT_S32_LE #define SNDRV_PCM_FMTBIT_U32 SNDRV_PCM_FMTBIT_U32_LE #define SNDRV_PCM_FMTBIT_FLOAT SNDRV_PCM_FMTBIT_FLOAT_LE #define SNDRV_PCM_FMTBIT_FLOAT64 SNDRV_PCM_FMTBIT_FLOAT64_LE #define SNDRV_PCM_FMTBIT_IEC958_SUBFRAME SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_LE #define SNDRV_PCM_FMTBIT_S20 SNDRV_PCM_FMTBIT_S20_LE #define SNDRV_PCM_FMTBIT_U20 SNDRV_PCM_FMTBIT_U20_LE #endif #ifdef SNDRV_BIG_ENDIAN #define SNDRV_PCM_FMTBIT_S16 SNDRV_PCM_FMTBIT_S16_BE #define SNDRV_PCM_FMTBIT_U16 SNDRV_PCM_FMTBIT_U16_BE #define SNDRV_PCM_FMTBIT_S24 SNDRV_PCM_FMTBIT_S24_BE #define SNDRV_PCM_FMTBIT_U24 SNDRV_PCM_FMTBIT_U24_BE #define SNDRV_PCM_FMTBIT_S32 SNDRV_PCM_FMTBIT_S32_BE #define SNDRV_PCM_FMTBIT_U32 SNDRV_PCM_FMTBIT_U32_BE #define SNDRV_PCM_FMTBIT_FLOAT SNDRV_PCM_FMTBIT_FLOAT_BE #define SNDRV_PCM_FMTBIT_FLOAT64 SNDRV_PCM_FMTBIT_FLOAT64_BE #define SNDRV_PCM_FMTBIT_IEC958_SUBFRAME SNDRV_PCM_FMTBIT_IEC958_SUBFRAME_BE #define SNDRV_PCM_FMTBIT_S20 SNDRV_PCM_FMTBIT_S20_BE #define SNDRV_PCM_FMTBIT_U20 SNDRV_PCM_FMTBIT_U20_BE #endif #define _SNDRV_PCM_SUBFMTBIT(fmt) BIT((__force int)SNDRV_PCM_SUBFORMAT_##fmt) #define SNDRV_PCM_SUBFMTBIT_STD _SNDRV_PCM_SUBFMTBIT(STD) #define SNDRV_PCM_SUBFMTBIT_MSBITS_MAX _SNDRV_PCM_SUBFMTBIT(MSBITS_MAX) #define SNDRV_PCM_SUBFMTBIT_MSBITS_20 _SNDRV_PCM_SUBFMTBIT(MSBITS_20) #define SNDRV_PCM_SUBFMTBIT_MSBITS_24 _SNDRV_PCM_SUBFMTBIT(MSBITS_24) struct snd_pcm_file { struct snd_pcm_substream *substream; int no_compat_mmap; unsigned int user_pversion; /* supported protocol version */ }; struct snd_pcm_hw_rule; typedef int (*snd_pcm_hw_rule_func_t)(struct snd_pcm_hw_params *params, struct snd_pcm_hw_rule *rule); struct snd_pcm_hw_rule { unsigned int cond; int var; int deps[5]; snd_pcm_hw_rule_func_t func; void *private; }; struct snd_pcm_hw_constraints { struct snd_mask masks[SNDRV_PCM_HW_PARAM_LAST_MASK - SNDRV_PCM_HW_PARAM_FIRST_MASK + 1]; struct snd_interval intervals[SNDRV_PCM_HW_PARAM_LAST_INTERVAL - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL + 1]; unsigned int rules_num; unsigned int rules_all; struct snd_pcm_hw_rule *rules; }; static inline struct snd_mask *constrs_mask(struct snd_pcm_hw_constraints *constrs, snd_pcm_hw_param_t var) { return &constrs->masks[var - SNDRV_PCM_HW_PARAM_FIRST_MASK]; } static inline struct snd_interval *constrs_interval(struct snd_pcm_hw_constraints *constrs, snd_pcm_hw_param_t var) { return &constrs->intervals[var - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL]; } struct snd_ratnum { unsigned int num; unsigned int den_min, den_max, den_step; }; struct snd_ratden { unsigned int num_min, num_max, num_step; unsigned int den; }; struct snd_pcm_hw_constraint_ratnums { int nrats; const struct snd_ratnum *rats; }; struct snd_pcm_hw_constraint_ratdens { int nrats; const struct snd_ratden *rats; }; struct snd_pcm_hw_constraint_list { const unsigned int *list; unsigned int count; unsigned int mask; }; struct snd_pcm_hw_constraint_ranges { unsigned int count; const struct snd_interval *ranges; unsigned int mask; }; /* * userspace-provided audio timestamp config to kernel, * structure is for internal use only and filled with dedicated unpack routine */ struct snd_pcm_audio_tstamp_config { /* 5 of max 16 bits used */ u32 type_requested:4; u32 report_delay:1; /* add total delay to A/D or D/A */ }; static inline void snd_pcm_unpack_audio_tstamp_config(__u32 data, struct snd_pcm_audio_tstamp_config *config) { config->type_requested = data & 0xF; config->report_delay = (data >> 4) & 1; } /* * kernel-provided audio timestamp report to user-space * structure is for internal use only and read by dedicated pack routine */ struct snd_pcm_audio_tstamp_report { /* 6 of max 16 bits used for bit-fields */ /* for backwards compatibility */ u32 valid:1; /* actual type if hardware could not support requested timestamp */ u32 actual_type:4; /* accuracy represented in ns units */ u32 accuracy_report:1; /* 0 if accuracy unknown, 1 if accuracy field is valid */ u32 accuracy; /* up to 4.29s, will be packed in separate field */ }; static inline void snd_pcm_pack_audio_tstamp_report(__u32 *data, __u32 *accuracy, const struct snd_pcm_audio_tstamp_report *report) { u32 tmp; tmp = report->accuracy_report; tmp <<= 4; tmp |= report->actual_type; tmp <<= 1; tmp |= report->valid; *data &= 0xffff; /* zero-clear MSBs */ *data |= (tmp << 16); *accuracy = report->accuracy; } struct snd_pcm_runtime { /* -- Status -- */ snd_pcm_state_t state; /* stream state */ snd_pcm_state_t suspended_state; /* suspended stream state */ struct snd_pcm_substream *trigger_master; struct timespec64 trigger_tstamp; /* trigger timestamp */ bool trigger_tstamp_latched; /* trigger timestamp latched in low-level driver/hardware */ int overrange; snd_pcm_uframes_t avail_max; snd_pcm_uframes_t hw_ptr_base; /* Position at buffer restart */ snd_pcm_uframes_t hw_ptr_interrupt; /* Position at interrupt time */ unsigned long hw_ptr_jiffies; /* Time when hw_ptr is updated */ unsigned long hw_ptr_buffer_jiffies; /* buffer time in jiffies */ snd_pcm_sframes_t delay; /* extra delay; typically FIFO size */ u64 hw_ptr_wrap; /* offset for hw_ptr due to boundary wrap-around */ /* -- HW params -- */ snd_pcm_access_t access; /* access mode */ snd_pcm_format_t format; /* SNDRV_PCM_FORMAT_* */ snd_pcm_subformat_t subformat; /* subformat */ unsigned int rate; /* rate in Hz */ unsigned int channels; /* channels */ snd_pcm_uframes_t period_size; /* period size */ unsigned int periods; /* periods */ snd_pcm_uframes_t buffer_size; /* buffer size */ snd_pcm_uframes_t min_align; /* Min alignment for the format */ size_t byte_align; unsigned int frame_bits; unsigned int sample_bits; unsigned int info; unsigned int rate_num; unsigned int rate_den; unsigned int no_period_wakeup: 1; /* -- SW params; see struct snd_pcm_sw_params for comments -- */ int tstamp_mode; unsigned int period_step; snd_pcm_uframes_t start_threshold; snd_pcm_uframes_t stop_threshold; snd_pcm_uframes_t silence_threshold; snd_pcm_uframes_t silence_size; snd_pcm_uframes_t boundary; /* internal data of auto-silencer */ snd_pcm_uframes_t silence_start; /* starting pointer to silence area */ snd_pcm_uframes_t silence_filled; /* already filled part of silence area */ bool std_sync_id; /* hardware synchronization - standard per card ID */ /* -- mmap -- */ struct snd_pcm_mmap_status *status; struct snd_pcm_mmap_control *control; /* -- locking / scheduling -- */ snd_pcm_uframes_t twake; /* do transfer (!poll) wakeup if non-zero */ wait_queue_head_t sleep; /* poll sleep */ wait_queue_head_t tsleep; /* transfer sleep */ struct snd_fasync *fasync; bool stop_operating; /* sync_stop will be called */ struct mutex buffer_mutex; /* protect for buffer changes */ atomic_t buffer_accessing; /* >0: in r/w operation, <0: blocked */ /* -- private section -- */ void *private_data; void (*private_free)(struct snd_pcm_runtime *runtime); /* -- hardware description -- */ struct snd_pcm_hardware hw; struct snd_pcm_hw_constraints hw_constraints; /* -- timer -- */ unsigned int timer_resolution; /* timer resolution */ int tstamp_type; /* timestamp type */ /* -- DMA -- */ unsigned char *dma_area; /* DMA area */ dma_addr_t dma_addr; /* physical bus address (not accessible from main CPU) */ size_t dma_bytes; /* size of DMA area */ struct snd_dma_buffer *dma_buffer_p; /* allocated buffer */ unsigned int buffer_changed:1; /* buffer allocation changed; set only in managed mode */ /* -- audio timestamp config -- */ struct snd_pcm_audio_tstamp_config audio_tstamp_config; struct snd_pcm_audio_tstamp_report audio_tstamp_report; struct timespec64 driver_tstamp; #if IS_ENABLED(CONFIG_SND_PCM_OSS) /* -- OSS things -- */ struct snd_pcm_oss_runtime oss; #endif }; struct snd_pcm_group { /* keep linked substreams */ spinlock_t lock; struct mutex mutex; struct list_head substreams; refcount_t refs; }; struct pid; struct snd_pcm_substream { struct snd_pcm *pcm; struct snd_pcm_str *pstr; void *private_data; /* copied from pcm->private_data */ int number; char name[32]; /* substream name */ int stream; /* stream (direction) */ struct pm_qos_request latency_pm_qos_req; /* pm_qos request */ size_t buffer_bytes_max; /* limit ring buffer size */ struct snd_dma_buffer dma_buffer; size_t dma_max; /* -- hardware operations -- */ const struct snd_pcm_ops *ops; /* -- runtime information -- */ struct snd_pcm_runtime *runtime; /* -- timer section -- */ struct snd_timer *timer; /* timer */ unsigned timer_running: 1; /* time is running */ long wait_time; /* time in ms for R/W to wait for avail */ /* -- next substream -- */ struct snd_pcm_substream *next; /* -- linked substreams -- */ struct list_head link_list; /* linked list member */ struct snd_pcm_group self_group; /* fake group for non linked substream (with substream lock inside) */ struct snd_pcm_group *group; /* pointer to current group */ /* -- assigned files -- */ int ref_count; atomic_t mmap_count; unsigned int f_flags; void (*pcm_release)(struct snd_pcm_substream *); struct pid *pid; #if IS_ENABLED(CONFIG_SND_PCM_OSS) /* -- OSS things -- */ struct snd_pcm_oss_substream oss; #endif #ifdef CONFIG_SND_VERBOSE_PROCFS struct snd_info_entry *proc_root; #endif /* CONFIG_SND_VERBOSE_PROCFS */ /* misc flags */ unsigned int hw_opened: 1; unsigned int managed_buffer_alloc:1; }; #define SUBSTREAM_BUSY(substream) ((substream)->ref_count > 0) struct snd_pcm_str { int stream; /* stream (direction) */ struct snd_pcm *pcm; /* -- substreams -- */ unsigned int substream_count; unsigned int substream_opened; struct snd_pcm_substream *substream; #if IS_ENABLED(CONFIG_SND_PCM_OSS) /* -- OSS things -- */ struct snd_pcm_oss_stream oss; #endif #ifdef CONFIG_SND_VERBOSE_PROCFS struct snd_info_entry *proc_root; #ifdef CONFIG_SND_PCM_XRUN_DEBUG unsigned int xrun_debug; /* 0 = disabled, 1 = verbose, 2 = stacktrace */ #endif #endif struct snd_kcontrol *chmap_kctl; /* channel-mapping controls */ struct device *dev; }; struct snd_pcm { struct snd_card *card; struct list_head list; int device; /* device number */ unsigned int info_flags; unsigned short dev_class; unsigned short dev_subclass; char id[64]; char name[80]; struct snd_pcm_str streams[2]; struct mutex open_mutex; wait_queue_head_t open_wait; void *private_data; void (*private_free) (struct snd_pcm *pcm); bool internal; /* pcm is for internal use only */ bool nonatomic; /* whole PCM operations are in non-atomic context */ bool no_device_suspend; /* don't invoke device PM suspend */ #if IS_ENABLED(CONFIG_SND_PCM_OSS) struct snd_pcm_oss oss; #endif }; /* * Registering */ extern const struct file_operations snd_pcm_f_ops[2]; int snd_pcm_new(struct snd_card *card, const char *id, int device, int playback_count, int capture_count, struct snd_pcm **rpcm); int snd_pcm_new_internal(struct snd_card *card, const char *id, int device, int playback_count, int capture_count, struct snd_pcm **rpcm); int snd_pcm_new_stream(struct snd_pcm *pcm, int stream, int substream_count); #if IS_ENABLED(CONFIG_SND_PCM_OSS) struct snd_pcm_notify { int (*n_register) (struct snd_pcm * pcm); int (*n_disconnect) (struct snd_pcm * pcm); int (*n_unregister) (struct snd_pcm * pcm); struct list_head list; }; int snd_pcm_notify(struct snd_pcm_notify *notify, int nfree); #endif /* * Native I/O */ int snd_pcm_info(struct snd_pcm_substream *substream, struct snd_pcm_info *info); int snd_pcm_info_user(struct snd_pcm_substream *substream, struct snd_pcm_info __user *info); int snd_pcm_status64(struct snd_pcm_substream *substream, struct snd_pcm_status64 *status); int snd_pcm_start(struct snd_pcm_substream *substream); int snd_pcm_stop(struct snd_pcm_substream *substream, snd_pcm_state_t status); int snd_pcm_drain_done(struct snd_pcm_substream *substream); int snd_pcm_stop_xrun(struct snd_pcm_substream *substream); #ifdef CONFIG_PM int snd_pcm_suspend_all(struct snd_pcm *pcm); #else static inline int snd_pcm_suspend_all(struct snd_pcm *pcm) { return 0; } #endif int snd_pcm_kernel_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg); int snd_pcm_open_substream(struct snd_pcm *pcm, int stream, struct file *file, struct snd_pcm_substream **rsubstream); void snd_pcm_release_substream(struct snd_pcm_substream *substream); int snd_pcm_attach_substream(struct snd_pcm *pcm, int stream, struct file *file, struct snd_pcm_substream **rsubstream); void snd_pcm_detach_substream(struct snd_pcm_substream *substream); int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area); #ifdef CONFIG_SND_DEBUG void snd_pcm_debug_name(struct snd_pcm_substream *substream, char *name, size_t len); #else static inline void snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size) { *buf = 0; } #endif /* * PCM library */ /** * snd_pcm_stream_linked - Check whether the substream is linked with others * @substream: substream to check * * Return: true if the given substream is being linked with others */ static inline int snd_pcm_stream_linked(struct snd_pcm_substream *substream) { return substream->group != &substream->self_group; } void snd_pcm_stream_lock(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock(struct snd_pcm_substream *substream); void snd_pcm_stream_lock_irq(struct snd_pcm_substream *substream); void snd_pcm_stream_unlock_irq(struct snd_pcm_substream *substream); unsigned long _snd_pcm_stream_lock_irqsave(struct snd_pcm_substream *substream); unsigned long _snd_pcm_stream_lock_irqsave_nested(struct snd_pcm_substream *substream); /** * snd_pcm_stream_lock_irqsave - Lock the PCM stream * @substream: PCM substream * @flags: irq flags * * This locks the PCM stream like snd_pcm_stream_lock() but with the local * IRQ (only when nonatomic is false). In nonatomic case, this is identical * as snd_pcm_stream_lock(). */ #define snd_pcm_stream_lock_irqsave(substream, flags) \ do { \ typecheck(unsigned long, flags); \ flags = _snd_pcm_stream_lock_irqsave(substream); \ } while (0) void snd_pcm_stream_unlock_irqrestore(struct snd_pcm_substream *substream, unsigned long flags); /** * snd_pcm_stream_lock_irqsave_nested - Single-nested PCM stream locking * @substream: PCM substream * @flags: irq flags * * This locks the PCM stream like snd_pcm_stream_lock_irqsave() but with * the single-depth lockdep subclass. */ #define snd_pcm_stream_lock_irqsave_nested(substream, flags) \ do { \ typecheck(unsigned long, flags); \ flags = _snd_pcm_stream_lock_irqsave_nested(substream); \ } while (0) /* definitions for guard(); use like guard(pcm_stream_lock) */ DEFINE_LOCK_GUARD_1(pcm_stream_lock, struct snd_pcm_substream, snd_pcm_stream_lock(_T->lock), snd_pcm_stream_unlock(_T->lock)) DEFINE_LOCK_GUARD_1(pcm_stream_lock_irq, struct snd_pcm_substream, snd_pcm_stream_lock_irq(_T->lock), snd_pcm_stream_unlock_irq(_T->lock)) DEFINE_LOCK_GUARD_1(pcm_stream_lock_irqsave, struct snd_pcm_substream, snd_pcm_stream_lock_irqsave(_T->lock, _T->flags), snd_pcm_stream_unlock_irqrestore(_T->lock, _T->flags), unsigned long flags) /** * snd_pcm_group_for_each_entry - iterate over the linked substreams * @s: the iterator * @substream: the substream * * Iterate over the all linked substreams to the given @substream. * When @substream isn't linked with any others, this gives returns @substream * itself once. */ #define snd_pcm_group_for_each_entry(s, substream) \ list_for_each_entry(s, &substream->group->substreams, link_list) #define for_each_pcm_streams(stream) \ for (stream = SNDRV_PCM_STREAM_PLAYBACK; \ stream <= SNDRV_PCM_STREAM_LAST; \ stream++) /** * snd_pcm_running - Check whether the substream is in a running state * @substream: substream to check * * Return: true if the given substream is in the state RUNNING, or in the * state DRAINING for playback. */ static inline int snd_pcm_running(struct snd_pcm_substream *substream) { return (substream->runtime->state == SNDRV_PCM_STATE_RUNNING || (substream->runtime->state == SNDRV_PCM_STATE_DRAINING && substream->stream == SNDRV_PCM_STREAM_PLAYBACK)); } /** * __snd_pcm_set_state - Change the current PCM state * @runtime: PCM runtime to set * @state: the current state to set * * Call within the stream lock */ static inline void __snd_pcm_set_state(struct snd_pcm_runtime *runtime, snd_pcm_state_t state) { runtime->state = state; runtime->status->state = state; /* copy for mmap */ } /** * bytes_to_samples - Unit conversion of the size from bytes to samples * @runtime: PCM runtime instance * @size: size in bytes * * Return: the size in samples */ static inline ssize_t bytes_to_samples(struct snd_pcm_runtime *runtime, ssize_t size) { return size * 8 / runtime->sample_bits; } /** * bytes_to_frames - Unit conversion of the size from bytes to frames * @runtime: PCM runtime instance * @size: size in bytes * * Return: the size in frames */ static inline snd_pcm_sframes_t bytes_to_frames(struct snd_pcm_runtime *runtime, ssize_t size) { return size * 8 / runtime->frame_bits; } /** * samples_to_bytes - Unit conversion of the size from samples to bytes * @runtime: PCM runtime instance * @size: size in samples * * Return: the byte size */ static inline ssize_t samples_to_bytes(struct snd_pcm_runtime *runtime, ssize_t size) { return size * runtime->sample_bits / 8; } /** * frames_to_bytes - Unit conversion of the size from frames to bytes * @runtime: PCM runtime instance * @size: size in frames * * Return: the byte size */ static inline ssize_t frames_to_bytes(struct snd_pcm_runtime *runtime, snd_pcm_sframes_t size) { return size * runtime->frame_bits / 8; } /** * frame_aligned - Check whether the byte size is aligned to frames * @runtime: PCM runtime instance * @bytes: size in bytes * * Return: true if aligned, or false if not */ static inline int frame_aligned(struct snd_pcm_runtime *runtime, ssize_t bytes) { return bytes % runtime->byte_align == 0; } /** * snd_pcm_lib_buffer_bytes - Get the buffer size of the current PCM in bytes * @substream: PCM substream * * Return: buffer byte size */ static inline size_t snd_pcm_lib_buffer_bytes(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return frames_to_bytes(runtime, runtime->buffer_size); } /** * snd_pcm_lib_period_bytes - Get the period size of the current PCM in bytes * @substream: PCM substream * * Return: period byte size */ static inline size_t snd_pcm_lib_period_bytes(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return frames_to_bytes(runtime, runtime->period_size); } /** * snd_pcm_playback_avail - Get the available (writable) space for playback * @runtime: PCM runtime instance * * Result is between 0 ... (boundary - 1) * * Return: available frame size */ static inline snd_pcm_uframes_t snd_pcm_playback_avail(struct snd_pcm_runtime *runtime) { snd_pcm_sframes_t avail = runtime->status->hw_ptr + runtime->buffer_size - runtime->control->appl_ptr; if (avail < 0) avail += runtime->boundary; else if ((snd_pcm_uframes_t) avail >= runtime->boundary) avail -= runtime->boundary; return avail; } /** * snd_pcm_capture_avail - Get the available (readable) space for capture * @runtime: PCM runtime instance * * Result is between 0 ... (boundary - 1) * * Return: available frame size */ static inline snd_pcm_uframes_t snd_pcm_capture_avail(struct snd_pcm_runtime *runtime) { snd_pcm_sframes_t avail = runtime->status->hw_ptr - runtime->control->appl_ptr; if (avail < 0) avail += runtime->boundary; return avail; } /** * snd_pcm_playback_hw_avail - Get the queued space for playback * @runtime: PCM runtime instance * * Return: available frame size */ static inline snd_pcm_sframes_t snd_pcm_playback_hw_avail(struct snd_pcm_runtime *runtime) { return runtime->buffer_size - snd_pcm_playback_avail(runtime); } /** * snd_pcm_capture_hw_avail - Get the free space for capture * @runtime: PCM runtime instance * * Return: available frame size */ static inline snd_pcm_sframes_t snd_pcm_capture_hw_avail(struct snd_pcm_runtime *runtime) { return runtime->buffer_size - snd_pcm_capture_avail(runtime); } /** * snd_pcm_playback_ready - check whether the playback buffer is available * @substream: the pcm substream instance * * Checks whether enough free space is available on the playback buffer. * * Return: Non-zero if available, or zero if not. */ static inline int snd_pcm_playback_ready(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return snd_pcm_playback_avail(runtime) >= runtime->control->avail_min; } /** * snd_pcm_capture_ready - check whether the capture buffer is available * @substream: the pcm substream instance * * Checks whether enough capture data is available on the capture buffer. * * Return: Non-zero if available, or zero if not. */ static inline int snd_pcm_capture_ready(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return snd_pcm_capture_avail(runtime) >= runtime->control->avail_min; } /** * snd_pcm_playback_data - check whether any data exists on the playback buffer * @substream: the pcm substream instance * * Checks whether any data exists on the playback buffer. * * Return: Non-zero if any data exists, or zero if not. If stop_threshold * is bigger or equal to boundary, then this function returns always non-zero. */ static inline int snd_pcm_playback_data(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; if (runtime->stop_threshold >= runtime->boundary) return 1; return snd_pcm_playback_avail(runtime) < runtime->buffer_size; } /** * snd_pcm_playback_empty - check whether the playback buffer is empty * @substream: the pcm substream instance * * Checks whether the playback buffer is empty. * * Return: Non-zero if empty, or zero if not. */ static inline int snd_pcm_playback_empty(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return snd_pcm_playback_avail(runtime) >= runtime->buffer_size; } /** * snd_pcm_capture_empty - check whether the capture buffer is empty * @substream: the pcm substream instance * * Checks whether the capture buffer is empty. * * Return: Non-zero if empty, or zero if not. */ static inline int snd_pcm_capture_empty(struct snd_pcm_substream *substream) { struct snd_pcm_runtime *runtime = substream->runtime; return snd_pcm_capture_avail(runtime) == 0; } /** * snd_pcm_trigger_done - Mark the master substream * @substream: the pcm substream instance * @master: the linked master substream * * When multiple substreams of the same card are linked and the hardware * supports the single-shot operation, the driver calls this in the loop * in snd_pcm_group_for_each_entry() for marking the substream as "done". * Then most of trigger operations are performed only to the given master * substream. * * The trigger_master mark is cleared at timestamp updates at the end * of trigger operations. */ static inline void snd_pcm_trigger_done(struct snd_pcm_substream *substream, struct snd_pcm_substream *master) { substream->runtime->trigger_master = master; } static inline int hw_is_mask(int var) { return var >= SNDRV_PCM_HW_PARAM_FIRST_MASK && var <= SNDRV_PCM_HW_PARAM_LAST_MASK; } static inline int hw_is_interval(int var) { return var >= SNDRV_PCM_HW_PARAM_FIRST_INTERVAL && var <= SNDRV_PCM_HW_PARAM_LAST_INTERVAL; } static inline struct snd_mask *hw_param_mask(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { return ¶ms->masks[var - SNDRV_PCM_HW_PARAM_FIRST_MASK]; } static inline struct snd_interval *hw_param_interval(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { return ¶ms->intervals[var - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL]; } static inline const struct snd_mask *hw_param_mask_c(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { return ¶ms->masks[var - SNDRV_PCM_HW_PARAM_FIRST_MASK]; } static inline const struct snd_interval *hw_param_interval_c(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var) { return ¶ms->intervals[var - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL]; } /** * params_channels - Get the number of channels from the hw params * @p: hw params * * Return: the number of channels */ static inline unsigned int params_channels(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_CHANNELS)->min; } /** * params_rate - Get the sample rate from the hw params * @p: hw params * * Return: the sample rate */ static inline unsigned int params_rate(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_RATE)->min; } /** * params_period_size - Get the period size (in frames) from the hw params * @p: hw params * * Return: the period size in frames */ static inline unsigned int params_period_size(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_PERIOD_SIZE)->min; } /** * params_periods - Get the number of periods from the hw params * @p: hw params * * Return: the number of periods */ static inline unsigned int params_periods(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_PERIODS)->min; } /** * params_buffer_size - Get the buffer size (in frames) from the hw params * @p: hw params * * Return: the buffer size in frames */ static inline unsigned int params_buffer_size(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_BUFFER_SIZE)->min; } /** * params_buffer_bytes - Get the buffer size (in bytes) from the hw params * @p: hw params * * Return: the buffer size in bytes */ static inline unsigned int params_buffer_bytes(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_BUFFER_BYTES)->min; } int snd_interval_refine(struct snd_interval *i, const struct snd_interval *v); int snd_interval_list(struct snd_interval *i, unsigned int count, const unsigned int *list, unsigned int mask); int snd_interval_ranges(struct snd_interval *i, unsigned int count, const struct snd_interval *list, unsigned int mask); int snd_interval_ratnum(struct snd_interval *i, unsigned int rats_count, const struct snd_ratnum *rats, unsigned int *nump, unsigned int *denp); void _snd_pcm_hw_params_any(struct snd_pcm_hw_params *params); void _snd_pcm_hw_param_setempty(struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var); int snd_pcm_hw_refine(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params); int snd_pcm_hw_constraint_mask64(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var, u_int64_t mask); int snd_pcm_hw_constraint_minmax(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var, unsigned int min, unsigned int max); int snd_pcm_hw_constraint_integer(struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var); int snd_pcm_hw_constraint_list(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var, const struct snd_pcm_hw_constraint_list *l); int snd_pcm_hw_constraint_ranges(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var, const struct snd_pcm_hw_constraint_ranges *r); int snd_pcm_hw_constraint_ratnums(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var, const struct snd_pcm_hw_constraint_ratnums *r); int snd_pcm_hw_constraint_ratdens(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var, const struct snd_pcm_hw_constraint_ratdens *r); int snd_pcm_hw_constraint_msbits(struct snd_pcm_runtime *runtime, unsigned int cond, unsigned int width, unsigned int msbits); int snd_pcm_hw_constraint_step(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var, unsigned long step); int snd_pcm_hw_constraint_pow2(struct snd_pcm_runtime *runtime, unsigned int cond, snd_pcm_hw_param_t var); int snd_pcm_hw_rule_noresample(struct snd_pcm_runtime *runtime, unsigned int base_rate); int snd_pcm_hw_rule_add(struct snd_pcm_runtime *runtime, unsigned int cond, int var, snd_pcm_hw_rule_func_t func, void *private, int dep, ...); /** * snd_pcm_hw_constraint_single() - Constrain parameter to a single value * @runtime: PCM runtime instance * @var: The hw_params variable to constrain * @val: The value to constrain to * * Return: Positive if the value is changed, zero if it's not changed, or a * negative error code. */ static inline int snd_pcm_hw_constraint_single( struct snd_pcm_runtime *runtime, snd_pcm_hw_param_t var, unsigned int val) { return snd_pcm_hw_constraint_minmax(runtime, var, val, val); } int snd_pcm_format_signed(snd_pcm_format_t format); int snd_pcm_format_unsigned(snd_pcm_format_t format); int snd_pcm_format_linear(snd_pcm_format_t format); int snd_pcm_format_little_endian(snd_pcm_format_t format); int snd_pcm_format_big_endian(snd_pcm_format_t format); #if 0 /* just for kernel-doc */ /** * snd_pcm_format_cpu_endian - Check the PCM format is CPU-endian * @format: the format to check * * Return: 1 if the given PCM format is CPU-endian, 0 if * opposite, or a negative error code if endian not specified. */ int snd_pcm_format_cpu_endian(snd_pcm_format_t format); #endif /* DocBook */ #ifdef SNDRV_LITTLE_ENDIAN #define snd_pcm_format_cpu_endian(format) snd_pcm_format_little_endian(format) #else #define snd_pcm_format_cpu_endian(format) snd_pcm_format_big_endian(format) #endif int snd_pcm_format_width(snd_pcm_format_t format); /* in bits */ int snd_pcm_format_physical_width(snd_pcm_format_t format); /* in bits */ ssize_t snd_pcm_format_size(snd_pcm_format_t format, size_t samples); const unsigned char *snd_pcm_format_silence_64(snd_pcm_format_t format); int snd_pcm_format_set_silence(snd_pcm_format_t format, void *buf, unsigned int frames); void snd_pcm_set_ops(struct snd_pcm * pcm, int direction, const struct snd_pcm_ops *ops); void snd_pcm_set_sync_per_card(struct snd_pcm_substream *substream, struct snd_pcm_hw_params *params, const unsigned char *id, unsigned int len); /** * snd_pcm_set_sync - set the PCM sync id * @substream: the pcm substream * * Use the default PCM sync identifier for the specific card. */ static inline void snd_pcm_set_sync(struct snd_pcm_substream *substream) { substream->runtime->std_sync_id = true; } int snd_pcm_lib_ioctl(struct snd_pcm_substream *substream, unsigned int cmd, void *arg); void snd_pcm_period_elapsed_under_stream_lock(struct snd_pcm_substream *substream); void snd_pcm_period_elapsed(struct snd_pcm_substream *substream); snd_pcm_sframes_t __snd_pcm_lib_xfer(struct snd_pcm_substream *substream, void *buf, bool interleaved, snd_pcm_uframes_t frames, bool in_kernel); static inline snd_pcm_sframes_t snd_pcm_lib_write(struct snd_pcm_substream *substream, const void __user *buf, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, (void __force *)buf, true, frames, false); } static inline snd_pcm_sframes_t snd_pcm_lib_read(struct snd_pcm_substream *substream, void __user *buf, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, (void __force *)buf, true, frames, false); } static inline snd_pcm_sframes_t snd_pcm_lib_writev(struct snd_pcm_substream *substream, void __user **bufs, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, (void *)bufs, false, frames, false); } static inline snd_pcm_sframes_t snd_pcm_lib_readv(struct snd_pcm_substream *substream, void __user **bufs, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, (void *)bufs, false, frames, false); } static inline snd_pcm_sframes_t snd_pcm_kernel_write(struct snd_pcm_substream *substream, const void *buf, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, (void *)buf, true, frames, true); } static inline snd_pcm_sframes_t snd_pcm_kernel_read(struct snd_pcm_substream *substream, void *buf, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, buf, true, frames, true); } static inline snd_pcm_sframes_t snd_pcm_kernel_writev(struct snd_pcm_substream *substream, void **bufs, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, bufs, false, frames, true); } static inline snd_pcm_sframes_t snd_pcm_kernel_readv(struct snd_pcm_substream *substream, void **bufs, snd_pcm_uframes_t frames) { return __snd_pcm_lib_xfer(substream, bufs, false, frames, true); } int snd_pcm_hw_limit_rates(struct snd_pcm_hardware *hw); static inline int snd_pcm_limit_hw_rates(struct snd_pcm_runtime *runtime) { return snd_pcm_hw_limit_rates(&runtime->hw); } unsigned int snd_pcm_rate_to_rate_bit(unsigned int rate); unsigned int snd_pcm_rate_bit_to_rate(unsigned int rate_bit); unsigned int snd_pcm_rate_mask_intersect(unsigned int rates_a, unsigned int rates_b); unsigned int snd_pcm_rate_range_to_bits(unsigned int rate_min, unsigned int rate_max); /** * snd_pcm_set_runtime_buffer - Set the PCM runtime buffer * @substream: PCM substream to set * @bufp: the buffer information, NULL to clear * * Copy the buffer information to runtime->dma_buffer when @bufp is non-NULL. * Otherwise it clears the current buffer information. */ static inline void snd_pcm_set_runtime_buffer(struct snd_pcm_substream *substream, struct snd_dma_buffer *bufp) { struct snd_pcm_runtime *runtime = substream->runtime; if (bufp) { runtime->dma_buffer_p = bufp; runtime->dma_area = bufp->area; runtime->dma_addr = bufp->addr; runtime->dma_bytes = bufp->bytes; } else { runtime->dma_buffer_p = NULL; runtime->dma_area = NULL; runtime->dma_addr = 0; runtime->dma_bytes = 0; } } /** * snd_pcm_gettime - Fill the timespec64 depending on the timestamp mode * @runtime: PCM runtime instance * @tv: timespec64 to fill */ static inline void snd_pcm_gettime(struct snd_pcm_runtime *runtime, struct timespec64 *tv) { switch (runtime->tstamp_type) { case SNDRV_PCM_TSTAMP_TYPE_MONOTONIC: ktime_get_ts64(tv); break; case SNDRV_PCM_TSTAMP_TYPE_MONOTONIC_RAW: ktime_get_raw_ts64(tv); break; default: ktime_get_real_ts64(tv); break; } } /* * Memory */ void snd_pcm_lib_preallocate_free(struct snd_pcm_substream *substream); void snd_pcm_lib_preallocate_free_for_all(struct snd_pcm *pcm); void snd_pcm_lib_preallocate_pages(struct snd_pcm_substream *substream, int type, struct device *data, size_t size, size_t max); void snd_pcm_lib_preallocate_pages_for_all(struct snd_pcm *pcm, int type, void *data, size_t size, size_t max); int snd_pcm_lib_malloc_pages(struct snd_pcm_substream *substream, size_t size); int snd_pcm_lib_free_pages(struct snd_pcm_substream *substream); int snd_pcm_set_managed_buffer(struct snd_pcm_substream *substream, int type, struct device *data, size_t size, size_t max); int snd_pcm_set_managed_buffer_all(struct snd_pcm *pcm, int type, struct device *data, size_t size, size_t max); /** * snd_pcm_set_fixed_buffer - Preallocate and set up the fixed size PCM buffer * @substream: the pcm substream instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * * This is a variant of snd_pcm_set_managed_buffer(), but this pre-allocates * only the given sized buffer and doesn't allow re-allocation nor dynamic * allocation of a larger buffer unlike the standard one. * The function may return -ENOMEM error, hence the caller must check it. * * Return: zero if successful, or a negative error code */ static inline int __must_check snd_pcm_set_fixed_buffer(struct snd_pcm_substream *substream, int type, struct device *data, size_t size) { return snd_pcm_set_managed_buffer(substream, type, data, size, 0); } /** * snd_pcm_set_fixed_buffer_all - Preallocate and set up the fixed size PCM buffer * @pcm: the pcm instance * @type: DMA type (SNDRV_DMA_TYPE_*) * @data: DMA type dependent data * @size: the requested pre-allocation size in bytes * * Apply the set up of the fixed buffer via snd_pcm_set_fixed_buffer() for * all substream. If any of allocation fails, it returns -ENOMEM, hence the * caller must check the return value. * * Return: zero if successful, or a negative error code */ static inline int __must_check snd_pcm_set_fixed_buffer_all(struct snd_pcm *pcm, int type, struct device *data, size_t size) { return snd_pcm_set_managed_buffer_all(pcm, type, data, size, 0); } int _snd_pcm_lib_alloc_vmalloc_buffer(struct snd_pcm_substream *substream, size_t size, gfp_t gfp_flags); int snd_pcm_lib_free_vmalloc_buffer(struct snd_pcm_substream *substream); struct page *snd_pcm_lib_get_vmalloc_page(struct snd_pcm_substream *substream, unsigned long offset); /** * snd_pcm_lib_alloc_vmalloc_buffer - allocate virtual DMA buffer * @substream: the substream to allocate the buffer to * @size: the requested buffer size, in bytes * * Allocates the PCM substream buffer using vmalloc(), i.e., the memory is * contiguous in kernel virtual space, but not in physical memory. Use this * if the buffer is accessed by kernel code but not by device DMA. * * Return: 1 if the buffer was changed, 0 if not changed, or a negative error * code. */ static inline int snd_pcm_lib_alloc_vmalloc_buffer (struct snd_pcm_substream *substream, size_t size) { return _snd_pcm_lib_alloc_vmalloc_buffer(substream, size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); } /** * snd_pcm_lib_alloc_vmalloc_32_buffer - allocate 32-bit-addressable buffer * @substream: the substream to allocate the buffer to * @size: the requested buffer size, in bytes * * This function works like snd_pcm_lib_alloc_vmalloc_buffer(), but uses * vmalloc_32(), i.e., the pages are allocated from 32-bit-addressable memory. * * Return: 1 if the buffer was changed, 0 if not changed, or a negative error * code. */ static inline int snd_pcm_lib_alloc_vmalloc_32_buffer (struct snd_pcm_substream *substream, size_t size) { return _snd_pcm_lib_alloc_vmalloc_buffer(substream, size, GFP_KERNEL | GFP_DMA32 | __GFP_ZERO); } #define snd_pcm_get_dma_buf(substream) ((substream)->runtime->dma_buffer_p) /** * snd_pcm_sgbuf_get_addr - Get the DMA address at the corresponding offset * @substream: PCM substream * @ofs: byte offset * * Return: DMA address */ static inline dma_addr_t snd_pcm_sgbuf_get_addr(struct snd_pcm_substream *substream, unsigned int ofs) { return snd_sgbuf_get_addr(snd_pcm_get_dma_buf(substream), ofs); } /** * snd_pcm_sgbuf_get_chunk_size - Compute the max size that fits within the * contig. page from the given size * @substream: PCM substream * @ofs: byte offset * @size: byte size to examine * * Return: chunk size */ static inline unsigned int snd_pcm_sgbuf_get_chunk_size(struct snd_pcm_substream *substream, unsigned int ofs, unsigned int size) { return snd_sgbuf_get_chunk_size(snd_pcm_get_dma_buf(substream), ofs, size); } /** * snd_pcm_mmap_data_open - increase the mmap counter * @area: VMA * * PCM mmap callback should handle this counter properly */ static inline void snd_pcm_mmap_data_open(struct vm_area_struct *area) { struct snd_pcm_substream *substream = (struct snd_pcm_substream *)area->vm_private_data; atomic_inc(&substream->mmap_count); } /** * snd_pcm_mmap_data_close - decrease the mmap counter * @area: VMA * * PCM mmap callback should handle this counter properly */ static inline void snd_pcm_mmap_data_close(struct vm_area_struct *area) { struct snd_pcm_substream *substream = (struct snd_pcm_substream *)area->vm_private_data; atomic_dec(&substream->mmap_count); } int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream, struct vm_area_struct *area); /* mmap for io-memory area */ #if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_ALPHA) #define SNDRV_PCM_INFO_MMAP_IOMEM SNDRV_PCM_INFO_MMAP int snd_pcm_lib_mmap_iomem(struct snd_pcm_substream *substream, struct vm_area_struct *area); #else #define SNDRV_PCM_INFO_MMAP_IOMEM 0 #define snd_pcm_lib_mmap_iomem NULL #endif /** * snd_pcm_limit_isa_dma_size - Get the max size fitting with ISA DMA transfer * @dma: DMA number * @max: pointer to store the max size */ static inline void snd_pcm_limit_isa_dma_size(int dma, size_t *max) { *max = dma < 4 ? 64 * 1024 : 128 * 1024; } /* * Misc */ #define SNDRV_PCM_DEFAULT_CON_SPDIF (IEC958_AES0_CON_EMPHASIS_NONE|\ (IEC958_AES1_CON_ORIGINAL<<8)|\ (IEC958_AES1_CON_PCM_CODER<<8)|\ (IEC958_AES3_CON_FS_48000<<24)) const char *snd_pcm_format_name(snd_pcm_format_t format); /** * snd_pcm_direction_name - Get a string naming the direction of a stream * @direction: Stream's direction, one of SNDRV_PCM_STREAM_XXX * * Returns a string naming the direction of the stream. */ static inline const char *snd_pcm_direction_name(int direction) { if (direction == SNDRV_PCM_STREAM_PLAYBACK) return "Playback"; else return "Capture"; } /** * snd_pcm_stream_str - Get a string naming the direction of a stream * @substream: the pcm substream instance * * Return: A string naming the direction of the stream. */ static inline const char *snd_pcm_stream_str(struct snd_pcm_substream *substream) { return snd_pcm_direction_name(substream->stream); } /* * PCM channel-mapping control API */ /* array element of channel maps */ struct snd_pcm_chmap_elem { unsigned char channels; unsigned char map[15]; }; /* channel map information; retrieved via snd_kcontrol_chip() */ struct snd_pcm_chmap { struct snd_pcm *pcm; /* assigned PCM instance */ int stream; /* PLAYBACK or CAPTURE */ struct snd_kcontrol *kctl; const struct snd_pcm_chmap_elem *chmap; unsigned int max_channels; unsigned int channel_mask; /* optional: active channels bitmask */ void *private_data; /* optional: private data pointer */ }; /** * snd_pcm_chmap_substream - get the PCM substream assigned to the given chmap info * @info: chmap information * @idx: the substream number index * * Return: the matched PCM substream, or NULL if not found */ static inline struct snd_pcm_substream * snd_pcm_chmap_substream(struct snd_pcm_chmap *info, unsigned int idx) { struct snd_pcm_substream *s; for (s = info->pcm->streams[info->stream].substream; s; s = s->next) if (s->number == idx) return s; return NULL; } /* ALSA-standard channel maps (RL/RR prior to C/LFE) */ extern const struct snd_pcm_chmap_elem snd_pcm_std_chmaps[]; /* Other world's standard channel maps (C/LFE prior to RL/RR) */ extern const struct snd_pcm_chmap_elem snd_pcm_alt_chmaps[]; /* bit masks to be passed to snd_pcm_chmap.channel_mask field */ #define SND_PCM_CHMAP_MASK_24 ((1U << 2) | (1U << 4)) #define SND_PCM_CHMAP_MASK_246 (SND_PCM_CHMAP_MASK_24 | (1U << 6)) #define SND_PCM_CHMAP_MASK_2468 (SND_PCM_CHMAP_MASK_246 | (1U << 8)) int snd_pcm_add_chmap_ctls(struct snd_pcm *pcm, int stream, const struct snd_pcm_chmap_elem *chmap, int max_channels, unsigned long private_value, struct snd_pcm_chmap **info_ret); /** * pcm_format_to_bits - Strong-typed conversion of pcm_format to bitwise * @pcm_format: PCM format * * Return: 64bit mask corresponding to the given PCM format */ static inline u64 pcm_format_to_bits(snd_pcm_format_t pcm_format) { return 1ULL << (__force int) pcm_format; } /** * pcm_for_each_format - helper to iterate for each format type * @f: the iterator variable in snd_pcm_format_t type */ #define pcm_for_each_format(f) \ for ((f) = SNDRV_PCM_FORMAT_FIRST; \ (__force int)(f) <= (__force int)SNDRV_PCM_FORMAT_LAST; \ (f) = (__force snd_pcm_format_t)((__force int)(f) + 1)) /* printk helpers */ #define pcm_err(pcm, fmt, args...) \ dev_err((pcm)->card->dev, fmt, ##args) #define pcm_warn(pcm, fmt, args...) \ dev_warn((pcm)->card->dev, fmt, ##args) #define pcm_dbg(pcm, fmt, args...) \ dev_dbg((pcm)->card->dev, fmt, ##args) /* helpers for copying between iov_iter and iomem */ int copy_to_iter_fromio(struct iov_iter *itert, const void __iomem *src, size_t count); int copy_from_iter_toio(void __iomem *dst, struct iov_iter *iter, size_t count); struct snd_pcm_status64 { snd_pcm_state_t state; /* stream state */ u8 rsvd[4]; s64 trigger_tstamp_sec; /* time when stream was started/stopped/paused */ s64 trigger_tstamp_nsec; s64 tstamp_sec; /* reference timestamp */ s64 tstamp_nsec; snd_pcm_uframes_t appl_ptr; /* appl ptr */ snd_pcm_uframes_t hw_ptr; /* hw ptr */ snd_pcm_sframes_t delay; /* current delay in frames */ snd_pcm_uframes_t avail; /* number of frames available */ snd_pcm_uframes_t avail_max; /* max frames available on hw since last status */ snd_pcm_uframes_t overrange; /* count of ADC (capture) overrange detections from last status */ snd_pcm_state_t suspended_state; /* suspended stream state */ __u32 audio_tstamp_data; /* needed for 64-bit alignment, used for configs/report to/from userspace */ s64 audio_tstamp_sec; /* sample counter, wall clock, PHC or on-demand sync'ed */ s64 audio_tstamp_nsec; s64 driver_tstamp_sec; /* useful in case reference system tstamp is reported with delay */ s64 driver_tstamp_nsec; __u32 audio_tstamp_accuracy; /* in ns units, only valid if indicated in audio_tstamp_data */ unsigned char reserved[52-4*sizeof(s64)]; /* must be filled with zero */ }; #define SNDRV_PCM_IOCTL_STATUS64 _IOR('A', 0x20, struct snd_pcm_status64) #define SNDRV_PCM_IOCTL_STATUS_EXT64 _IOWR('A', 0x24, struct snd_pcm_status64) struct snd_pcm_status32 { snd_pcm_state_t state; /* stream state */ s32 trigger_tstamp_sec; /* time when stream was started/stopped/paused */ s32 trigger_tstamp_nsec; s32 tstamp_sec; /* reference timestamp */ s32 tstamp_nsec; u32 appl_ptr; /* appl ptr */ u32 hw_ptr; /* hw ptr */ s32 delay; /* current delay in frames */ u32 avail; /* number of frames available */ u32 avail_max; /* max frames available on hw since last status */ u32 overrange; /* count of ADC (capture) overrange detections from last status */ snd_pcm_state_t suspended_state; /* suspended stream state */ u32 audio_tstamp_data; /* needed for 64-bit alignment, used for configs/report to/from userspace */ s32 audio_tstamp_sec; /* sample counter, wall clock, PHC or on-demand sync'ed */ s32 audio_tstamp_nsec; s32 driver_tstamp_sec; /* useful in case reference system tstamp is reported with delay */ s32 driver_tstamp_nsec; u32 audio_tstamp_accuracy; /* in ns units, only valid if indicated in audio_tstamp_data */ unsigned char reserved[52-4*sizeof(s32)]; /* must be filled with zero */ }; #define SNDRV_PCM_IOCTL_STATUS32 _IOR('A', 0x20, struct snd_pcm_status32) #define SNDRV_PCM_IOCTL_STATUS_EXT32 _IOWR('A', 0x24, struct snd_pcm_status32) #endif /* __SOUND_PCM_H */ |
| 2 1 1 1 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2005 Marc Kleine-Budde, Pengutronix * Copyright (C) 2006 Andrey Volkov, Varma Electronics * Copyright (C) 2008-2009 Wolfgang Grandegger <wg@grandegger.com> * Copyright (C) 2021 Vincent Mailhol <mailhol.vincent@wanadoo.fr> */ #include <linux/can/dev.h> #include <net/rtnetlink.h> static const struct nla_policy can_policy[IFLA_CAN_MAX + 1] = { [IFLA_CAN_STATE] = { .type = NLA_U32 }, [IFLA_CAN_CTRLMODE] = { .len = sizeof(struct can_ctrlmode) }, [IFLA_CAN_RESTART_MS] = { .type = NLA_U32 }, [IFLA_CAN_RESTART] = { .type = NLA_U32 }, [IFLA_CAN_BITTIMING] = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, [IFLA_CAN_CLOCK] = { .len = sizeof(struct can_clock) }, [IFLA_CAN_BERR_COUNTER] = { .len = sizeof(struct can_berr_counter) }, [IFLA_CAN_DATA_BITTIMING] = { .len = sizeof(struct can_bittiming) }, [IFLA_CAN_DATA_BITTIMING_CONST] = { .len = sizeof(struct can_bittiming_const) }, [IFLA_CAN_TERMINATION] = { .type = NLA_U16 }, [IFLA_CAN_TDC] = { .type = NLA_NESTED }, [IFLA_CAN_CTRLMODE_EXT] = { .type = NLA_NESTED }, }; static const struct nla_policy can_tdc_policy[IFLA_CAN_TDC_MAX + 1] = { [IFLA_CAN_TDC_TDCV_MIN] = { .type = NLA_U32 }, [IFLA_CAN_TDC_TDCV_MAX] = { .type = NLA_U32 }, [IFLA_CAN_TDC_TDCO_MIN] = { .type = NLA_U32 }, [IFLA_CAN_TDC_TDCO_MAX] = { .type = NLA_U32 }, [IFLA_CAN_TDC_TDCF_MIN] = { .type = NLA_U32 }, [IFLA_CAN_TDC_TDCF_MAX] = { .type = NLA_U32 }, [IFLA_CAN_TDC_TDCV] = { .type = NLA_U32 }, [IFLA_CAN_TDC_TDCO] = { .type = NLA_U32 }, [IFLA_CAN_TDC_TDCF] = { .type = NLA_U32 }, }; static int can_validate_bittiming(const struct can_bittiming *bt, struct netlink_ext_ack *extack) { /* sample point is in one-tenth of a percent */ if (bt->sample_point >= 1000) { NL_SET_ERR_MSG(extack, "sample point must be between 0 and 100%"); return -EINVAL; } return 0; } static int can_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { bool is_can_fd = false; int err; /* Make sure that valid CAN FD configurations always consist of * - nominal/arbitration bittiming * - data bittiming * - control mode with CAN_CTRLMODE_FD set * - TDC parameters are coherent (details below) */ if (!data) return 0; if (data[IFLA_CAN_BITTIMING]) { struct can_bittiming bt; memcpy(&bt, nla_data(data[IFLA_CAN_BITTIMING]), sizeof(bt)); err = can_validate_bittiming(&bt, extack); if (err) return err; } if (data[IFLA_CAN_CTRLMODE]) { struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]); u32 tdc_flags = cm->flags & CAN_CTRLMODE_TDC_MASK; is_can_fd = cm->flags & cm->mask & CAN_CTRLMODE_FD; /* CAN_CTRLMODE_TDC_{AUTO,MANUAL} are mutually exclusive */ if (tdc_flags == CAN_CTRLMODE_TDC_MASK) return -EOPNOTSUPP; /* If one of the CAN_CTRLMODE_TDC_* flag is set then * TDC must be set and vice-versa */ if (!!tdc_flags != !!data[IFLA_CAN_TDC]) return -EOPNOTSUPP; /* If providing TDC parameters, at least TDCO is * needed. TDCV is needed if and only if * CAN_CTRLMODE_TDC_MANUAL is set */ if (data[IFLA_CAN_TDC]) { struct nlattr *tb_tdc[IFLA_CAN_TDC_MAX + 1]; err = nla_parse_nested(tb_tdc, IFLA_CAN_TDC_MAX, data[IFLA_CAN_TDC], can_tdc_policy, extack); if (err) return err; if (tb_tdc[IFLA_CAN_TDC_TDCV]) { if (tdc_flags & CAN_CTRLMODE_TDC_AUTO) return -EOPNOTSUPP; } else { if (tdc_flags & CAN_CTRLMODE_TDC_MANUAL) return -EOPNOTSUPP; } if (!tb_tdc[IFLA_CAN_TDC_TDCO]) return -EOPNOTSUPP; } } if (is_can_fd) { if (!data[IFLA_CAN_BITTIMING] || !data[IFLA_CAN_DATA_BITTIMING]) return -EOPNOTSUPP; } if (data[IFLA_CAN_DATA_BITTIMING] || data[IFLA_CAN_TDC]) { if (!is_can_fd) return -EOPNOTSUPP; } if (data[IFLA_CAN_DATA_BITTIMING]) { struct can_bittiming bt; memcpy(&bt, nla_data(data[IFLA_CAN_DATA_BITTIMING]), sizeof(bt)); err = can_validate_bittiming(&bt, extack); if (err) return err; } return 0; } static int can_tdc_changelink(struct can_priv *priv, const struct nlattr *nla, struct netlink_ext_ack *extack) { struct nlattr *tb_tdc[IFLA_CAN_TDC_MAX + 1]; struct can_tdc tdc = { 0 }; const struct can_tdc_const *tdc_const = priv->tdc_const; int err; if (!tdc_const || !can_tdc_is_enabled(priv)) return -EOPNOTSUPP; err = nla_parse_nested(tb_tdc, IFLA_CAN_TDC_MAX, nla, can_tdc_policy, extack); if (err) return err; if (tb_tdc[IFLA_CAN_TDC_TDCV]) { u32 tdcv = nla_get_u32(tb_tdc[IFLA_CAN_TDC_TDCV]); if (tdcv < tdc_const->tdcv_min || tdcv > tdc_const->tdcv_max) return -EINVAL; tdc.tdcv = tdcv; } if (tb_tdc[IFLA_CAN_TDC_TDCO]) { u32 tdco = nla_get_u32(tb_tdc[IFLA_CAN_TDC_TDCO]); if (tdco < tdc_const->tdco_min || tdco > tdc_const->tdco_max) return -EINVAL; tdc.tdco = tdco; } if (tb_tdc[IFLA_CAN_TDC_TDCF]) { u32 tdcf = nla_get_u32(tb_tdc[IFLA_CAN_TDC_TDCF]); if (tdcf < tdc_const->tdcf_min || tdcf > tdc_const->tdcf_max) return -EINVAL; tdc.tdcf = tdcf; } priv->tdc = tdc; return 0; } static int can_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct can_priv *priv = netdev_priv(dev); u32 tdc_mask = 0; int err; /* We need synchronization with dev->stop() */ ASSERT_RTNL(); if (data[IFLA_CAN_BITTIMING]) { struct can_bittiming bt; /* Do not allow changing bittiming while running */ if (dev->flags & IFF_UP) return -EBUSY; /* Calculate bittiming parameters based on * bittiming_const if set, otherwise pass bitrate * directly via do_set_bitrate(). Bail out if neither * is given. */ if (!priv->bittiming_const && !priv->do_set_bittiming && !priv->bitrate_const) return -EOPNOTSUPP; memcpy(&bt, nla_data(data[IFLA_CAN_BITTIMING]), sizeof(bt)); err = can_get_bittiming(dev, &bt, priv->bittiming_const, priv->bitrate_const, priv->bitrate_const_cnt, extack); if (err) return err; if (priv->bitrate_max && bt.bitrate > priv->bitrate_max) { NL_SET_ERR_MSG_FMT(extack, "arbitration bitrate %u bps surpasses transceiver capabilities of %u bps", bt.bitrate, priv->bitrate_max); return -EINVAL; } memcpy(&priv->bittiming, &bt, sizeof(bt)); if (priv->do_set_bittiming) { /* Finally, set the bit-timing registers */ err = priv->do_set_bittiming(dev); if (err) return err; } } if (data[IFLA_CAN_CTRLMODE]) { struct can_ctrlmode *cm; u32 ctrlstatic; u32 maskedflags; /* Do not allow changing controller mode while running */ if (dev->flags & IFF_UP) return -EBUSY; cm = nla_data(data[IFLA_CAN_CTRLMODE]); ctrlstatic = can_get_static_ctrlmode(priv); maskedflags = cm->flags & cm->mask; /* check whether provided bits are allowed to be passed */ if (maskedflags & ~(priv->ctrlmode_supported | ctrlstatic)) return -EOPNOTSUPP; /* do not check for static fd-non-iso if 'fd' is disabled */ if (!(maskedflags & CAN_CTRLMODE_FD)) ctrlstatic &= ~CAN_CTRLMODE_FD_NON_ISO; /* make sure static options are provided by configuration */ if ((maskedflags & ctrlstatic) != ctrlstatic) return -EOPNOTSUPP; /* clear bits to be modified and copy the flag values */ priv->ctrlmode &= ~cm->mask; priv->ctrlmode |= maskedflags; /* CAN_CTRLMODE_FD can only be set when driver supports FD */ if (priv->ctrlmode & CAN_CTRLMODE_FD) { dev->mtu = CANFD_MTU; } else { dev->mtu = CAN_MTU; memset(&priv->data_bittiming, 0, sizeof(priv->data_bittiming)); priv->ctrlmode &= ~CAN_CTRLMODE_TDC_MASK; memset(&priv->tdc, 0, sizeof(priv->tdc)); } tdc_mask = cm->mask & CAN_CTRLMODE_TDC_MASK; /* CAN_CTRLMODE_TDC_{AUTO,MANUAL} are mutually * exclusive: make sure to turn the other one off */ if (tdc_mask) priv->ctrlmode &= cm->flags | ~CAN_CTRLMODE_TDC_MASK; } if (data[IFLA_CAN_RESTART_MS]) { /* Do not allow changing restart delay while running */ if (dev->flags & IFF_UP) return -EBUSY; priv->restart_ms = nla_get_u32(data[IFLA_CAN_RESTART_MS]); } if (data[IFLA_CAN_RESTART]) { /* Do not allow a restart while not running */ if (!(dev->flags & IFF_UP)) return -EINVAL; err = can_restart_now(dev); if (err) return err; } if (data[IFLA_CAN_DATA_BITTIMING]) { struct can_bittiming dbt; /* Do not allow changing bittiming while running */ if (dev->flags & IFF_UP) return -EBUSY; /* Calculate bittiming parameters based on * data_bittiming_const if set, otherwise pass bitrate * directly via do_set_bitrate(). Bail out if neither * is given. */ if (!priv->data_bittiming_const && !priv->do_set_data_bittiming && !priv->data_bitrate_const) return -EOPNOTSUPP; memcpy(&dbt, nla_data(data[IFLA_CAN_DATA_BITTIMING]), sizeof(dbt)); err = can_get_bittiming(dev, &dbt, priv->data_bittiming_const, priv->data_bitrate_const, priv->data_bitrate_const_cnt, extack); if (err) return err; if (priv->bitrate_max && dbt.bitrate > priv->bitrate_max) { NL_SET_ERR_MSG_FMT(extack, "CANFD data bitrate %u bps surpasses transceiver capabilities of %u bps", dbt.bitrate, priv->bitrate_max); return -EINVAL; } memset(&priv->tdc, 0, sizeof(priv->tdc)); if (data[IFLA_CAN_TDC]) { /* TDC parameters are provided: use them */ err = can_tdc_changelink(priv, data[IFLA_CAN_TDC], extack); if (err) { priv->ctrlmode &= ~CAN_CTRLMODE_TDC_MASK; return err; } } else if (!tdc_mask) { /* Neither of TDC parameters nor TDC flags are * provided: do calculation */ can_calc_tdco(&priv->tdc, priv->tdc_const, &dbt, &priv->ctrlmode, priv->ctrlmode_supported); } /* else: both CAN_CTRLMODE_TDC_{AUTO,MANUAL} are explicitly * turned off. TDC is disabled: do nothing */ memcpy(&priv->data_bittiming, &dbt, sizeof(dbt)); if (priv->do_set_data_bittiming) { /* Finally, set the bit-timing registers */ err = priv->do_set_data_bittiming(dev); if (err) return err; } } if (data[IFLA_CAN_TERMINATION]) { const u16 termval = nla_get_u16(data[IFLA_CAN_TERMINATION]); const unsigned int num_term = priv->termination_const_cnt; unsigned int i; if (!priv->do_set_termination) return -EOPNOTSUPP; /* check whether given value is supported by the interface */ for (i = 0; i < num_term; i++) { if (termval == priv->termination_const[i]) break; } if (i >= num_term) return -EINVAL; /* Finally, set the termination value */ err = priv->do_set_termination(dev, termval); if (err) return err; priv->termination = termval; } return 0; } static size_t can_tdc_get_size(const struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); size_t size; if (!priv->tdc_const) return 0; size = nla_total_size(0); /* nest IFLA_CAN_TDC */ if (priv->ctrlmode_supported & CAN_CTRLMODE_TDC_MANUAL) { size += nla_total_size(sizeof(u32)); /* IFLA_CAN_TDCV_MIN */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_TDCV_MAX */ } size += nla_total_size(sizeof(u32)); /* IFLA_CAN_TDCO_MIN */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_TDCO_MAX */ if (priv->tdc_const->tdcf_max) { size += nla_total_size(sizeof(u32)); /* IFLA_CAN_TDCF_MIN */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_TDCF_MAX */ } if (can_tdc_is_enabled(priv)) { if (priv->ctrlmode & CAN_CTRLMODE_TDC_MANUAL || priv->do_get_auto_tdcv) size += nla_total_size(sizeof(u32)); /* IFLA_CAN_TDCV */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_TDCO */ if (priv->tdc_const->tdcf_max) size += nla_total_size(sizeof(u32)); /* IFLA_CAN_TDCF */ } return size; } static size_t can_ctrlmode_ext_get_size(void) { return nla_total_size(0) + /* nest IFLA_CAN_CTRLMODE_EXT */ nla_total_size(sizeof(u32)); /* IFLA_CAN_CTRLMODE_SUPPORTED */ } static size_t can_get_size(const struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); size_t size = 0; if (priv->bittiming.bitrate) /* IFLA_CAN_BITTIMING */ size += nla_total_size(sizeof(struct can_bittiming)); if (priv->bittiming_const) /* IFLA_CAN_BITTIMING_CONST */ size += nla_total_size(sizeof(struct can_bittiming_const)); size += nla_total_size(sizeof(struct can_clock)); /* IFLA_CAN_CLOCK */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_STATE */ size += nla_total_size(sizeof(struct can_ctrlmode)); /* IFLA_CAN_CTRLMODE */ size += nla_total_size(sizeof(u32)); /* IFLA_CAN_RESTART_MS */ if (priv->do_get_berr_counter) /* IFLA_CAN_BERR_COUNTER */ size += nla_total_size(sizeof(struct can_berr_counter)); if (priv->data_bittiming.bitrate) /* IFLA_CAN_DATA_BITTIMING */ size += nla_total_size(sizeof(struct can_bittiming)); if (priv->data_bittiming_const) /* IFLA_CAN_DATA_BITTIMING_CONST */ size += nla_total_size(sizeof(struct can_bittiming_const)); if (priv->termination_const) { size += nla_total_size(sizeof(priv->termination)); /* IFLA_CAN_TERMINATION */ size += nla_total_size(sizeof(*priv->termination_const) * /* IFLA_CAN_TERMINATION_CONST */ priv->termination_const_cnt); } if (priv->bitrate_const) /* IFLA_CAN_BITRATE_CONST */ size += nla_total_size(sizeof(*priv->bitrate_const) * priv->bitrate_const_cnt); if (priv->data_bitrate_const) /* IFLA_CAN_DATA_BITRATE_CONST */ size += nla_total_size(sizeof(*priv->data_bitrate_const) * priv->data_bitrate_const_cnt); size += sizeof(priv->bitrate_max); /* IFLA_CAN_BITRATE_MAX */ size += can_tdc_get_size(dev); /* IFLA_CAN_TDC */ size += can_ctrlmode_ext_get_size(); /* IFLA_CAN_CTRLMODE_EXT */ return size; } static int can_tdc_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct nlattr *nest; struct can_priv *priv = netdev_priv(dev); struct can_tdc *tdc = &priv->tdc; const struct can_tdc_const *tdc_const = priv->tdc_const; if (!tdc_const) return 0; nest = nla_nest_start(skb, IFLA_CAN_TDC); if (!nest) return -EMSGSIZE; if (priv->ctrlmode_supported & CAN_CTRLMODE_TDC_MANUAL && (nla_put_u32(skb, IFLA_CAN_TDC_TDCV_MIN, tdc_const->tdcv_min) || nla_put_u32(skb, IFLA_CAN_TDC_TDCV_MAX, tdc_const->tdcv_max))) goto err_cancel; if (nla_put_u32(skb, IFLA_CAN_TDC_TDCO_MIN, tdc_const->tdco_min) || nla_put_u32(skb, IFLA_CAN_TDC_TDCO_MAX, tdc_const->tdco_max)) goto err_cancel; if (tdc_const->tdcf_max && (nla_put_u32(skb, IFLA_CAN_TDC_TDCF_MIN, tdc_const->tdcf_min) || nla_put_u32(skb, IFLA_CAN_TDC_TDCF_MAX, tdc_const->tdcf_max))) goto err_cancel; if (can_tdc_is_enabled(priv)) { u32 tdcv; int err = -EINVAL; if (priv->ctrlmode & CAN_CTRLMODE_TDC_MANUAL) { tdcv = tdc->tdcv; err = 0; } else if (priv->do_get_auto_tdcv) { err = priv->do_get_auto_tdcv(dev, &tdcv); } if (!err && nla_put_u32(skb, IFLA_CAN_TDC_TDCV, tdcv)) goto err_cancel; if (nla_put_u32(skb, IFLA_CAN_TDC_TDCO, tdc->tdco)) goto err_cancel; if (tdc_const->tdcf_max && nla_put_u32(skb, IFLA_CAN_TDC_TDCF, tdc->tdcf)) goto err_cancel; } nla_nest_end(skb, nest); return 0; err_cancel: nla_nest_cancel(skb, nest); return -EMSGSIZE; } static int can_ctrlmode_ext_fill_info(struct sk_buff *skb, const struct can_priv *priv) { struct nlattr *nest; nest = nla_nest_start(skb, IFLA_CAN_CTRLMODE_EXT); if (!nest) return -EMSGSIZE; if (nla_put_u32(skb, IFLA_CAN_CTRLMODE_SUPPORTED, priv->ctrlmode_supported)) { nla_nest_cancel(skb, nest); return -EMSGSIZE; } nla_nest_end(skb, nest); return 0; } static int can_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); struct can_ctrlmode cm = {.flags = priv->ctrlmode}; struct can_berr_counter bec = { }; enum can_state state = priv->state; if (priv->do_get_state) priv->do_get_state(dev, &state); if ((priv->bittiming.bitrate != CAN_BITRATE_UNSET && priv->bittiming.bitrate != CAN_BITRATE_UNKNOWN && nla_put(skb, IFLA_CAN_BITTIMING, sizeof(priv->bittiming), &priv->bittiming)) || (priv->bittiming_const && nla_put(skb, IFLA_CAN_BITTIMING_CONST, sizeof(*priv->bittiming_const), priv->bittiming_const)) || nla_put(skb, IFLA_CAN_CLOCK, sizeof(priv->clock), &priv->clock) || nla_put_u32(skb, IFLA_CAN_STATE, state) || nla_put(skb, IFLA_CAN_CTRLMODE, sizeof(cm), &cm) || nla_put_u32(skb, IFLA_CAN_RESTART_MS, priv->restart_ms) || (priv->do_get_berr_counter && !priv->do_get_berr_counter(dev, &bec) && nla_put(skb, IFLA_CAN_BERR_COUNTER, sizeof(bec), &bec)) || (priv->data_bittiming.bitrate && nla_put(skb, IFLA_CAN_DATA_BITTIMING, sizeof(priv->data_bittiming), &priv->data_bittiming)) || (priv->data_bittiming_const && nla_put(skb, IFLA_CAN_DATA_BITTIMING_CONST, sizeof(*priv->data_bittiming_const), priv->data_bittiming_const)) || (priv->termination_const && (nla_put_u16(skb, IFLA_CAN_TERMINATION, priv->termination) || nla_put(skb, IFLA_CAN_TERMINATION_CONST, sizeof(*priv->termination_const) * priv->termination_const_cnt, priv->termination_const))) || (priv->bitrate_const && nla_put(skb, IFLA_CAN_BITRATE_CONST, sizeof(*priv->bitrate_const) * priv->bitrate_const_cnt, priv->bitrate_const)) || (priv->data_bitrate_const && nla_put(skb, IFLA_CAN_DATA_BITRATE_CONST, sizeof(*priv->data_bitrate_const) * priv->data_bitrate_const_cnt, priv->data_bitrate_const)) || (nla_put(skb, IFLA_CAN_BITRATE_MAX, sizeof(priv->bitrate_max), &priv->bitrate_max)) || can_tdc_fill_info(skb, dev) || can_ctrlmode_ext_fill_info(skb, priv) ) return -EMSGSIZE; return 0; } static size_t can_get_xstats_size(const struct net_device *dev) { return sizeof(struct can_device_stats); } static int can_fill_xstats(struct sk_buff *skb, const struct net_device *dev) { struct can_priv *priv = netdev_priv(dev); if (nla_put(skb, IFLA_INFO_XSTATS, sizeof(priv->can_stats), &priv->can_stats)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static int can_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { return -EOPNOTSUPP; } static void can_dellink(struct net_device *dev, struct list_head *head) { } struct rtnl_link_ops can_link_ops __read_mostly = { .kind = "can", .netns_refund = true, .maxtype = IFLA_CAN_MAX, .policy = can_policy, .setup = can_setup, .validate = can_validate, .newlink = can_newlink, .changelink = can_changelink, .dellink = can_dellink, .get_size = can_get_size, .fill_info = can_fill_info, .get_xstats_size = can_get_xstats_size, .fill_xstats = can_fill_xstats, }; int can_netlink_register(void) { return rtnl_link_register(&can_link_ops); } void can_netlink_unregister(void) { rtnl_link_unregister(&can_link_ops); } |
| 1 40 40 37 1 1 15 40 5 40 23 21 19 19 5 6 7 5 19 28 29 23 29 8 19 19 4 14 7 16 2 14 16 16 18 18 18 18 19 18 1 1 1 1 1 5 2 1 2 1 2 2 12 12 8 2 2 2 8 1 1 1 1 4 1 2 1 1 1 2 1 1 22 22 22 1 1 2 1 1 1 1 12 12 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich, Antonio Quartulli */ #include "translation-table.h" #include "main.h" #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/build_bug.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/compiler.h> #include <linux/container_of.h> #include <linux/crc32c.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> #include <net/genetlink.h> #include <net/netlink.h> #include <net/sock.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "netlink.h" #include "originator.h" #include "soft-interface.h" #include "tvlv.h" static struct kmem_cache *batadv_tl_cache __read_mostly; static struct kmem_cache *batadv_tg_cache __read_mostly; static struct kmem_cache *batadv_tt_orig_cache __read_mostly; static struct kmem_cache *batadv_tt_change_cache __read_mostly; static struct kmem_cache *batadv_tt_req_cache __read_mostly; static struct kmem_cache *batadv_tt_roam_cache __read_mostly; /* hash class keys */ static struct lock_class_key batadv_tt_local_hash_lock_class_key; static struct lock_class_key batadv_tt_global_hash_lock_class_key; static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client, unsigned short vid, struct batadv_orig_node *orig_node); static void batadv_tt_purge(struct work_struct *work); static void batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry); static void batadv_tt_global_del(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *addr, unsigned short vid, const char *message, bool roaming); /** * batadv_compare_tt() - check if two TT entries are the same * @node: the list element pointer of the first TT entry * @data2: pointer to the tt_common_entry of the second TT entry * * Compare the MAC address and the VLAN ID of the two TT entries and check if * they are the same TT client. * Return: true if the two TT clients are the same, false otherwise */ static bool batadv_compare_tt(const struct hlist_node *node, const void *data2) { const void *data1 = container_of(node, struct batadv_tt_common_entry, hash_entry); const struct batadv_tt_common_entry *tt1 = data1; const struct batadv_tt_common_entry *tt2 = data2; return (tt1->vid == tt2->vid) && batadv_compare_eth(data1, data2); } /** * batadv_choose_tt() - return the index of the tt entry in the hash table * @data: pointer to the tt_common_entry object to map * @size: the size of the hash table * * Return: the hash index where the object represented by 'data' should be * stored at. */ static inline u32 batadv_choose_tt(const void *data, u32 size) { const struct batadv_tt_common_entry *tt; u32 hash = 0; tt = data; hash = jhash(&tt->addr, ETH_ALEN, hash); hash = jhash(&tt->vid, sizeof(tt->vid), hash); return hash % size; } /** * batadv_tt_hash_find() - look for a client in the given hash table * @hash: the hash table to search * @addr: the mac address of the client to look for * @vid: VLAN identifier * * Return: a pointer to the tt_common struct belonging to the searched client if * found, NULL otherwise. */ static struct batadv_tt_common_entry * batadv_tt_hash_find(struct batadv_hashtable *hash, const u8 *addr, unsigned short vid) { struct hlist_head *head; struct batadv_tt_common_entry to_search, *tt, *tt_tmp = NULL; u32 index; if (!hash) return NULL; ether_addr_copy(to_search.addr, addr); to_search.vid = vid; index = batadv_choose_tt(&to_search, hash->size); head = &hash->table[index]; rcu_read_lock(); hlist_for_each_entry_rcu(tt, head, hash_entry) { if (!batadv_compare_eth(tt, addr)) continue; if (tt->vid != vid) continue; if (!kref_get_unless_zero(&tt->refcount)) continue; tt_tmp = tt; break; } rcu_read_unlock(); return tt_tmp; } /** * batadv_tt_local_hash_find() - search the local table for a given client * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to look for * @vid: VLAN identifier * * Return: a pointer to the corresponding tt_local_entry struct if the client is * found, NULL otherwise. */ static struct batadv_tt_local_entry * batadv_tt_local_hash_find(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local_entry = NULL; tt_common_entry = batadv_tt_hash_find(bat_priv->tt.local_hash, addr, vid); if (tt_common_entry) tt_local_entry = container_of(tt_common_entry, struct batadv_tt_local_entry, common); return tt_local_entry; } /** * batadv_tt_global_hash_find() - search the global table for a given client * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to look for * @vid: VLAN identifier * * Return: a pointer to the corresponding tt_global_entry struct if the client * is found, NULL otherwise. */ struct batadv_tt_global_entry * batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global_entry = NULL; tt_common_entry = batadv_tt_hash_find(bat_priv->tt.global_hash, addr, vid); if (tt_common_entry) tt_global_entry = container_of(tt_common_entry, struct batadv_tt_global_entry, common); return tt_global_entry; } /** * batadv_tt_local_entry_free_rcu() - free the tt_local_entry * @rcu: rcu pointer of the tt_local_entry */ static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) { struct batadv_tt_local_entry *tt_local_entry; tt_local_entry = container_of(rcu, struct batadv_tt_local_entry, common.rcu); kmem_cache_free(batadv_tl_cache, tt_local_entry); } /** * batadv_tt_local_entry_release() - release tt_local_entry from lists and queue * for free after rcu grace period * @ref: kref pointer of the nc_node */ static void batadv_tt_local_entry_release(struct kref *ref) { struct batadv_tt_local_entry *tt_local_entry; tt_local_entry = container_of(ref, struct batadv_tt_local_entry, common.refcount); batadv_softif_vlan_put(tt_local_entry->vlan); call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu); } /** * batadv_tt_local_entry_put() - decrement the tt_local_entry refcounter and * possibly release it * @tt_local_entry: tt_local_entry to be free'd */ static void batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry) { if (!tt_local_entry) return; kref_put(&tt_local_entry->common.refcount, batadv_tt_local_entry_release); } /** * batadv_tt_global_entry_free_rcu() - free the tt_global_entry * @rcu: rcu pointer of the tt_global_entry */ static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) { struct batadv_tt_global_entry *tt_global_entry; tt_global_entry = container_of(rcu, struct batadv_tt_global_entry, common.rcu); kmem_cache_free(batadv_tg_cache, tt_global_entry); } /** * batadv_tt_global_entry_release() - release tt_global_entry from lists and * queue for free after rcu grace period * @ref: kref pointer of the nc_node */ void batadv_tt_global_entry_release(struct kref *ref) { struct batadv_tt_global_entry *tt_global_entry; tt_global_entry = container_of(ref, struct batadv_tt_global_entry, common.refcount); batadv_tt_global_del_orig_list(tt_global_entry); call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu); } /** * batadv_tt_global_hash_count() - count the number of orig entries * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to count entries for * @vid: VLAN identifier * * Return: the number of originators advertising the given address/data * (excluding our self). */ int batadv_tt_global_hash_count(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { struct batadv_tt_global_entry *tt_global_entry; int count; tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid); if (!tt_global_entry) return 0; count = atomic_read(&tt_global_entry->orig_list_count); batadv_tt_global_entry_put(tt_global_entry); return count; } /** * batadv_tt_local_size_mod() - change the size by v of the local table * identified by vid * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier of the sub-table to change * @v: the amount to sum to the local table size */ static void batadv_tt_local_size_mod(struct batadv_priv *bat_priv, unsigned short vid, int v) { struct batadv_softif_vlan *vlan; vlan = batadv_softif_vlan_get(bat_priv, vid); if (!vlan) return; atomic_add(v, &vlan->tt.num_entries); batadv_softif_vlan_put(vlan); } /** * batadv_tt_local_size_inc() - increase by one the local table size for the * given vid * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier */ static void batadv_tt_local_size_inc(struct batadv_priv *bat_priv, unsigned short vid) { batadv_tt_local_size_mod(bat_priv, vid, 1); } /** * batadv_tt_local_size_dec() - decrease by one the local table size for the * given vid * @bat_priv: the bat priv with all the soft interface information * @vid: the VLAN identifier */ static void batadv_tt_local_size_dec(struct batadv_priv *bat_priv, unsigned short vid) { batadv_tt_local_size_mod(bat_priv, vid, -1); } /** * batadv_tt_global_size_mod() - change the size by v of the global table * for orig_node identified by vid * @orig_node: the originator for which the table has to be modified * @vid: the VLAN identifier * @v: the amount to sum to the global table size */ static void batadv_tt_global_size_mod(struct batadv_orig_node *orig_node, unsigned short vid, int v) { struct batadv_orig_node_vlan *vlan; vlan = batadv_orig_node_vlan_new(orig_node, vid); if (!vlan) return; if (atomic_add_return(v, &vlan->tt.num_entries) == 0) { spin_lock_bh(&orig_node->vlan_list_lock); if (!hlist_unhashed(&vlan->list)) { hlist_del_init_rcu(&vlan->list); batadv_orig_node_vlan_put(vlan); } spin_unlock_bh(&orig_node->vlan_list_lock); } batadv_orig_node_vlan_put(vlan); } /** * batadv_tt_global_size_inc() - increase by one the global table size for the * given vid * @orig_node: the originator which global table size has to be decreased * @vid: the vlan identifier */ static void batadv_tt_global_size_inc(struct batadv_orig_node *orig_node, unsigned short vid) { batadv_tt_global_size_mod(orig_node, vid, 1); } /** * batadv_tt_global_size_dec() - decrease by one the global table size for the * given vid * @orig_node: the originator which global table size has to be decreased * @vid: the vlan identifier */ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, unsigned short vid) { batadv_tt_global_size_mod(orig_node, vid, -1); } /** * batadv_tt_orig_list_entry_free_rcu() - free the orig_entry * @rcu: rcu pointer of the orig_entry */ static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) { struct batadv_tt_orig_list_entry *orig_entry; orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); kmem_cache_free(batadv_tt_orig_cache, orig_entry); } /** * batadv_tt_orig_list_entry_release() - release tt orig entry from lists and * queue for free after rcu grace period * @ref: kref pointer of the tt orig entry */ static void batadv_tt_orig_list_entry_release(struct kref *ref) { struct batadv_tt_orig_list_entry *orig_entry; orig_entry = container_of(ref, struct batadv_tt_orig_list_entry, refcount); batadv_orig_node_put(orig_entry->orig_node); call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } /** * batadv_tt_orig_list_entry_put() - decrement the tt orig entry refcounter and * possibly release it * @orig_entry: tt orig entry to be free'd */ static void batadv_tt_orig_list_entry_put(struct batadv_tt_orig_list_entry *orig_entry) { if (!orig_entry) return; kref_put(&orig_entry->refcount, batadv_tt_orig_list_entry_release); } /** * batadv_tt_local_event() - store a local TT event (ADD/DEL) * @bat_priv: the bat priv with all the soft interface information * @tt_local_entry: the TT entry involved in the event * @event_flags: flags to store in the event structure */ static void batadv_tt_local_event(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, u8 event_flags) { struct batadv_tt_change_node *tt_change_node, *entry, *safe; struct batadv_tt_common_entry *common = &tt_local_entry->common; u8 flags = common->flags | event_flags; bool event_removed = false; bool del_op_requested, del_op_entry; tt_change_node = kmem_cache_alloc(batadv_tt_change_cache, GFP_ATOMIC); if (!tt_change_node) return; tt_change_node->change.flags = flags; memset(tt_change_node->change.reserved, 0, sizeof(tt_change_node->change.reserved)); ether_addr_copy(tt_change_node->change.addr, common->addr); tt_change_node->change.vid = htons(common->vid); del_op_requested = flags & BATADV_TT_CLIENT_DEL; /* check for ADD+DEL or DEL+ADD events */ spin_lock_bh(&bat_priv->tt.changes_list_lock); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { if (!batadv_compare_eth(entry->change.addr, common->addr)) continue; /* DEL+ADD in the same orig interval have no effect and can be * removed to avoid silly behaviour on the receiver side. The * other way around (ADD+DEL) can happen in case of roaming of * a client still in the NEW state. Roaming of NEW clients is * now possible due to automatically recognition of "temporary" * clients */ del_op_entry = entry->change.flags & BATADV_TT_CLIENT_DEL; if (!del_op_requested && del_op_entry) goto del; if (del_op_requested && !del_op_entry) goto del; /* this is a second add in the same originator interval. It * means that flags have been changed: update them! */ if (!del_op_requested && !del_op_entry) entry->change.flags = flags; continue; del: list_del(&entry->list); kmem_cache_free(batadv_tt_change_cache, entry); kmem_cache_free(batadv_tt_change_cache, tt_change_node); event_removed = true; goto unlock; } /* track the change in the OGMinterval list */ list_add_tail(&tt_change_node->list, &bat_priv->tt.changes_list); unlock: spin_unlock_bh(&bat_priv->tt.changes_list_lock); if (event_removed) atomic_dec(&bat_priv->tt.local_changes); else atomic_inc(&bat_priv->tt.local_changes); } /** * batadv_tt_len() - compute length in bytes of given number of tt changes * @changes_num: number of tt changes * * Return: computed length in bytes. */ static int batadv_tt_len(int changes_num) { return changes_num * sizeof(struct batadv_tvlv_tt_change); } /** * batadv_tt_entries() - compute the number of entries fitting in tt_len bytes * @tt_len: available space * * Return: the number of entries. */ static u16 batadv_tt_entries(u16 tt_len) { return tt_len / batadv_tt_len(1); } /** * batadv_tt_local_table_transmit_size() - calculates the local translation * table size when transmitted over the air * @bat_priv: the bat priv with all the soft interface information * * Return: local translation table size in bytes. */ static int batadv_tt_local_table_transmit_size(struct batadv_priv *bat_priv) { u16 num_vlan = 0; u16 tt_local_entries = 0; struct batadv_softif_vlan *vlan; int hdr_size; rcu_read_lock(); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { num_vlan++; tt_local_entries += atomic_read(&vlan->tt.num_entries); } rcu_read_unlock(); /* header size of tvlv encapsulated tt response payload */ hdr_size = sizeof(struct batadv_unicast_tvlv_packet); hdr_size += sizeof(struct batadv_tvlv_hdr); hdr_size += sizeof(struct batadv_tvlv_tt_data); hdr_size += num_vlan * sizeof(struct batadv_tvlv_tt_vlan_data); return hdr_size + batadv_tt_len(tt_local_entries); } static int batadv_tt_local_init(struct batadv_priv *bat_priv) { if (bat_priv->tt.local_hash) return 0; bat_priv->tt.local_hash = batadv_hash_new(1024); if (!bat_priv->tt.local_hash) return -ENOMEM; batadv_hash_set_lock_class(bat_priv->tt.local_hash, &batadv_tt_local_hash_lock_class_key); return 0; } static void batadv_tt_global_free(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global, const char *message) { struct batadv_tt_global_entry *tt_removed_entry; struct hlist_node *tt_removed_node; batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, batadv_print_vid(tt_global->common.vid), message); tt_removed_node = batadv_hash_remove(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_tt, &tt_global->common); if (!tt_removed_node) return; /* drop reference of remove hash entry */ tt_removed_entry = hlist_entry(tt_removed_node, struct batadv_tt_global_entry, common.hash_entry); batadv_tt_global_entry_put(tt_removed_entry); } /** * batadv_tt_local_add() - add a new client to the local table or update an * existing client * @soft_iface: netdev struct of the mesh interface * @addr: the mac address of the client to add * @vid: VLAN identifier * @ifindex: index of the interface where the client is connected to (useful to * identify wireless clients) * @mark: the value contained in the skb->mark field of the received packet (if * any) * * Return: true if the client was successfully added, false otherwise. */ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, unsigned short vid, int ifindex, u32 mark) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_tt_local_entry *tt_local; struct batadv_tt_global_entry *tt_global = NULL; struct net *net = dev_net(soft_iface); struct batadv_softif_vlan *vlan; struct net_device *in_dev = NULL; struct batadv_hard_iface *in_hardif = NULL; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry; int hash_added, table_size, packet_size_max; bool ret = false; bool roamed_back = false; u8 remote_flags; u32 match_mark; if (ifindex != BATADV_NULL_IFINDEX) in_dev = dev_get_by_index(net, ifindex); if (in_dev) in_hardif = batadv_hardif_get_by_netdev(in_dev); tt_local = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!is_multicast_ether_addr(addr)) tt_global = batadv_tt_global_hash_find(bat_priv, addr, vid); if (tt_local) { tt_local->last_seen = jiffies; if (tt_local->common.flags & BATADV_TT_CLIENT_PENDING) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Re-adding pending client %pM (vid: %d)\n", addr, batadv_print_vid(vid)); /* whatever the reason why the PENDING flag was set, * this is a client which was enqueued to be removed in * this orig_interval. Since it popped up again, the * flag can be reset like it was never enqueued */ tt_local->common.flags &= ~BATADV_TT_CLIENT_PENDING; goto add_event; } if (tt_local->common.flags & BATADV_TT_CLIENT_ROAM) { batadv_dbg(BATADV_DBG_TT, bat_priv, "Roaming client %pM (vid: %d) came back to its original location\n", addr, batadv_print_vid(vid)); /* the ROAM flag is set because this client roamed away * and the node got a roaming_advertisement message. Now * that the client popped up again at its original * location such flag can be unset */ tt_local->common.flags &= ~BATADV_TT_CLIENT_ROAM; roamed_back = true; } goto check_roaming; } /* Ignore the client if we cannot send it in a full table response. */ table_size = batadv_tt_local_table_transmit_size(bat_priv); table_size += batadv_tt_len(1); packet_size_max = atomic_read(&bat_priv->packet_size_max); if (table_size > packet_size_max) { net_ratelimited_function(batadv_info, soft_iface, "Local translation table size (%i) exceeds maximum packet size (%i); Ignoring new local tt entry: %pM\n", table_size, packet_size_max, addr); goto out; } tt_local = kmem_cache_alloc(batadv_tl_cache, GFP_ATOMIC); if (!tt_local) goto out; /* increase the refcounter of the related vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); if (!vlan) { net_ratelimited_function(batadv_info, soft_iface, "adding TT local entry %pM to non-existent VLAN %d\n", addr, batadv_print_vid(vid)); kmem_cache_free(batadv_tl_cache, tt_local); tt_local = NULL; goto out; } batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new local tt entry: %pM (vid: %d, ttvn: %d)\n", addr, batadv_print_vid(vid), (u8)atomic_read(&bat_priv->tt.vn)); ether_addr_copy(tt_local->common.addr, addr); /* The local entry has to be marked as NEW to avoid to send it in * a full table response going out before the next ttvn increment * (consistency check) */ tt_local->common.flags = BATADV_TT_CLIENT_NEW; tt_local->common.vid = vid; if (batadv_is_wifi_hardif(in_hardif)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; kref_init(&tt_local->common.refcount); tt_local->last_seen = jiffies; tt_local->common.added_at = tt_local->last_seen; tt_local->vlan = vlan; /* the batman interface mac and multicast addresses should never be * purged */ if (batadv_compare_eth(addr, soft_iface->dev_addr) || is_multicast_ether_addr(addr)) tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; kref_get(&tt_local->common.refcount); hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local->common, &tt_local->common.hash_entry); if (unlikely(hash_added != 0)) { /* remove the reference for the hash */ batadv_tt_local_entry_put(tt_local); goto out; } add_event: batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS); check_roaming: /* Check whether it is a roaming, but don't do anything if the roaming * process has already been handled */ if (tt_global && !(tt_global->common.flags & BATADV_TT_CLIENT_ROAM)) { /* These node are probably going to update their tt table */ head = &tt_global->orig_list; rcu_read_lock(); hlist_for_each_entry_rcu(orig_entry, head, list) { batadv_send_roam_adv(bat_priv, tt_global->common.addr, tt_global->common.vid, orig_entry->orig_node); } rcu_read_unlock(); if (roamed_back) { batadv_tt_global_free(bat_priv, tt_global, "Roaming canceled"); } else { /* The global entry has to be marked as ROAMING and * has to be kept for consistency purpose */ tt_global->common.flags |= BATADV_TT_CLIENT_ROAM; tt_global->roam_at = jiffies; } } /* store the current remote flags before altering them. This helps * understanding is flags are changing or not */ remote_flags = tt_local->common.flags & BATADV_TT_REMOTE_MASK; if (batadv_is_wifi_hardif(in_hardif)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; else tt_local->common.flags &= ~BATADV_TT_CLIENT_WIFI; /* check the mark in the skb: if it's equal to the configured * isolation_mark, it means the packet is coming from an isolated * non-mesh client */ match_mark = (mark & bat_priv->isolation_mark_mask); if (bat_priv->isolation_mark_mask && match_mark == bat_priv->isolation_mark) tt_local->common.flags |= BATADV_TT_CLIENT_ISOLA; else tt_local->common.flags &= ~BATADV_TT_CLIENT_ISOLA; /* if any "dynamic" flag has been modified, resend an ADD event for this * entry so that all the nodes can get the new flags */ if (remote_flags ^ (tt_local->common.flags & BATADV_TT_REMOTE_MASK)) batadv_tt_local_event(bat_priv, tt_local, BATADV_NO_FLAGS); ret = true; out: batadv_hardif_put(in_hardif); dev_put(in_dev); batadv_tt_local_entry_put(tt_local); batadv_tt_global_entry_put(tt_global); return ret; } /** * batadv_tt_prepare_tvlv_global_data() - prepare the TVLV TT header to send * within a TT Response directed to another node * @orig_node: originator for which the TT data has to be prepared * @tt_data: uninitialised pointer to the address of the TVLV buffer * @tt_change: uninitialised pointer to the address of the area where the TT * changed can be stored * @tt_len: pointer to the length to reserve to the tt_change. if -1 this * function reserves the amount of space needed to send the entire global TT * table. In case of success the value is updated with the real amount of * reserved bytes * Allocate the needed amount of memory for the entire TT TVLV and write its * header made up of one tvlv_tt_data object and a series of tvlv_tt_vlan_data * objects, one per active VLAN served by the originator node. * * Return: the size of the allocated buffer or 0 in case of failure. */ static u16 batadv_tt_prepare_tvlv_global_data(struct batadv_orig_node *orig_node, struct batadv_tvlv_tt_data **tt_data, struct batadv_tvlv_tt_change **tt_change, s32 *tt_len) { u16 num_vlan = 0; u16 num_entries = 0; u16 change_offset; u16 tvlv_len; struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_orig_node_vlan *vlan; u8 *tt_change_ptr; spin_lock_bh(&orig_node->vlan_list_lock); hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { num_vlan++; num_entries += atomic_read(&vlan->tt.num_entries); } change_offset = sizeof(**tt_data); change_offset += num_vlan * sizeof(*tt_vlan); /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) *tt_len = batadv_tt_len(num_entries); tvlv_len = *tt_len; tvlv_len += change_offset; *tt_data = kmalloc(tvlv_len, GFP_ATOMIC); if (!*tt_data) { *tt_len = 0; goto out; } (*tt_data)->flags = BATADV_NO_FLAGS; (*tt_data)->ttvn = atomic_read(&orig_node->last_ttvn); (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); hlist_for_each_entry(vlan, &orig_node->vlan_list, list) { tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); tt_vlan->reserved = 0; tt_vlan++; } tt_change_ptr = (u8 *)*tt_data + change_offset; *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: spin_unlock_bh(&orig_node->vlan_list_lock); return tvlv_len; } /** * batadv_tt_prepare_tvlv_local_data() - allocate and prepare the TT TVLV for * this node * @bat_priv: the bat priv with all the soft interface information * @tt_data: uninitialised pointer to the address of the TVLV buffer * @tt_change: uninitialised pointer to the address of the area where the TT * changes can be stored * @tt_len: pointer to the length to reserve to the tt_change. if -1 this * function reserves the amount of space needed to send the entire local TT * table. In case of success the value is updated with the real amount of * reserved bytes * * Allocate the needed amount of memory for the entire TT TVLV and write its * header made up by one tvlv_tt_data object and a series of tvlv_tt_vlan_data * objects, one per active VLAN. * * Return: the size of the allocated buffer or 0 in case of failure. */ static u16 batadv_tt_prepare_tvlv_local_data(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data **tt_data, struct batadv_tvlv_tt_change **tt_change, s32 *tt_len) { struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_softif_vlan *vlan; u16 num_vlan = 0; u16 vlan_entries = 0; u16 total_entries = 0; u16 tvlv_len; u8 *tt_change_ptr; int change_offset; spin_lock_bh(&bat_priv->softif_vlan_list_lock); hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) continue; num_vlan++; total_entries += vlan_entries; } change_offset = sizeof(**tt_data); change_offset += num_vlan * sizeof(*tt_vlan); /* if tt_len is negative, allocate the space needed by the full table */ if (*tt_len < 0) *tt_len = batadv_tt_len(total_entries); tvlv_len = *tt_len; tvlv_len += change_offset; *tt_data = kmalloc(tvlv_len, GFP_ATOMIC); if (!*tt_data) { tvlv_len = 0; goto out; } (*tt_data)->flags = BATADV_NO_FLAGS; (*tt_data)->ttvn = atomic_read(&bat_priv->tt.vn); (*tt_data)->num_vlan = htons(num_vlan); tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(*tt_data + 1); hlist_for_each_entry(vlan, &bat_priv->softif_vlan_list, list) { vlan_entries = atomic_read(&vlan->tt.num_entries); if (vlan_entries < 1) continue; tt_vlan->vid = htons(vlan->vid); tt_vlan->crc = htonl(vlan->tt.crc); tt_vlan->reserved = 0; tt_vlan++; } tt_change_ptr = (u8 *)*tt_data + change_offset; *tt_change = (struct batadv_tvlv_tt_change *)tt_change_ptr; out: spin_unlock_bh(&bat_priv->softif_vlan_list_lock); return tvlv_len; } /** * batadv_tt_tvlv_container_update() - update the translation table tvlv * container after local tt changes have been committed * @bat_priv: the bat priv with all the soft interface information */ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) { struct batadv_tt_change_node *entry, *safe; struct batadv_tvlv_tt_data *tt_data; struct batadv_tvlv_tt_change *tt_change; int tt_diff_len, tt_change_len = 0; int tt_diff_entries_num = 0; int tt_diff_entries_count = 0; u16 tvlv_len; tt_diff_entries_num = atomic_read(&bat_priv->tt.local_changes); tt_diff_len = batadv_tt_len(tt_diff_entries_num); /* if we have too many changes for one packet don't send any * and wait for the tt table request which will be fragmented */ if (tt_diff_len > bat_priv->soft_iface->mtu) tt_diff_len = 0; tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv, &tt_data, &tt_change, &tt_diff_len); if (!tvlv_len) return; tt_data->flags = BATADV_TT_OGM_DIFF; if (tt_diff_len == 0) goto container_register; spin_lock_bh(&bat_priv->tt.changes_list_lock); atomic_set(&bat_priv->tt.local_changes, 0); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { if (tt_diff_entries_count < tt_diff_entries_num) { memcpy(tt_change + tt_diff_entries_count, &entry->change, sizeof(struct batadv_tvlv_tt_change)); tt_diff_entries_count++; } list_del(&entry->list); kmem_cache_free(batadv_tt_change_cache, entry); } spin_unlock_bh(&bat_priv->tt.changes_list_lock); /* Keep the buffer for possible tt_request */ spin_lock_bh(&bat_priv->tt.last_changeset_lock); kfree(bat_priv->tt.last_changeset); bat_priv->tt.last_changeset_len = 0; bat_priv->tt.last_changeset = NULL; tt_change_len = batadv_tt_len(tt_diff_entries_count); /* check whether this new OGM has no changes due to size problems */ if (tt_diff_entries_count > 0) { /* if kmalloc() fails we will reply with the full table * instead of providing the diff */ bat_priv->tt.last_changeset = kzalloc(tt_diff_len, GFP_ATOMIC); if (bat_priv->tt.last_changeset) { memcpy(bat_priv->tt.last_changeset, tt_change, tt_change_len); bat_priv->tt.last_changeset_len = tt_diff_len; } } spin_unlock_bh(&bat_priv->tt.last_changeset_lock); container_register: batadv_tvlv_container_register(bat_priv, BATADV_TVLV_TT, 1, tt_data, tvlv_len); kfree(tt_data); } /** * batadv_tt_local_dump_entry() - Dump one TT local entry into a message * @msg :Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information * @common: tt local & tt global common data * * Return: Error code, or 0 on success */ static int batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_tt_common_entry *common) { void *hdr; struct batadv_softif_vlan *vlan; struct batadv_tt_local_entry *local; unsigned int last_seen_msecs; u32 crc; local = container_of(common, struct batadv_tt_local_entry, common); last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen); vlan = batadv_softif_vlan_get(bat_priv, common->vid); if (!vlan) return 0; crc = vlan->tt.crc; batadv_softif_vlan_put(vlan); hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_TRANSTABLE_LOCAL); if (!hdr) return -ENOBUFS; genl_dump_check_consistent(cb, hdr); if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) || nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) goto nla_put_failure; if (!(common->flags & BATADV_TT_CLIENT_NOPURGE) && nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * batadv_tt_local_dump_bucket() - Dump one TT local bucket into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information * @hash: hash to dump * @bucket: bucket index to dump * @idx_s: Number of entries to skip * * Return: Error code, or 0 on success */ static int batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_hashtable *hash, unsigned int bucket, int *idx_s) { struct batadv_tt_common_entry *common; int idx = 0; spin_lock_bh(&hash->list_locks[bucket]); cb->seq = atomic_read(&hash->generation) << 1 | 1; hlist_for_each_entry(common, &hash->table[bucket], hash_entry) { if (idx++ < *idx_s) continue; if (batadv_tt_local_dump_entry(msg, portid, cb, bat_priv, common)) { spin_unlock_bh(&hash->list_locks[bucket]); *idx_s = idx - 1; return -EMSGSIZE; } } spin_unlock_bh(&hash->list_locks[bucket]); *idx_s = 0; return 0; } /** * batadv_tt_local_dump() - Dump TT local entries into a message * @msg: Netlink message to dump into * @cb: Parameters from query * * Return: Error code, or 0 on success */ int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct net_device *soft_iface; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_hashtable *hash; int ret; int ifindex; int bucket = cb->args[0]; int idx = cb->args[1]; int portid = NETLINK_CB(cb->skb).portid; ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); if (!ifindex) return -EINVAL; soft_iface = dev_get_by_index(net, ifindex); if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { ret = -ENODEV; goto out; } bat_priv = netdev_priv(soft_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { ret = -ENOENT; goto out; } hash = bat_priv->tt.local_hash; while (bucket < hash->size) { if (batadv_tt_local_dump_bucket(msg, portid, cb, bat_priv, hash, bucket, &idx)) break; bucket++; } ret = msg->len; out: batadv_hardif_put(primary_if); dev_put(soft_iface); cb->args[0] = bucket; cb->args[1] = idx; return ret; } static void batadv_tt_local_set_pending(struct batadv_priv *bat_priv, struct batadv_tt_local_entry *tt_local_entry, u16 flags, const char *message) { batadv_tt_local_event(bat_priv, tt_local_entry, flags); /* The local client has to be marked as "pending to be removed" but has * to be kept in the table in order to send it in a full table * response issued before the net ttvn increment (consistency check) */ tt_local_entry->common.flags |= BATADV_TT_CLIENT_PENDING; batadv_dbg(BATADV_DBG_TT, bat_priv, "Local tt entry (%pM, vid: %d) pending to be removed: %s\n", tt_local_entry->common.addr, batadv_print_vid(tt_local_entry->common.vid), message); } /** * batadv_tt_local_remove() - logically remove an entry from the local table * @bat_priv: the bat priv with all the soft interface information * @addr: the MAC address of the client to remove * @vid: VLAN identifier * @message: message to append to the log on deletion * @roaming: true if the deletion is due to a roaming event * * Return: the flags assigned to the local entry before being deleted */ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid, const char *message, bool roaming) { struct batadv_tt_local_entry *tt_removed_entry; struct batadv_tt_local_entry *tt_local_entry; u16 flags, curr_flags = BATADV_NO_FLAGS; struct hlist_node *tt_removed_node; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) goto out; curr_flags = tt_local_entry->common.flags; flags = BATADV_TT_CLIENT_DEL; /* if this global entry addition is due to a roaming, the node has to * mark the local entry as "roamed" in order to correctly reroute * packets later */ if (roaming) { flags |= BATADV_TT_CLIENT_ROAM; /* mark the local client as ROAMed */ tt_local_entry->common.flags |= BATADV_TT_CLIENT_ROAM; } if (!(tt_local_entry->common.flags & BATADV_TT_CLIENT_NEW)) { batadv_tt_local_set_pending(bat_priv, tt_local_entry, flags, message); goto out; } /* if this client has been added right now, it is possible to * immediately purge it */ batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL); tt_removed_node = batadv_hash_remove(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local_entry->common); if (!tt_removed_node) goto out; /* drop reference of remove hash entry */ tt_removed_entry = hlist_entry(tt_removed_node, struct batadv_tt_local_entry, common.hash_entry); batadv_tt_local_entry_put(tt_removed_entry); out: batadv_tt_local_entry_put(tt_local_entry); return curr_flags; } /** * batadv_tt_local_purge_list() - purge inactive tt local entries * @bat_priv: the bat priv with all the soft interface information * @head: pointer to the list containing the local tt entries * @timeout: parameter deciding whether a given tt local entry is considered * inactive or not */ static void batadv_tt_local_purge_list(struct batadv_priv *bat_priv, struct hlist_head *head, int timeout) { struct batadv_tt_local_entry *tt_local_entry; struct batadv_tt_common_entry *tt_common_entry; struct hlist_node *node_tmp; hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { tt_local_entry = container_of(tt_common_entry, struct batadv_tt_local_entry, common); if (tt_local_entry->common.flags & BATADV_TT_CLIENT_NOPURGE) continue; /* entry already marked for deletion */ if (tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING) continue; if (!batadv_has_timed_out(tt_local_entry->last_seen, timeout)) continue; batadv_tt_local_set_pending(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL, "timed out"); } } /** * batadv_tt_local_purge() - purge inactive tt local entries * @bat_priv: the bat priv with all the soft interface information * @timeout: parameter deciding whether a given tt local entry is considered * inactive or not */ static void batadv_tt_local_purge(struct batadv_priv *bat_priv, int timeout) { struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ u32 i; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); batadv_tt_local_purge_list(bat_priv, head, timeout); spin_unlock_bh(list_lock); } } static void batadv_tt_local_table_free(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_local_entry *tt_local; struct hlist_node *node_tmp; struct hlist_head *head; u32 i; if (!bat_priv->tt.local_hash) return; hash = bat_priv->tt.local_hash; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { hlist_del_rcu(&tt_common_entry->hash_entry); tt_local = container_of(tt_common_entry, struct batadv_tt_local_entry, common); batadv_tt_local_entry_put(tt_local); } spin_unlock_bh(list_lock); } batadv_hash_destroy(hash); bat_priv->tt.local_hash = NULL; } static int batadv_tt_global_init(struct batadv_priv *bat_priv) { if (bat_priv->tt.global_hash) return 0; bat_priv->tt.global_hash = batadv_hash_new(1024); if (!bat_priv->tt.global_hash) return -ENOMEM; batadv_hash_set_lock_class(bat_priv->tt.global_hash, &batadv_tt_global_hash_lock_class_key); return 0; } static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_change_node *entry, *safe; spin_lock_bh(&bat_priv->tt.changes_list_lock); list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { list_del(&entry->list); kmem_cache_free(batadv_tt_change_cache, entry); } atomic_set(&bat_priv->tt.local_changes, 0); spin_unlock_bh(&bat_priv->tt.changes_list_lock); } /** * batadv_tt_global_orig_entry_find() - find a TT orig_list_entry * @entry: the TT global entry where the orig_list_entry has to be * extracted from * @orig_node: the originator for which the orig_list_entry has to be found * * retrieve the orig_tt_list_entry belonging to orig_node from the * batadv_tt_global_entry list * * Return: it with an increased refcounter, NULL if not found */ static struct batadv_tt_orig_list_entry * batadv_tt_global_orig_entry_find(const struct batadv_tt_global_entry *entry, const struct batadv_orig_node *orig_node) { struct batadv_tt_orig_list_entry *tmp_orig_entry, *orig_entry = NULL; const struct hlist_head *head; rcu_read_lock(); head = &entry->orig_list; hlist_for_each_entry_rcu(tmp_orig_entry, head, list) { if (tmp_orig_entry->orig_node != orig_node) continue; if (!kref_get_unless_zero(&tmp_orig_entry->refcount)) continue; orig_entry = tmp_orig_entry; break; } rcu_read_unlock(); return orig_entry; } /** * batadv_tt_global_entry_has_orig() - check if a TT global entry is also * handled by a given originator * @entry: the TT global entry to check * @orig_node: the originator to search in the list * @flags: a pointer to store TT flags for the given @entry received * from @orig_node * * find out if an orig_node is already in the list of a tt_global_entry. * * Return: true if found, false otherwise */ static bool batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, const struct batadv_orig_node *orig_node, u8 *flags) { struct batadv_tt_orig_list_entry *orig_entry; bool found = false; orig_entry = batadv_tt_global_orig_entry_find(entry, orig_node); if (orig_entry) { found = true; if (flags) *flags = orig_entry->flags; batadv_tt_orig_list_entry_put(orig_entry); } return found; } /** * batadv_tt_global_sync_flags() - update TT sync flags * @tt_global: the TT global entry to update sync flags in * * Updates the sync flag bits in the tt_global flag attribute with a logical * OR of all sync flags from any of its TT orig entries. */ static void batadv_tt_global_sync_flags(struct batadv_tt_global_entry *tt_global) { struct batadv_tt_orig_list_entry *orig_entry; const struct hlist_head *head; u16 flags = BATADV_NO_FLAGS; rcu_read_lock(); head = &tt_global->orig_list; hlist_for_each_entry_rcu(orig_entry, head, list) flags |= orig_entry->flags; rcu_read_unlock(); flags |= tt_global->common.flags & (~BATADV_TT_SYNC_MASK); tt_global->common.flags = flags; } /** * batadv_tt_global_orig_entry_add() - add or update a TT orig entry * @tt_global: the TT global entry to add an orig entry in * @orig_node: the originator to add an orig entry for * @ttvn: translation table version number of this changeset * @flags: TT sync flags */ static void batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, struct batadv_orig_node *orig_node, int ttvn, u8 flags) { struct batadv_tt_orig_list_entry *orig_entry; spin_lock_bh(&tt_global->list_lock); orig_entry = batadv_tt_global_orig_entry_find(tt_global, orig_node); if (orig_entry) { /* refresh the ttvn: the current value could be a bogus one that * was added during a "temporary client detection" */ orig_entry->ttvn = ttvn; orig_entry->flags = flags; goto sync_flags; } orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC); if (!orig_entry) goto out; INIT_HLIST_NODE(&orig_entry->list); kref_get(&orig_node->refcount); batadv_tt_global_size_inc(orig_node, tt_global->common.vid); orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; orig_entry->flags = flags; kref_init(&orig_entry->refcount); kref_get(&orig_entry->refcount); hlist_add_head_rcu(&orig_entry->list, &tt_global->orig_list); atomic_inc(&tt_global->orig_list_count); sync_flags: batadv_tt_global_sync_flags(tt_global); out: batadv_tt_orig_list_entry_put(orig_entry); spin_unlock_bh(&tt_global->list_lock); } /** * batadv_tt_global_add() - add a new TT global entry or update an existing one * @bat_priv: the bat priv with all the soft interface information * @orig_node: the originator announcing the client * @tt_addr: the mac address of the non-mesh client * @vid: VLAN identifier * @flags: TT flags that have to be set for this non-mesh client * @ttvn: the tt version number ever announcing this non-mesh client * * Add a new TT global entry for the given originator. If the entry already * exists add a new reference to the given originator (a global entry can have * references to multiple originators) and adjust the flags attribute to reflect * the function argument. * If a TT local entry exists for this non-mesh client remove it. * * The caller must hold the orig_node refcount. * * Return: true if the new entry has been added, false otherwise */ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *tt_addr, unsigned short vid, u16 flags, u8 ttvn) { struct batadv_tt_global_entry *tt_global_entry; struct batadv_tt_local_entry *tt_local_entry; bool ret = false; int hash_added; struct batadv_tt_common_entry *common; u16 local_flags; /* ignore global entries from backbone nodes */ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vid)) return true; tt_global_entry = batadv_tt_global_hash_find(bat_priv, tt_addr, vid); tt_local_entry = batadv_tt_local_hash_find(bat_priv, tt_addr, vid); /* if the node already has a local client for this entry, it has to wait * for a roaming advertisement instead of manually messing up the global * table */ if ((flags & BATADV_TT_CLIENT_TEMP) && tt_local_entry && !(tt_local_entry->common.flags & BATADV_TT_CLIENT_NEW)) goto out; if (!tt_global_entry) { tt_global_entry = kmem_cache_zalloc(batadv_tg_cache, GFP_ATOMIC); if (!tt_global_entry) goto out; common = &tt_global_entry->common; ether_addr_copy(common->addr, tt_addr); common->vid = vid; if (!is_multicast_ether_addr(common->addr)) common->flags = flags & (~BATADV_TT_SYNC_MASK); tt_global_entry->roam_at = 0; /* node must store current time in case of roaming. This is * needed to purge this entry out on timeout (if nobody claims * it) */ if (flags & BATADV_TT_CLIENT_ROAM) tt_global_entry->roam_at = jiffies; kref_init(&common->refcount); common->added_at = jiffies; INIT_HLIST_HEAD(&tt_global_entry->orig_list); atomic_set(&tt_global_entry->orig_list_count, 0); spin_lock_init(&tt_global_entry->list_lock); kref_get(&common->refcount); hash_added = batadv_hash_add(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_tt, common, &common->hash_entry); if (unlikely(hash_added != 0)) { /* remove the reference for the hash */ batadv_tt_global_entry_put(tt_global_entry); goto out_remove; } } else { common = &tt_global_entry->common; /* If there is already a global entry, we can use this one for * our processing. * But if we are trying to add a temporary client then here are * two options at this point: * 1) the global client is not a temporary client: the global * client has to be left as it is, temporary information * should never override any already known client state * 2) the global client is a temporary client: purge the * originator list and add the new one orig_entry */ if (flags & BATADV_TT_CLIENT_TEMP) { if (!(common->flags & BATADV_TT_CLIENT_TEMP)) goto out; if (batadv_tt_global_entry_has_orig(tt_global_entry, orig_node, NULL)) goto out_remove; batadv_tt_global_del_orig_list(tt_global_entry); goto add_orig_entry; } /* if the client was temporary added before receiving the first * OGM announcing it, we have to clear the TEMP flag. Also, * remove the previous temporary orig node and re-add it * if required. If the orig entry changed, the new one which * is a non-temporary entry is preferred. */ if (common->flags & BATADV_TT_CLIENT_TEMP) { batadv_tt_global_del_orig_list(tt_global_entry); common->flags &= ~BATADV_TT_CLIENT_TEMP; } /* the change can carry possible "attribute" flags like the * TT_CLIENT_TEMP, therefore they have to be copied in the * client entry */ if (!is_multicast_ether_addr(common->addr)) common->flags |= flags & (~BATADV_TT_SYNC_MASK); /* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only * one originator left in the list and we previously received a * delete + roaming change for this originator. * * We should first delete the old originator before adding the * new one. */ if (common->flags & BATADV_TT_CLIENT_ROAM) { batadv_tt_global_del_orig_list(tt_global_entry); common->flags &= ~BATADV_TT_CLIENT_ROAM; tt_global_entry->roam_at = 0; } } add_orig_entry: /* add the new orig_entry (if needed) or update it */ batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn, flags & BATADV_TT_SYNC_MASK); batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (vid: %d, via %pM)\n", common->addr, batadv_print_vid(common->vid), orig_node->orig); ret = true; out_remove: /* Do not remove multicast addresses from the local hash on * global additions */ if (is_multicast_ether_addr(tt_addr)) goto out; /* remove address from local hash if present */ local_flags = batadv_tt_local_remove(bat_priv, tt_addr, vid, "global tt received", flags & BATADV_TT_CLIENT_ROAM); tt_global_entry->common.flags |= local_flags & BATADV_TT_CLIENT_WIFI; if (!(flags & BATADV_TT_CLIENT_ROAM)) /* this is a normal global add. Therefore the client is not in a * roaming state anymore. */ tt_global_entry->common.flags &= ~BATADV_TT_CLIENT_ROAM; out: batadv_tt_global_entry_put(tt_global_entry); batadv_tt_local_entry_put(tt_local_entry); return ret; } /** * batadv_transtable_best_orig() - Get best originator list entry from tt entry * @bat_priv: the bat priv with all the soft interface information * @tt_global_entry: global translation table entry to be analyzed * * This function assumes the caller holds rcu_read_lock(). * Return: best originator list entry or NULL on errors. */ static struct batadv_tt_orig_list_entry * batadv_transtable_best_orig(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global_entry) { struct batadv_neigh_node *router, *best_router = NULL; struct batadv_algo_ops *bao = bat_priv->algo_ops; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry, *best_entry = NULL; head = &tt_global_entry->orig_list; hlist_for_each_entry_rcu(orig_entry, head, list) { router = batadv_orig_router_get(orig_entry->orig_node, BATADV_IF_DEFAULT); if (!router) continue; if (best_router && bao->neigh.cmp(router, BATADV_IF_DEFAULT, best_router, BATADV_IF_DEFAULT) <= 0) { batadv_neigh_node_put(router); continue; } /* release the refcount for the "old" best */ batadv_neigh_node_put(best_router); best_entry = orig_entry; best_router = router; } batadv_neigh_node_put(best_router); return best_entry; } /** * batadv_tt_global_dump_subentry() - Dump all TT local entries into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @common: tt local & tt global common data * @orig: Originator node announcing a non-mesh client * @best: Is the best originator for the TT entry * * Return: Error code, or 0 on success */ static int batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_tt_common_entry *common, struct batadv_tt_orig_list_entry *orig, bool best) { u16 flags = (common->flags & (~BATADV_TT_SYNC_MASK)) | orig->flags; void *hdr; struct batadv_orig_node_vlan *vlan; u8 last_ttvn; u32 crc; vlan = batadv_orig_node_vlan_get(orig->orig_node, common->vid); if (!vlan) return 0; crc = vlan->tt.crc; batadv_orig_node_vlan_put(vlan); hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_TRANSTABLE_GLOBAL); if (!hdr) return -ENOBUFS; last_ttvn = atomic_read(&orig->orig_node->last_ttvn); if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) || nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig->orig_node->orig) || nla_put_u8(msg, BATADV_ATTR_TT_TTVN, orig->ttvn) || nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) || nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, flags)) goto nla_put_failure; if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * batadv_tt_global_dump_entry() - Dump one TT global entry into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the soft interface information * @common: tt local & tt global common data * @sub_s: Number of entries to skip * * This function assumes the caller holds rcu_read_lock(). * * Return: Error code, or 0 on success */ static int batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_tt_common_entry *common, int *sub_s) { struct batadv_tt_orig_list_entry *orig_entry, *best_entry; struct batadv_tt_global_entry *global; struct hlist_head *head; int sub = 0; bool best; global = container_of(common, struct batadv_tt_global_entry, common); best_entry = batadv_transtable_best_orig(bat_priv, global); head = &global->orig_list; hlist_for_each_entry_rcu(orig_entry, head, list) { if (sub++ < *sub_s) continue; best = (orig_entry == best_entry); if (batadv_tt_global_dump_subentry(msg, portid, seq, common, orig_entry, best)) { *sub_s = sub - 1; return -EMSGSIZE; } } *sub_s = 0; return 0; } /** * batadv_tt_global_dump_bucket() - Dump one TT local bucket into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the soft interface information * @head: Pointer to the list containing the global tt entries * @idx_s: Number of entries to skip * @sub: Number of entries to skip * * Return: Error code, or 0 on success */ static int batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct hlist_head *head, int *idx_s, int *sub) { struct batadv_tt_common_entry *common; int idx = 0; rcu_read_lock(); hlist_for_each_entry_rcu(common, head, hash_entry) { if (idx++ < *idx_s) continue; if (batadv_tt_global_dump_entry(msg, portid, seq, bat_priv, common, sub)) { rcu_read_unlock(); *idx_s = idx - 1; return -EMSGSIZE; } } rcu_read_unlock(); *idx_s = 0; *sub = 0; return 0; } /** * batadv_tt_global_dump() - Dump TT global entries into a message * @msg: Netlink message to dump into * @cb: Parameters from query * * Return: Error code, or length of message on success */ int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) { struct net *net = sock_net(cb->skb->sk); struct net_device *soft_iface; struct batadv_priv *bat_priv; struct batadv_hard_iface *primary_if = NULL; struct batadv_hashtable *hash; struct hlist_head *head; int ret; int ifindex; int bucket = cb->args[0]; int idx = cb->args[1]; int sub = cb->args[2]; int portid = NETLINK_CB(cb->skb).portid; ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); if (!ifindex) return -EINVAL; soft_iface = dev_get_by_index(net, ifindex); if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { ret = -ENODEV; goto out; } bat_priv = netdev_priv(soft_iface); primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { ret = -ENOENT; goto out; } hash = bat_priv->tt.global_hash; while (bucket < hash->size) { head = &hash->table[bucket]; if (batadv_tt_global_dump_bucket(msg, portid, cb->nlh->nlmsg_seq, bat_priv, head, &idx, &sub)) break; bucket++; } ret = msg->len; out: batadv_hardif_put(primary_if); dev_put(soft_iface); cb->args[0] = bucket; cb->args[1] = idx; cb->args[2] = sub; return ret; } /** * _batadv_tt_global_del_orig_entry() - remove and free an orig_entry * @tt_global_entry: the global entry to remove the orig_entry from * @orig_entry: the orig entry to remove and free * * Remove an orig_entry from its list in the given tt_global_entry and * free this orig_entry afterwards. * * Caller must hold tt_global_entry->list_lock and ensure orig_entry->list is * part of a list. */ static void _batadv_tt_global_del_orig_entry(struct batadv_tt_global_entry *tt_global_entry, struct batadv_tt_orig_list_entry *orig_entry) { lockdep_assert_held(&tt_global_entry->list_lock); batadv_tt_global_size_dec(orig_entry->orig_node, tt_global_entry->common.vid); atomic_dec(&tt_global_entry->orig_list_count); /* requires holding tt_global_entry->list_lock and orig_entry->list * being part of a list */ hlist_del_rcu(&orig_entry->list); batadv_tt_orig_list_entry_put(orig_entry); } /* deletes the orig list of a tt_global_entry */ static void batadv_tt_global_del_orig_list(struct batadv_tt_global_entry *tt_global_entry) { struct hlist_head *head; struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; hlist_for_each_entry_safe(orig_entry, safe, head, list) _batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry); spin_unlock_bh(&tt_global_entry->list_lock); } /** * batadv_tt_global_del_orig_node() - remove orig_node from a global tt entry * @bat_priv: the bat priv with all the soft interface information * @tt_global_entry: the global entry to remove the orig_node from * @orig_node: the originator announcing the client * @message: message to append to the log on deletion * * Remove the given orig_node and its according orig_entry from the given * global tt entry. */ static void batadv_tt_global_del_orig_node(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global_entry, struct batadv_orig_node *orig_node, const char *message) { struct hlist_head *head; struct hlist_node *safe; struct batadv_tt_orig_list_entry *orig_entry; unsigned short vid; spin_lock_bh(&tt_global_entry->list_lock); head = &tt_global_entry->orig_list; hlist_for_each_entry_safe(orig_entry, safe, head, list) { if (orig_entry->orig_node == orig_node) { vid = tt_global_entry->common.vid; batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting %pM from global tt entry %pM (vid: %d): %s\n", orig_node->orig, tt_global_entry->common.addr, batadv_print_vid(vid), message); _batadv_tt_global_del_orig_entry(tt_global_entry, orig_entry); } } spin_unlock_bh(&tt_global_entry->list_lock); } /* If the client is to be deleted, we check if it is the last origantor entry * within tt_global entry. If yes, we set the BATADV_TT_CLIENT_ROAM flag and the * timer, otherwise we simply remove the originator scheduled for deletion. */ static void batadv_tt_global_del_roaming(struct batadv_priv *bat_priv, struct batadv_tt_global_entry *tt_global_entry, struct batadv_orig_node *orig_node, const char *message) { bool last_entry = true; struct hlist_head *head; struct batadv_tt_orig_list_entry *orig_entry; /* no local entry exists, case 1: * Check if this is the last one or if other entries exist. */ rcu_read_lock(); head = &tt_global_entry->orig_list; hlist_for_each_entry_rcu(orig_entry, head, list) { if (orig_entry->orig_node != orig_node) { last_entry = false; break; } } rcu_read_unlock(); if (last_entry) { /* its the last one, mark for roaming. */ tt_global_entry->common.flags |= BATADV_TT_CLIENT_ROAM; tt_global_entry->roam_at = jiffies; } else { /* there is another entry, we can simply delete this * one and can still use the other one. */ batadv_tt_global_del_orig_node(bat_priv, tt_global_entry, orig_node, message); } } /** * batadv_tt_global_del() - remove a client from the global table * @bat_priv: the bat priv with all the soft interface information * @orig_node: an originator serving this client * @addr: the mac address of the client * @vid: VLAN identifier * @message: a message explaining the reason for deleting the client to print * for debugging purpose * @roaming: true if the deletion has been triggered by a roaming event */ static void batadv_tt_global_del(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *addr, unsigned short vid, const char *message, bool roaming) { struct batadv_tt_global_entry *tt_global_entry; struct batadv_tt_local_entry *local_entry = NULL; tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid); if (!tt_global_entry) goto out; if (!roaming) { batadv_tt_global_del_orig_node(bat_priv, tt_global_entry, orig_node, message); if (hlist_empty(&tt_global_entry->orig_list)) batadv_tt_global_free(bat_priv, tt_global_entry, message); goto out; } /* if we are deleting a global entry due to a roam * event, there are two possibilities: * 1) the client roamed from node A to node B => if there * is only one originator left for this client, we mark * it with BATADV_TT_CLIENT_ROAM, we start a timer and we * wait for node B to claim it. In case of timeout * the entry is purged. * * If there are other originators left, we directly delete * the originator. * 2) the client roamed to us => we can directly delete * the global entry, since it is useless now. */ local_entry = batadv_tt_local_hash_find(bat_priv, tt_global_entry->common.addr, vid); if (local_entry) { /* local entry exists, case 2: client roamed to us. */ batadv_tt_global_del_orig_list(tt_global_entry); batadv_tt_global_free(bat_priv, tt_global_entry, message); } else { /* no local entry exists, case 1: check for roaming */ batadv_tt_global_del_roaming(bat_priv, tt_global_entry, orig_node, message); } out: batadv_tt_global_entry_put(tt_global_entry); batadv_tt_local_entry_put(local_entry); } /** * batadv_tt_global_del_orig() - remove all the TT global entries belonging to * the given originator matching the provided vid * @bat_priv: the bat priv with all the soft interface information * @orig_node: the originator owning the entries to remove * @match_vid: the VLAN identifier to match. If negative all the entries will be * removed * @message: debug message to print as "reason" */ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, s32 match_vid, const char *message) { struct batadv_tt_global_entry *tt_global; struct batadv_tt_common_entry *tt_common_entry; u32 i; struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_node *safe; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ unsigned short vid; if (!hash) return; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); hlist_for_each_entry_safe(tt_common_entry, safe, head, hash_entry) { /* remove only matching entries */ if (match_vid >= 0 && tt_common_entry->vid != match_vid) continue; tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, common); batadv_tt_global_del_orig_node(bat_priv, tt_global, orig_node, message); if (hlist_empty(&tt_global->orig_list)) { vid = tt_global->common.vid; batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, batadv_print_vid(vid), message); hlist_del_rcu(&tt_common_entry->hash_entry); batadv_tt_global_entry_put(tt_global); } } spin_unlock_bh(list_lock); } clear_bit(BATADV_ORIG_CAPA_HAS_TT, &orig_node->capa_initialized); } static bool batadv_tt_global_to_purge(struct batadv_tt_global_entry *tt_global, char **msg) { bool purge = false; unsigned long roam_timeout = BATADV_TT_CLIENT_ROAM_TIMEOUT; unsigned long temp_timeout = BATADV_TT_CLIENT_TEMP_TIMEOUT; if ((tt_global->common.flags & BATADV_TT_CLIENT_ROAM) && batadv_has_timed_out(tt_global->roam_at, roam_timeout)) { purge = true; *msg = "Roaming timeout\n"; } if ((tt_global->common.flags & BATADV_TT_CLIENT_TEMP) && batadv_has_timed_out(tt_global->common.added_at, temp_timeout)) { purge = true; *msg = "Temporary client timeout\n"; } return purge; } static void batadv_tt_global_purge(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct hlist_head *head; struct hlist_node *node_tmp; spinlock_t *list_lock; /* protects write access to the hash lists */ u32 i; char *msg = NULL; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); hlist_for_each_entry_safe(tt_common, node_tmp, head, hash_entry) { tt_global = container_of(tt_common, struct batadv_tt_global_entry, common); if (!batadv_tt_global_to_purge(tt_global, &msg)) continue; batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting global tt entry %pM (vid: %d): %s\n", tt_global->common.addr, batadv_print_vid(tt_global->common.vid), msg); hlist_del_rcu(&tt_common->hash_entry); batadv_tt_global_entry_put(tt_global); } spin_unlock_bh(list_lock); } } static void batadv_tt_global_table_free(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash; spinlock_t *list_lock; /* protects write access to the hash lists */ struct batadv_tt_common_entry *tt_common_entry; struct batadv_tt_global_entry *tt_global; struct hlist_node *node_tmp; struct hlist_head *head; u32 i; if (!bat_priv->tt.global_hash) return; hash = bat_priv->tt.global_hash; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); hlist_for_each_entry_safe(tt_common_entry, node_tmp, head, hash_entry) { hlist_del_rcu(&tt_common_entry->hash_entry); tt_global = container_of(tt_common_entry, struct batadv_tt_global_entry, common); batadv_tt_global_entry_put(tt_global); } spin_unlock_bh(list_lock); } batadv_hash_destroy(hash); bat_priv->tt.global_hash = NULL; } static bool _batadv_is_ap_isolated(struct batadv_tt_local_entry *tt_local_entry, struct batadv_tt_global_entry *tt_global_entry) { if (tt_local_entry->common.flags & BATADV_TT_CLIENT_WIFI && tt_global_entry->common.flags & BATADV_TT_CLIENT_WIFI) return true; /* check if the two clients are marked as isolated */ if (tt_local_entry->common.flags & BATADV_TT_CLIENT_ISOLA && tt_global_entry->common.flags & BATADV_TT_CLIENT_ISOLA) return true; return false; } /** * batadv_transtable_search() - get the mesh destination for a given client * @bat_priv: the bat priv with all the soft interface information * @src: mac address of the source client * @addr: mac address of the destination client * @vid: VLAN identifier * * Return: a pointer to the originator that was selected as destination in the * mesh for contacting the client 'addr', NULL otherwise. * In case of multiple originators serving the same client, the function returns * the best one (best in terms of metric towards the destination node). * * If the two clients are AP isolated the function returns NULL. */ struct batadv_orig_node *batadv_transtable_search(struct batadv_priv *bat_priv, const u8 *src, const u8 *addr, unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry = NULL; struct batadv_tt_global_entry *tt_global_entry = NULL; struct batadv_orig_node *orig_node = NULL; struct batadv_tt_orig_list_entry *best_entry; if (src && batadv_vlan_ap_isola_get(bat_priv, vid)) { tt_local_entry = batadv_tt_local_hash_find(bat_priv, src, vid); if (!tt_local_entry || (tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING)) goto out; } tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid); if (!tt_global_entry) goto out; /* check whether the clients should not communicate due to AP * isolation */ if (tt_local_entry && _batadv_is_ap_isolated(tt_local_entry, tt_global_entry)) goto out; rcu_read_lock(); best_entry = batadv_transtable_best_orig(bat_priv, tt_global_entry); /* found anything? */ if (best_entry) orig_node = best_entry->orig_node; if (orig_node && !kref_get_unless_zero(&orig_node->refcount)) orig_node = NULL; rcu_read_unlock(); out: batadv_tt_global_entry_put(tt_global_entry); batadv_tt_local_entry_put(tt_local_entry); return orig_node; } /** * batadv_tt_global_crc() - calculates the checksum of the local table belonging * to the given orig_node * @bat_priv: the bat priv with all the soft interface information * @orig_node: originator for which the CRC should be computed * @vid: VLAN identifier for which the CRC32 has to be computed * * This function computes the checksum for the global table corresponding to a * specific originator. In particular, the checksum is computed as follows: For * each client connected to the originator the CRC32C of the MAC address and the * VID is computed and then all the CRC32Cs of the various clients are xor'ed * together. * * The idea behind is that CRC32C should be used as much as possible in order to * produce a unique hash of the table, but since the order which is used to feed * the CRC32C function affects the result and since every node in the network * probably sorts the clients differently, the hash function cannot be directly * computed over the entire table. Hence the CRC32C is used only on * the single client entry, while all the results are then xor'ed together * because the XOR operation can combine them all while trying to reduce the * noise as much as possible. * * Return: the checksum of the global table of a given originator. */ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; struct batadv_tt_orig_list_entry *tt_orig; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; struct hlist_head *head; u32 i, crc_tmp, crc = 0; u8 flags; __be16 tmp_vid; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); hlist_for_each_entry_rcu(tt_common, head, hash_entry) { tt_global = container_of(tt_common, struct batadv_tt_global_entry, common); /* compute the CRC only for entries belonging to the * VLAN identified by the vid passed as parameter */ if (tt_common->vid != vid) continue; /* Roaming clients are in the global table for * consistency only. They don't have to be * taken into account while computing the * global crc */ if (tt_common->flags & BATADV_TT_CLIENT_ROAM) continue; /* Temporary clients have not been announced yet, so * they have to be skipped while computing the global * crc */ if (tt_common->flags & BATADV_TT_CLIENT_TEMP) continue; /* find out if this global entry is announced by this * originator */ tt_orig = batadv_tt_global_orig_entry_find(tt_global, orig_node); if (!tt_orig) continue; /* use network order to read the VID: this ensures that * every node reads the bytes in the same order. */ tmp_vid = htons(tt_common->vid); crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid)); /* compute the CRC on flags that have to be kept in sync * among nodes */ flags = tt_orig->flags; crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags)); crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN); batadv_tt_orig_list_entry_put(tt_orig); } rcu_read_unlock(); } return crc; } /** * batadv_tt_local_crc() - calculates the checksum of the local table * @bat_priv: the bat priv with all the soft interface information * @vid: VLAN identifier for which the CRC32 has to be computed * * For details about the computation, please refer to the documentation for * batadv_tt_global_crc(). * * Return: the checksum of the local table */ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct hlist_head *head; u32 i, crc_tmp, crc = 0; u8 flags; __be16 tmp_vid; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); hlist_for_each_entry_rcu(tt_common, head, hash_entry) { /* compute the CRC only for entries belonging to the * VLAN identified by vid */ if (tt_common->vid != vid) continue; /* not yet committed clients have not to be taken into * account while computing the CRC */ if (tt_common->flags & BATADV_TT_CLIENT_NEW) continue; /* use network order to read the VID: this ensures that * every node reads the bytes in the same order. */ tmp_vid = htons(tt_common->vid); crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid)); /* compute the CRC on flags that have to be kept in sync * among nodes */ flags = tt_common->flags & BATADV_TT_SYNC_MASK; crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags)); crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN); } rcu_read_unlock(); } return crc; } /** * batadv_tt_req_node_release() - free tt_req node entry * @ref: kref pointer of the tt req_node entry */ static void batadv_tt_req_node_release(struct kref *ref) { struct batadv_tt_req_node *tt_req_node; tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount); kmem_cache_free(batadv_tt_req_cache, tt_req_node); } /** * batadv_tt_req_node_put() - decrement the tt_req_node refcounter and * possibly release it * @tt_req_node: tt_req_node to be free'd */ static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node) { if (!tt_req_node) return; kref_put(&tt_req_node->refcount, batadv_tt_req_node_release); } static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_req_node *node; struct hlist_node *safe; spin_lock_bh(&bat_priv->tt.req_list_lock); hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { hlist_del_init(&node->list); batadv_tt_req_node_put(node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); } static void batadv_tt_save_orig_buffer(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const void *tt_buff, u16 tt_buff_len) { /* Replace the old buffer only if I received something in the * last OGM (the OGM could carry no changes) */ spin_lock_bh(&orig_node->tt_buff_lock); if (tt_buff_len > 0) { kfree(orig_node->tt_buff); orig_node->tt_buff_len = 0; orig_node->tt_buff = kmalloc(tt_buff_len, GFP_ATOMIC); if (orig_node->tt_buff) { memcpy(orig_node->tt_buff, tt_buff, tt_buff_len); orig_node->tt_buff_len = tt_buff_len; } } spin_unlock_bh(&orig_node->tt_buff_lock); } static void batadv_tt_req_purge(struct batadv_priv *bat_priv) { struct batadv_tt_req_node *node; struct hlist_node *safe; spin_lock_bh(&bat_priv->tt.req_list_lock); hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { if (batadv_has_timed_out(node->issued_at, BATADV_TT_REQUEST_TIMEOUT)) { hlist_del_init(&node->list); batadv_tt_req_node_put(node); } } spin_unlock_bh(&bat_priv->tt.req_list_lock); } /** * batadv_tt_req_node_new() - search and possibly create a tt_req_node object * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node this request is being issued for * * Return: the pointer to the new tt_req_node struct if no request * has already been issued for this orig_node, NULL otherwise. */ static struct batadv_tt_req_node * batadv_tt_req_node_new(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { struct batadv_tt_req_node *tt_req_node_tmp, *tt_req_node = NULL; spin_lock_bh(&bat_priv->tt.req_list_lock); hlist_for_each_entry(tt_req_node_tmp, &bat_priv->tt.req_list, list) { if (batadv_compare_eth(tt_req_node_tmp, orig_node) && !batadv_has_timed_out(tt_req_node_tmp->issued_at, BATADV_TT_REQUEST_TIMEOUT)) goto unlock; } tt_req_node = kmem_cache_alloc(batadv_tt_req_cache, GFP_ATOMIC); if (!tt_req_node) goto unlock; kref_init(&tt_req_node->refcount); ether_addr_copy(tt_req_node->addr, orig_node->orig); tt_req_node->issued_at = jiffies; kref_get(&tt_req_node->refcount); hlist_add_head(&tt_req_node->list, &bat_priv->tt.req_list); unlock: spin_unlock_bh(&bat_priv->tt.req_list_lock); return tt_req_node; } /** * batadv_tt_local_valid() - verify local tt entry and get flags * @entry_ptr: to be checked local tt entry * @data_ptr: not used but definition required to satisfy the callback prototype * @flags: a pointer to store TT flags for this client to * * Checks the validity of the given local TT entry. If it is, then the provided * flags pointer is updated. * * Return: true if the entry is a valid, false otherwise. */ static bool batadv_tt_local_valid(const void *entry_ptr, const void *data_ptr, u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; if (tt_common_entry->flags & BATADV_TT_CLIENT_NEW) return false; if (flags) *flags = tt_common_entry->flags; return true; } /** * batadv_tt_global_valid() - verify global tt entry and get flags * @entry_ptr: to be checked global tt entry * @data_ptr: an orig_node object (may be NULL) * @flags: a pointer to store TT flags for this client to * * Checks the validity of the given global TT entry. If it is, then the provided * flags pointer is updated either with the common (summed) TT flags if data_ptr * is NULL or the specific, per originator TT flags otherwise. * * Return: true if the entry is a valid, false otherwise. */ static bool batadv_tt_global_valid(const void *entry_ptr, const void *data_ptr, u8 *flags) { const struct batadv_tt_common_entry *tt_common_entry = entry_ptr; const struct batadv_tt_global_entry *tt_global_entry; const struct batadv_orig_node *orig_node = data_ptr; if (tt_common_entry->flags & BATADV_TT_CLIENT_ROAM || tt_common_entry->flags & BATADV_TT_CLIENT_TEMP) return false; tt_global_entry = container_of(tt_common_entry, struct batadv_tt_global_entry, common); return batadv_tt_global_entry_has_orig(tt_global_entry, orig_node, flags); } /** * batadv_tt_tvlv_generate() - fill the tvlv buff with the tt entries from the * specified tt hash * @bat_priv: the bat priv with all the soft interface information * @hash: hash table containing the tt entries * @tt_len: expected tvlv tt data buffer length in number of bytes * @tvlv_buff: pointer to the buffer to fill with the TT data * @valid_cb: function to filter tt change entries and to return TT flags * @cb_data: data passed to the filter function as argument * * Fills the tvlv buff with the tt entries from the specified hash. If valid_cb * is not provided then this becomes a no-op. */ static void batadv_tt_tvlv_generate(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, void *tvlv_buff, u16 tt_len, bool (*valid_cb)(const void *, const void *, u8 *flags), void *cb_data) { struct batadv_tt_common_entry *tt_common_entry; struct batadv_tvlv_tt_change *tt_change; struct hlist_head *head; u16 tt_tot, tt_num_entries = 0; u8 flags; bool ret; u32 i; tt_tot = batadv_tt_entries(tt_len); tt_change = tvlv_buff; if (!valid_cb) return; rcu_read_lock(); for (i = 0; i < hash->size; i++) { head = &hash->table[i]; hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (tt_tot == tt_num_entries) break; ret = valid_cb(tt_common_entry, cb_data, &flags); if (!ret) continue; ether_addr_copy(tt_change->addr, tt_common_entry->addr); tt_change->flags = flags; tt_change->vid = htons(tt_common_entry->vid); memset(tt_change->reserved, 0, sizeof(tt_change->reserved)); tt_num_entries++; tt_change++; } } rcu_read_unlock(); } /** * batadv_tt_global_check_crc() - check if all the CRCs are correct * @orig_node: originator for which the CRCs have to be checked * @tt_vlan: pointer to the first tvlv VLAN entry * @num_vlan: number of tvlv VLAN entries * * Return: true if all the received CRCs match the locally stored ones, false * otherwise */ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node, struct batadv_tvlv_tt_vlan_data *tt_vlan, u16 num_vlan) { struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp; struct batadv_orig_node_vlan *vlan; int i, orig_num_vlan; u32 crc; /* check if each received CRC matches the locally stored one */ for (i = 0; i < num_vlan; i++) { tt_vlan_tmp = tt_vlan + i; /* if orig_node is a backbone node for this VLAN, don't check * the CRC as we ignore all the global entries over it */ if (batadv_bla_is_backbone_gw_orig(orig_node->bat_priv, orig_node->orig, ntohs(tt_vlan_tmp->vid))) continue; vlan = batadv_orig_node_vlan_get(orig_node, ntohs(tt_vlan_tmp->vid)); if (!vlan) return false; crc = vlan->tt.crc; batadv_orig_node_vlan_put(vlan); if (crc != ntohl(tt_vlan_tmp->crc)) return false; } /* check if any excess VLANs exist locally for the originator * which are not mentioned in the TVLV from the originator. */ rcu_read_lock(); orig_num_vlan = 0; hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) orig_num_vlan++; rcu_read_unlock(); if (orig_num_vlan > num_vlan) return false; return true; } /** * batadv_tt_local_update_crc() - update all the local CRCs * @bat_priv: the bat priv with all the soft interface information */ static void batadv_tt_local_update_crc(struct batadv_priv *bat_priv) { struct batadv_softif_vlan *vlan; /* recompute the global CRC for each VLAN */ rcu_read_lock(); hlist_for_each_entry_rcu(vlan, &bat_priv->softif_vlan_list, list) { vlan->tt.crc = batadv_tt_local_crc(bat_priv, vlan->vid); } rcu_read_unlock(); } /** * batadv_tt_global_update_crc() - update all the global CRCs for this orig_node * @bat_priv: the bat priv with all the soft interface information * @orig_node: the orig_node for which the CRCs have to be updated */ static void batadv_tt_global_update_crc(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { struct batadv_orig_node_vlan *vlan; u32 crc; /* recompute the global CRC for each VLAN */ rcu_read_lock(); hlist_for_each_entry_rcu(vlan, &orig_node->vlan_list, list) { /* if orig_node is a backbone node for this VLAN, don't compute * the CRC as we ignore all the global entries over it */ if (batadv_bla_is_backbone_gw_orig(bat_priv, orig_node->orig, vlan->vid)) continue; crc = batadv_tt_global_crc(bat_priv, orig_node, vlan->vid); vlan->tt.crc = crc; } rcu_read_unlock(); } /** * batadv_send_tt_request() - send a TT Request message to a given node * @bat_priv: the bat priv with all the soft interface information * @dst_orig_node: the destination of the message * @ttvn: the version number that the source of the message is looking for * @tt_vlan: pointer to the first tvlv VLAN object to request * @num_vlan: number of tvlv VLAN entries * @full_table: ask for the entire translation table if true, while only for the * last TT diff otherwise * * Return: true if the TT Request was sent, false otherwise */ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, struct batadv_orig_node *dst_orig_node, u8 ttvn, struct batadv_tvlv_tt_vlan_data *tt_vlan, u16 num_vlan, bool full_table) { struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_tt_req_node *tt_req_node = NULL; struct batadv_tvlv_tt_vlan_data *tt_vlan_req; struct batadv_hard_iface *primary_if; bool ret = false; int i, size; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; /* The new tt_req will be issued only if I'm not waiting for a * reply from the same orig_node yet */ tt_req_node = batadv_tt_req_node_new(bat_priv, dst_orig_node); if (!tt_req_node) goto out; size = sizeof(*tvlv_tt_data) + sizeof(*tt_vlan_req) * num_vlan; tvlv_tt_data = kzalloc(size, GFP_ATOMIC); if (!tvlv_tt_data) goto out; tvlv_tt_data->flags = BATADV_TT_REQUEST; tvlv_tt_data->ttvn = ttvn; tvlv_tt_data->num_vlan = htons(num_vlan); /* send all the CRCs within the request. This is needed by intermediate * nodes to ensure they have the correct table before replying */ tt_vlan_req = (struct batadv_tvlv_tt_vlan_data *)(tvlv_tt_data + 1); for (i = 0; i < num_vlan; i++) { tt_vlan_req->vid = tt_vlan->vid; tt_vlan_req->crc = tt_vlan->crc; tt_vlan_req++; tt_vlan++; } if (full_table) tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE; batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending TT_REQUEST to %pM [%c]\n", dst_orig_node->orig, full_table ? 'F' : '.'); batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_TX); batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr, dst_orig_node->orig, BATADV_TVLV_TT, 1, tvlv_tt_data, size); ret = true; out: batadv_hardif_put(primary_if); if (ret && tt_req_node) { spin_lock_bh(&bat_priv->tt.req_list_lock); if (!hlist_unhashed(&tt_req_node->list)) { hlist_del_init(&tt_req_node->list); batadv_tt_req_node_put(tt_req_node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); } batadv_tt_req_node_put(tt_req_node); kfree(tvlv_tt_data); return ret; } /** * batadv_send_other_tt_response() - send reply to tt request concerning another * node's translation table * @bat_priv: the bat priv with all the soft interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * @req_dst: mac address of tt request recipient * * Return: true if tt request reply was sent, false otherwise. */ static bool batadv_send_other_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, u8 *req_src, u8 *req_dst) { struct batadv_orig_node *req_dst_orig_node; struct batadv_orig_node *res_dst_orig_node = NULL; struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_tvlv_tt_vlan_data *tt_vlan; bool ret = false, full_table; u8 orig_ttvn, req_ttvn; u16 tvlv_len; s32 tt_len; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_REQUEST from %pM for ttvn: %u (%pM) [%c]\n", req_src, tt_data->ttvn, req_dst, ((tt_data->flags & BATADV_TT_FULL_TABLE) ? 'F' : '.')); /* Let's get the orig node of the REAL destination */ req_dst_orig_node = batadv_orig_hash_find(bat_priv, req_dst); if (!req_dst_orig_node) goto out; res_dst_orig_node = batadv_orig_hash_find(bat_priv, req_src); if (!res_dst_orig_node) goto out; orig_ttvn = (u8)atomic_read(&req_dst_orig_node->last_ttvn); req_ttvn = tt_data->ttvn; tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); /* this node doesn't have the requested data */ if (orig_ttvn != req_ttvn || !batadv_tt_global_check_crc(req_dst_orig_node, tt_vlan, ntohs(tt_data->num_vlan))) goto out; /* If the full table has been explicitly requested */ if (tt_data->flags & BATADV_TT_FULL_TABLE || !req_dst_orig_node->tt_buff) full_table = true; else full_table = false; /* TT fragmentation hasn't been implemented yet, so send as many * TT entries fit a single packet as possible only */ if (!full_table) { spin_lock_bh(&req_dst_orig_node->tt_buff_lock); tt_len = req_dst_orig_node->tt_buff_len; tvlv_len = batadv_tt_prepare_tvlv_global_data(req_dst_orig_node, &tvlv_tt_data, &tt_change, &tt_len); if (!tt_len) goto unlock; /* Copy the last orig_node's OGM buffer */ memcpy(tt_change, req_dst_orig_node->tt_buff, req_dst_orig_node->tt_buff_len); spin_unlock_bh(&req_dst_orig_node->tt_buff_lock); } else { /* allocate the tvlv, put the tt_data and all the tt_vlan_data * in the initial part */ tt_len = -1; tvlv_len = batadv_tt_prepare_tvlv_global_data(req_dst_orig_node, &tvlv_tt_data, &tt_change, &tt_len); if (!tt_len) goto out; /* fill the rest of the tvlv with the real TT entries */ batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.global_hash, tt_change, tt_len, batadv_tt_global_valid, req_dst_orig_node); } /* Don't send the response, if larger than fragmented packet. */ tt_len = sizeof(struct batadv_unicast_tvlv_packet) + tvlv_len; if (tt_len > atomic_read(&bat_priv->packet_size_max)) { net_ratelimited_function(batadv_info, bat_priv->soft_iface, "Ignoring TT_REQUEST from %pM; Response size exceeds max packet size.\n", res_dst_orig_node->orig); goto out; } tvlv_tt_data->flags = BATADV_TT_RESPONSE; tvlv_tt_data->ttvn = req_ttvn; if (full_table) tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE; batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending TT_RESPONSE %pM for %pM [%c] (ttvn: %u)\n", res_dst_orig_node->orig, req_dst_orig_node->orig, full_table ? 'F' : '.', req_ttvn); batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); batadv_tvlv_unicast_send(bat_priv, req_dst_orig_node->orig, req_src, BATADV_TVLV_TT, 1, tvlv_tt_data, tvlv_len); ret = true; goto out; unlock: spin_unlock_bh(&req_dst_orig_node->tt_buff_lock); out: batadv_orig_node_put(res_dst_orig_node); batadv_orig_node_put(req_dst_orig_node); kfree(tvlv_tt_data); return ret; } /** * batadv_send_my_tt_response() - send reply to tt request concerning this * node's translation table * @bat_priv: the bat priv with all the soft interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * * Return: true if tt request reply was sent, false otherwise. */ static bool batadv_send_my_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, u8 *req_src) { struct batadv_tvlv_tt_data *tvlv_tt_data = NULL; struct batadv_hard_iface *primary_if = NULL; struct batadv_tvlv_tt_change *tt_change; struct batadv_orig_node *orig_node; u8 my_ttvn, req_ttvn; u16 tvlv_len; bool full_table; s32 tt_len; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_REQUEST from %pM for ttvn: %u (me) [%c]\n", req_src, tt_data->ttvn, ((tt_data->flags & BATADV_TT_FULL_TABLE) ? 'F' : '.')); spin_lock_bh(&bat_priv->tt.commit_lock); my_ttvn = (u8)atomic_read(&bat_priv->tt.vn); req_ttvn = tt_data->ttvn; orig_node = batadv_orig_hash_find(bat_priv, req_src); if (!orig_node) goto out; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; /* If the full table has been explicitly requested or the gap * is too big send the whole local translation table */ if (tt_data->flags & BATADV_TT_FULL_TABLE || my_ttvn != req_ttvn || !bat_priv->tt.last_changeset) full_table = true; else full_table = false; /* TT fragmentation hasn't been implemented yet, so send as many * TT entries fit a single packet as possible only */ if (!full_table) { spin_lock_bh(&bat_priv->tt.last_changeset_lock); tt_len = bat_priv->tt.last_changeset_len; tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv, &tvlv_tt_data, &tt_change, &tt_len); if (!tt_len || !tvlv_len) goto unlock; /* Copy the last orig_node's OGM buffer */ memcpy(tt_change, bat_priv->tt.last_changeset, bat_priv->tt.last_changeset_len); spin_unlock_bh(&bat_priv->tt.last_changeset_lock); } else { req_ttvn = (u8)atomic_read(&bat_priv->tt.vn); /* allocate the tvlv, put the tt_data and all the tt_vlan_data * in the initial part */ tt_len = -1; tvlv_len = batadv_tt_prepare_tvlv_local_data(bat_priv, &tvlv_tt_data, &tt_change, &tt_len); if (!tt_len || !tvlv_len) goto out; /* fill the rest of the tvlv with the real TT entries */ batadv_tt_tvlv_generate(bat_priv, bat_priv->tt.local_hash, tt_change, tt_len, batadv_tt_local_valid, NULL); } tvlv_tt_data->flags = BATADV_TT_RESPONSE; tvlv_tt_data->ttvn = req_ttvn; if (full_table) tvlv_tt_data->flags |= BATADV_TT_FULL_TABLE; batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending TT_RESPONSE to %pM [%c] (ttvn: %u)\n", orig_node->orig, full_table ? 'F' : '.', req_ttvn); batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_TX); batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr, req_src, BATADV_TVLV_TT, 1, tvlv_tt_data, tvlv_len); goto out; unlock: spin_unlock_bh(&bat_priv->tt.last_changeset_lock); out: spin_unlock_bh(&bat_priv->tt.commit_lock); batadv_orig_node_put(orig_node); batadv_hardif_put(primary_if); kfree(tvlv_tt_data); /* The packet was for this host, so it doesn't need to be re-routed */ return true; } /** * batadv_send_tt_response() - send reply to tt request * @bat_priv: the bat priv with all the soft interface information * @tt_data: tt data containing the tt request information * @req_src: mac address of tt request sender * @req_dst: mac address of tt request recipient * * Return: true if tt request reply was sent, false otherwise. */ static bool batadv_send_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, u8 *req_src, u8 *req_dst) { if (batadv_is_my_mac(bat_priv, req_dst)) return batadv_send_my_tt_response(bat_priv, tt_data, req_src); return batadv_send_other_tt_response(bat_priv, tt_data, req_src, req_dst); } static void _batadv_tt_update_changes(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_tvlv_tt_change *tt_change, u16 tt_num_changes, u8 ttvn) { int i; int roams; for (i = 0; i < tt_num_changes; i++) { if ((tt_change + i)->flags & BATADV_TT_CLIENT_DEL) { roams = (tt_change + i)->flags & BATADV_TT_CLIENT_ROAM; batadv_tt_global_del(bat_priv, orig_node, (tt_change + i)->addr, ntohs((tt_change + i)->vid), "tt removed by changes", roams); } else { if (!batadv_tt_global_add(bat_priv, orig_node, (tt_change + i)->addr, ntohs((tt_change + i)->vid), (tt_change + i)->flags, ttvn)) /* In case of problem while storing a * global_entry, we stop the updating * procedure without committing the * ttvn change. This will avoid to send * corrupted data on tt_request */ return; } } set_bit(BATADV_ORIG_CAPA_HAS_TT, &orig_node->capa_initialized); } static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_change *tt_change, u8 ttvn, u8 *resp_src, u16 num_entries) { struct batadv_orig_node *orig_node; orig_node = batadv_orig_hash_find(bat_priv, resp_src); if (!orig_node) goto out; /* Purge the old table first.. */ batadv_tt_global_del_orig(bat_priv, orig_node, -1, "Received full table"); _batadv_tt_update_changes(bat_priv, orig_node, tt_change, num_entries, ttvn); spin_lock_bh(&orig_node->tt_buff_lock); kfree(orig_node->tt_buff); orig_node->tt_buff_len = 0; orig_node->tt_buff = NULL; spin_unlock_bh(&orig_node->tt_buff_lock); atomic_set(&orig_node->last_ttvn, ttvn); out: batadv_orig_node_put(orig_node); } static void batadv_tt_update_changes(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, u16 tt_num_changes, u8 ttvn, struct batadv_tvlv_tt_change *tt_change) { _batadv_tt_update_changes(bat_priv, orig_node, tt_change, tt_num_changes, ttvn); batadv_tt_save_orig_buffer(bat_priv, orig_node, tt_change, batadv_tt_len(tt_num_changes)); atomic_set(&orig_node->last_ttvn, ttvn); } /** * batadv_is_my_client() - check if a client is served by the local node * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to check * @vid: VLAN identifier * * Return: true if the client is served by this node, false otherwise. */ bool batadv_is_my_client(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry; bool ret = false; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) goto out; /* Check if the client has been logically deleted (but is kept for * consistency purpose) */ if ((tt_local_entry->common.flags & BATADV_TT_CLIENT_PENDING) || (tt_local_entry->common.flags & BATADV_TT_CLIENT_ROAM)) goto out; ret = true; out: batadv_tt_local_entry_put(tt_local_entry); return ret; } /** * batadv_handle_tt_response() - process incoming tt reply * @bat_priv: the bat priv with all the soft interface information * @tt_data: tt data containing the tt request information * @resp_src: mac address of tt reply sender * @num_entries: number of tt change entries appended to the tt data */ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, struct batadv_tvlv_tt_data *tt_data, u8 *resp_src, u16 num_entries) { struct batadv_tt_req_node *node; struct hlist_node *safe; struct batadv_orig_node *orig_node = NULL; struct batadv_tvlv_tt_change *tt_change; u8 *tvlv_ptr = (u8 *)tt_data; u16 change_offset; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received TT_RESPONSE from %pM for ttvn %d t_size: %d [%c]\n", resp_src, tt_data->ttvn, num_entries, ((tt_data->flags & BATADV_TT_FULL_TABLE) ? 'F' : '.')); orig_node = batadv_orig_hash_find(bat_priv, resp_src); if (!orig_node) goto out; spin_lock_bh(&orig_node->tt_lock); change_offset = sizeof(struct batadv_tvlv_tt_vlan_data); change_offset *= ntohs(tt_data->num_vlan); change_offset += sizeof(*tt_data); tvlv_ptr += change_offset; tt_change = (struct batadv_tvlv_tt_change *)tvlv_ptr; if (tt_data->flags & BATADV_TT_FULL_TABLE) { batadv_tt_fill_gtable(bat_priv, tt_change, tt_data->ttvn, resp_src, num_entries); } else { batadv_tt_update_changes(bat_priv, orig_node, num_entries, tt_data->ttvn, tt_change); } /* Recalculate the CRC for this orig_node and store it */ batadv_tt_global_update_crc(bat_priv, orig_node); spin_unlock_bh(&orig_node->tt_lock); /* Delete the tt_req_node from pending tt_requests list */ spin_lock_bh(&bat_priv->tt.req_list_lock); hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { if (!batadv_compare_eth(node->addr, resp_src)) continue; hlist_del_init(&node->list); batadv_tt_req_node_put(node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); out: batadv_orig_node_put(orig_node); } static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_roam_node *node, *safe; spin_lock_bh(&bat_priv->tt.roam_list_lock); list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) { list_del(&node->list); kmem_cache_free(batadv_tt_roam_cache, node); } spin_unlock_bh(&bat_priv->tt.roam_list_lock); } static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) { struct batadv_tt_roam_node *node, *safe; spin_lock_bh(&bat_priv->tt.roam_list_lock); list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) { if (!batadv_has_timed_out(node->first_time, BATADV_ROAMING_MAX_TIME)) continue; list_del(&node->list); kmem_cache_free(batadv_tt_roam_cache, node); } spin_unlock_bh(&bat_priv->tt.roam_list_lock); } /** * batadv_tt_check_roam_count() - check if a client has roamed too frequently * @bat_priv: the bat priv with all the soft interface information * @client: mac address of the roaming client * * This function checks whether the client already reached the * maximum number of possible roaming phases. In this case the ROAMING_ADV * will not be sent. * * Return: true if the ROAMING_ADV can be sent, false otherwise */ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) { struct batadv_tt_roam_node *tt_roam_node; bool ret = false; spin_lock_bh(&bat_priv->tt.roam_list_lock); /* The new tt_req will be issued only if I'm not waiting for a * reply from the same orig_node yet */ list_for_each_entry(tt_roam_node, &bat_priv->tt.roam_list, list) { if (!batadv_compare_eth(tt_roam_node->addr, client)) continue; if (batadv_has_timed_out(tt_roam_node->first_time, BATADV_ROAMING_MAX_TIME)) continue; if (!batadv_atomic_dec_not_zero(&tt_roam_node->counter)) /* Sorry, you roamed too many times! */ goto unlock; ret = true; break; } if (!ret) { tt_roam_node = kmem_cache_alloc(batadv_tt_roam_cache, GFP_ATOMIC); if (!tt_roam_node) goto unlock; tt_roam_node->first_time = jiffies; atomic_set(&tt_roam_node->counter, BATADV_ROAMING_MAX_COUNT - 1); ether_addr_copy(tt_roam_node->addr, client); list_add(&tt_roam_node->list, &bat_priv->tt.roam_list); ret = true; } unlock: spin_unlock_bh(&bat_priv->tt.roam_list_lock); return ret; } /** * batadv_send_roam_adv() - send a roaming advertisement message * @bat_priv: the bat priv with all the soft interface information * @client: mac address of the roaming client * @vid: VLAN identifier * @orig_node: message destination * * Send a ROAMING_ADV message to the node which was previously serving this * client. This is done to inform the node that from now on all traffic destined * for this particular roamed client has to be forwarded to the sender of the * roaming message. */ static void batadv_send_roam_adv(struct batadv_priv *bat_priv, u8 *client, unsigned short vid, struct batadv_orig_node *orig_node) { struct batadv_hard_iface *primary_if; struct batadv_tvlv_roam_adv tvlv_roam; primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) goto out; /* before going on we have to check whether the client has * already roamed to us too many times */ if (!batadv_tt_check_roam_count(bat_priv, client)) goto out; batadv_dbg(BATADV_DBG_TT, bat_priv, "Sending ROAMING_ADV to %pM (client %pM, vid: %d)\n", orig_node->orig, client, batadv_print_vid(vid)); batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_TX); memcpy(tvlv_roam.client, client, sizeof(tvlv_roam.client)); tvlv_roam.vid = htons(vid); batadv_tvlv_unicast_send(bat_priv, primary_if->net_dev->dev_addr, orig_node->orig, BATADV_TVLV_ROAM, 1, &tvlv_roam, sizeof(tvlv_roam)); out: batadv_hardif_put(primary_if); } static void batadv_tt_purge(struct work_struct *work) { struct delayed_work *delayed_work; struct batadv_priv_tt *priv_tt; struct batadv_priv *bat_priv; delayed_work = to_delayed_work(work); priv_tt = container_of(delayed_work, struct batadv_priv_tt, work); bat_priv = container_of(priv_tt, struct batadv_priv, tt); batadv_tt_local_purge(bat_priv, BATADV_TT_LOCAL_TIMEOUT); batadv_tt_global_purge(bat_priv); batadv_tt_req_purge(bat_priv); batadv_tt_roam_purge(bat_priv); queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); } /** * batadv_tt_free() - Free translation table of soft interface * @bat_priv: the bat priv with all the soft interface information */ void batadv_tt_free(struct batadv_priv *bat_priv) { batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_ROAM, 1); batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_TT, 1); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_TT, 1); cancel_delayed_work_sync(&bat_priv->tt.work); batadv_tt_local_table_free(bat_priv); batadv_tt_global_table_free(bat_priv); batadv_tt_req_list_free(bat_priv); batadv_tt_changes_list_free(bat_priv); batadv_tt_roam_list_free(bat_priv); kfree(bat_priv->tt.last_changeset); } /** * batadv_tt_local_set_flags() - set or unset the specified flags on the local * table and possibly count them in the TT size * @bat_priv: the bat priv with all the soft interface information * @flags: the flag to switch * @enable: whether to set or unset the flag * @count: whether to increase the TT size by the number of changed entries */ static void batadv_tt_local_set_flags(struct batadv_priv *bat_priv, u16 flags, bool enable, bool count) { struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common_entry; struct hlist_head *head; u32 i; if (!hash) return; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); hlist_for_each_entry_rcu(tt_common_entry, head, hash_entry) { if (enable) { if ((tt_common_entry->flags & flags) == flags) continue; tt_common_entry->flags |= flags; } else { if (!(tt_common_entry->flags & flags)) continue; tt_common_entry->flags &= ~flags; } if (!count) continue; batadv_tt_local_size_inc(bat_priv, tt_common_entry->vid); } rcu_read_unlock(); } } /* Purge out all the tt local entries marked with BATADV_TT_CLIENT_PENDING */ static void batadv_tt_local_purge_pending_clients(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->tt.local_hash; struct batadv_tt_common_entry *tt_common; struct batadv_tt_local_entry *tt_local; struct hlist_node *node_tmp; struct hlist_head *head; spinlock_t *list_lock; /* protects write access to the hash lists */ u32 i; if (!hash) return; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; list_lock = &hash->list_locks[i]; spin_lock_bh(list_lock); hlist_for_each_entry_safe(tt_common, node_tmp, head, hash_entry) { if (!(tt_common->flags & BATADV_TT_CLIENT_PENDING)) continue; batadv_dbg(BATADV_DBG_TT, bat_priv, "Deleting local tt entry (%pM, vid: %d): pending\n", tt_common->addr, batadv_print_vid(tt_common->vid)); batadv_tt_local_size_dec(bat_priv, tt_common->vid); hlist_del_rcu(&tt_common->hash_entry); tt_local = container_of(tt_common, struct batadv_tt_local_entry, common); batadv_tt_local_entry_put(tt_local); } spin_unlock_bh(list_lock); } } /** * batadv_tt_local_commit_changes_nolock() - commit all pending local tt changes * which have been queued in the time since the last commit * @bat_priv: the bat priv with all the soft interface information * * Caller must hold tt->commit_lock. */ static void batadv_tt_local_commit_changes_nolock(struct batadv_priv *bat_priv) { lockdep_assert_held(&bat_priv->tt.commit_lock); if (atomic_read(&bat_priv->tt.local_changes) < 1) { if (!batadv_atomic_dec_not_zero(&bat_priv->tt.ogm_append_cnt)) batadv_tt_tvlv_container_update(bat_priv); return; } batadv_tt_local_set_flags(bat_priv, BATADV_TT_CLIENT_NEW, false, true); batadv_tt_local_purge_pending_clients(bat_priv); batadv_tt_local_update_crc(bat_priv); /* Increment the TTVN only once per OGM interval */ atomic_inc(&bat_priv->tt.vn); batadv_dbg(BATADV_DBG_TT, bat_priv, "Local changes committed, updating to ttvn %u\n", (u8)atomic_read(&bat_priv->tt.vn)); /* reset the sending counter */ atomic_set(&bat_priv->tt.ogm_append_cnt, BATADV_TT_OGM_APPEND_MAX); batadv_tt_tvlv_container_update(bat_priv); } /** * batadv_tt_local_commit_changes() - commit all pending local tt changes which * have been queued in the time since the last commit * @bat_priv: the bat priv with all the soft interface information */ void batadv_tt_local_commit_changes(struct batadv_priv *bat_priv) { spin_lock_bh(&bat_priv->tt.commit_lock); batadv_tt_local_commit_changes_nolock(bat_priv); spin_unlock_bh(&bat_priv->tt.commit_lock); } /** * batadv_is_ap_isolated() - Check if packet from upper layer should be dropped * @bat_priv: the bat priv with all the soft interface information * @src: source mac address of packet * @dst: destination mac address of packet * @vid: vlan id of packet * * Return: true when src+dst(+vid) pair should be isolated, false otherwise */ bool batadv_is_ap_isolated(struct batadv_priv *bat_priv, u8 *src, u8 *dst, unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry; struct batadv_tt_global_entry *tt_global_entry; struct batadv_softif_vlan *vlan; bool ret = false; vlan = batadv_softif_vlan_get(bat_priv, vid); if (!vlan) return false; if (!atomic_read(&vlan->ap_isolation)) goto vlan_put; tt_local_entry = batadv_tt_local_hash_find(bat_priv, dst, vid); if (!tt_local_entry) goto vlan_put; tt_global_entry = batadv_tt_global_hash_find(bat_priv, src, vid); if (!tt_global_entry) goto local_entry_put; if (_batadv_is_ap_isolated(tt_local_entry, tt_global_entry)) ret = true; batadv_tt_global_entry_put(tt_global_entry); local_entry_put: batadv_tt_local_entry_put(tt_local_entry); vlan_put: batadv_softif_vlan_put(vlan); return ret; } /** * batadv_tt_update_orig() - update global translation table with new tt * information received via ogms * @bat_priv: the bat priv with all the soft interface information * @orig_node: the orig_node of the ogm * @tt_buff: pointer to the first tvlv VLAN entry * @tt_num_vlan: number of tvlv VLAN entries * @tt_change: pointer to the first entry in the TT buffer * @tt_num_changes: number of tt changes inside the tt buffer * @ttvn: translation table version number of this changeset */ static void batadv_tt_update_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const void *tt_buff, u16 tt_num_vlan, struct batadv_tvlv_tt_change *tt_change, u16 tt_num_changes, u8 ttvn) { u8 orig_ttvn = (u8)atomic_read(&orig_node->last_ttvn); struct batadv_tvlv_tt_vlan_data *tt_vlan; bool full_table = true; bool has_tt_init; tt_vlan = (struct batadv_tvlv_tt_vlan_data *)tt_buff; has_tt_init = test_bit(BATADV_ORIG_CAPA_HAS_TT, &orig_node->capa_initialized); /* orig table not initialised AND first diff is in the OGM OR the ttvn * increased by one -> we can apply the attached changes */ if ((!has_tt_init && ttvn == 1) || ttvn - orig_ttvn == 1) { /* the OGM could not contain the changes due to their size or * because they have already been sent BATADV_TT_OGM_APPEND_MAX * times. * In this case send a tt request */ if (!tt_num_changes) { full_table = false; goto request_table; } spin_lock_bh(&orig_node->tt_lock); batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes, ttvn, tt_change); /* Even if we received the precomputed crc with the OGM, we * prefer to recompute it to spot any possible inconsistency * in the global table */ batadv_tt_global_update_crc(bat_priv, orig_node); spin_unlock_bh(&orig_node->tt_lock); /* The ttvn alone is not enough to guarantee consistency * because a single value could represent different states * (due to the wrap around). Thus a node has to check whether * the resulting table (after applying the changes) is still * consistent or not. E.g. a node could disconnect while its * ttvn is X and reconnect on ttvn = X + TTVN_MAX: in this case * checking the CRC value is mandatory to detect the * inconsistency */ if (!batadv_tt_global_check_crc(orig_node, tt_vlan, tt_num_vlan)) goto request_table; } else { /* if we missed more than one change or our tables are not * in sync anymore -> request fresh tt data */ if (!has_tt_init || ttvn != orig_ttvn || !batadv_tt_global_check_crc(orig_node, tt_vlan, tt_num_vlan)) { request_table: batadv_dbg(BATADV_DBG_TT, bat_priv, "TT inconsistency for %pM. Need to retrieve the correct information (ttvn: %u last_ttvn: %u num_changes: %u)\n", orig_node->orig, ttvn, orig_ttvn, tt_num_changes); batadv_send_tt_request(bat_priv, orig_node, ttvn, tt_vlan, tt_num_vlan, full_table); return; } } } /** * batadv_tt_global_client_is_roaming() - check if a client is marked as roaming * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client to check * @vid: VLAN identifier * * Return: true if we know that the client has moved from its old originator * to another one. This entry is still kept for consistency purposes and will be * deleted later by a DEL or because of timeout */ bool batadv_tt_global_client_is_roaming(struct batadv_priv *bat_priv, u8 *addr, unsigned short vid) { struct batadv_tt_global_entry *tt_global_entry; bool ret = false; tt_global_entry = batadv_tt_global_hash_find(bat_priv, addr, vid); if (!tt_global_entry) goto out; ret = tt_global_entry->common.flags & BATADV_TT_CLIENT_ROAM; batadv_tt_global_entry_put(tt_global_entry); out: return ret; } /** * batadv_tt_local_client_is_roaming() - tells whether the client is roaming * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the local client to query * @vid: VLAN identifier * * Return: true if the local client is known to be roaming (it is not served by * this node anymore) or not. If yes, the client is still present in the table * to keep the latter consistent with the node TTVN */ bool batadv_tt_local_client_is_roaming(struct batadv_priv *bat_priv, u8 *addr, unsigned short vid) { struct batadv_tt_local_entry *tt_local_entry; bool ret = false; tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid); if (!tt_local_entry) goto out; ret = tt_local_entry->common.flags & BATADV_TT_CLIENT_ROAM; batadv_tt_local_entry_put(tt_local_entry); out: return ret; } /** * batadv_tt_add_temporary_global_entry() - Add temporary entry to global TT * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node which the temporary entry should be associated with * @addr: mac address of the client * @vid: VLAN id of the new temporary global translation table * * Return: true when temporary tt entry could be added, false otherwise */ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, const unsigned char *addr, unsigned short vid) { /* ignore loop detect macs, they are not supposed to be in the tt local * data as well. */ if (batadv_bla_is_loopdetect_mac(addr)) return false; if (!batadv_tt_global_add(bat_priv, orig_node, addr, vid, BATADV_TT_CLIENT_TEMP, atomic_read(&orig_node->last_ttvn))) return false; batadv_dbg(BATADV_DBG_TT, bat_priv, "Added temporary global client (addr: %pM, vid: %d, orig: %pM)\n", addr, batadv_print_vid(vid), orig_node->orig); return true; } /** * batadv_tt_local_resize_to_mtu() - resize the local translation table fit the * maximum packet size that can be transported through the mesh * @soft_iface: netdev struct of the mesh interface * * Remove entries older than 'timeout' and half timeout if more entries need * to be removed. */ void batadv_tt_local_resize_to_mtu(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); int packet_size_max = atomic_read(&bat_priv->packet_size_max); int table_size, timeout = BATADV_TT_LOCAL_TIMEOUT / 2; bool reduced = false; spin_lock_bh(&bat_priv->tt.commit_lock); while (timeout) { table_size = batadv_tt_local_table_transmit_size(bat_priv); if (packet_size_max >= table_size) break; batadv_tt_local_purge(bat_priv, timeout); batadv_tt_local_purge_pending_clients(bat_priv); timeout /= 2; reduced = true; net_ratelimited_function(batadv_info, soft_iface, "Forced to purge local tt entries to fit new maximum fragment MTU (%i)\n", packet_size_max); } /* commit these changes immediately, to avoid synchronization problem * with the TTVN */ if (reduced) batadv_tt_local_commit_changes_nolock(bat_priv); spin_unlock_bh(&bat_priv->tt.commit_lock); } /** * batadv_tt_tvlv_ogm_handler_v1() - process incoming tt tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the gateway data * @tvlv_value_len: tvlv buffer length */ static void batadv_tt_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 flags, void *tvlv_value, u16 tvlv_value_len) { struct batadv_tvlv_tt_vlan_data *tt_vlan; struct batadv_tvlv_tt_change *tt_change; struct batadv_tvlv_tt_data *tt_data; u16 num_entries, num_vlan; if (tvlv_value_len < sizeof(*tt_data)) return; tt_data = tvlv_value; tvlv_value_len -= sizeof(*tt_data); num_vlan = ntohs(tt_data->num_vlan); if (tvlv_value_len < sizeof(*tt_vlan) * num_vlan) return; tt_vlan = (struct batadv_tvlv_tt_vlan_data *)(tt_data + 1); tt_change = (struct batadv_tvlv_tt_change *)(tt_vlan + num_vlan); tvlv_value_len -= sizeof(*tt_vlan) * num_vlan; num_entries = batadv_tt_entries(tvlv_value_len); batadv_tt_update_orig(bat_priv, orig, tt_vlan, num_vlan, tt_change, num_entries, tt_data->ttvn); } /** * batadv_tt_tvlv_unicast_handler_v1() - process incoming (unicast) tt tvlv * container * @bat_priv: the bat priv with all the soft interface information * @src: mac address of tt tvlv sender * @dst: mac address of tt tvlv recipient * @tvlv_value: tvlv buffer containing the tt data * @tvlv_value_len: tvlv buffer length * * Return: NET_RX_DROP if the tt tvlv is to be re-routed, NET_RX_SUCCESS * otherwise. */ static int batadv_tt_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, u8 *src, u8 *dst, void *tvlv_value, u16 tvlv_value_len) { struct batadv_tvlv_tt_data *tt_data; u16 tt_vlan_len, tt_num_entries; char tt_flag; bool ret; if (tvlv_value_len < sizeof(*tt_data)) return NET_RX_SUCCESS; tt_data = tvlv_value; tvlv_value_len -= sizeof(*tt_data); tt_vlan_len = sizeof(struct batadv_tvlv_tt_vlan_data); tt_vlan_len *= ntohs(tt_data->num_vlan); if (tvlv_value_len < tt_vlan_len) return NET_RX_SUCCESS; tvlv_value_len -= tt_vlan_len; tt_num_entries = batadv_tt_entries(tvlv_value_len); switch (tt_data->flags & BATADV_TT_DATA_TYPE_MASK) { case BATADV_TT_REQUEST: batadv_inc_counter(bat_priv, BATADV_CNT_TT_REQUEST_RX); /* If this node cannot provide a TT response the tt_request is * forwarded */ ret = batadv_send_tt_response(bat_priv, tt_data, src, dst); if (!ret) { if (tt_data->flags & BATADV_TT_FULL_TABLE) tt_flag = 'F'; else tt_flag = '.'; batadv_dbg(BATADV_DBG_TT, bat_priv, "Routing TT_REQUEST to %pM [%c]\n", dst, tt_flag); /* tvlv API will re-route the packet */ return NET_RX_DROP; } break; case BATADV_TT_RESPONSE: batadv_inc_counter(bat_priv, BATADV_CNT_TT_RESPONSE_RX); if (batadv_is_my_mac(bat_priv, dst)) { batadv_handle_tt_response(bat_priv, tt_data, src, tt_num_entries); return NET_RX_SUCCESS; } if (tt_data->flags & BATADV_TT_FULL_TABLE) tt_flag = 'F'; else tt_flag = '.'; batadv_dbg(BATADV_DBG_TT, bat_priv, "Routing TT_RESPONSE to %pM [%c]\n", dst, tt_flag); /* tvlv API will re-route the packet */ return NET_RX_DROP; } return NET_RX_SUCCESS; } /** * batadv_roam_tvlv_unicast_handler_v1() - process incoming tt roam tvlv * container * @bat_priv: the bat priv with all the soft interface information * @src: mac address of tt tvlv sender * @dst: mac address of tt tvlv recipient * @tvlv_value: tvlv buffer containing the tt data * @tvlv_value_len: tvlv buffer length * * Return: NET_RX_DROP if the tt roam tvlv is to be re-routed, NET_RX_SUCCESS * otherwise. */ static int batadv_roam_tvlv_unicast_handler_v1(struct batadv_priv *bat_priv, u8 *src, u8 *dst, void *tvlv_value, u16 tvlv_value_len) { struct batadv_tvlv_roam_adv *roaming_adv; struct batadv_orig_node *orig_node = NULL; /* If this node is not the intended recipient of the * roaming advertisement the packet is forwarded * (the tvlv API will re-route the packet). */ if (!batadv_is_my_mac(bat_priv, dst)) return NET_RX_DROP; if (tvlv_value_len < sizeof(*roaming_adv)) goto out; orig_node = batadv_orig_hash_find(bat_priv, src); if (!orig_node) goto out; batadv_inc_counter(bat_priv, BATADV_CNT_TT_ROAM_ADV_RX); roaming_adv = tvlv_value; batadv_dbg(BATADV_DBG_TT, bat_priv, "Received ROAMING_ADV from %pM (client %pM)\n", src, roaming_adv->client); batadv_tt_global_add(bat_priv, orig_node, roaming_adv->client, ntohs(roaming_adv->vid), BATADV_TT_CLIENT_ROAM, atomic_read(&orig_node->last_ttvn) + 1); out: batadv_orig_node_put(orig_node); return NET_RX_SUCCESS; } /** * batadv_tt_init() - initialise the translation table internals * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or negative error number in case of failure. */ int batadv_tt_init(struct batadv_priv *bat_priv) { int ret; /* synchronized flags must be remote */ BUILD_BUG_ON(!(BATADV_TT_SYNC_MASK & BATADV_TT_REMOTE_MASK)); ret = batadv_tt_local_init(bat_priv); if (ret < 0) return ret; ret = batadv_tt_global_init(bat_priv); if (ret < 0) { batadv_tt_local_table_free(bat_priv); return ret; } batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1, batadv_tt_tvlv_unicast_handler_v1, NULL, BATADV_TVLV_TT, 1, BATADV_NO_FLAGS); batadv_tvlv_handler_register(bat_priv, NULL, batadv_roam_tvlv_unicast_handler_v1, NULL, BATADV_TVLV_ROAM, 1, BATADV_NO_FLAGS); INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge); queue_delayed_work(batadv_event_workqueue, &bat_priv->tt.work, msecs_to_jiffies(BATADV_TT_WORK_PERIOD)); return 1; } /** * batadv_tt_global_is_isolated() - check if a client is marked as isolated * @bat_priv: the bat priv with all the soft interface information * @addr: the mac address of the client * @vid: the identifier of the VLAN where this client is connected * * Return: true if the client is marked with the TT_CLIENT_ISOLA flag, false * otherwise */ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid) { struct batadv_tt_global_entry *tt; bool ret; tt = batadv_tt_global_hash_find(bat_priv, addr, vid); if (!tt) return false; ret = tt->common.flags & BATADV_TT_CLIENT_ISOLA; batadv_tt_global_entry_put(tt); return ret; } /** * batadv_tt_cache_init() - Initialize tt memory object cache * * Return: 0 on success or negative error number in case of failure. */ int __init batadv_tt_cache_init(void) { size_t tl_size = sizeof(struct batadv_tt_local_entry); size_t tg_size = sizeof(struct batadv_tt_global_entry); size_t tt_orig_size = sizeof(struct batadv_tt_orig_list_entry); size_t tt_change_size = sizeof(struct batadv_tt_change_node); size_t tt_req_size = sizeof(struct batadv_tt_req_node); size_t tt_roam_size = sizeof(struct batadv_tt_roam_node); batadv_tl_cache = kmem_cache_create("batadv_tl_cache", tl_size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!batadv_tl_cache) return -ENOMEM; batadv_tg_cache = kmem_cache_create("batadv_tg_cache", tg_size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!batadv_tg_cache) goto err_tt_tl_destroy; batadv_tt_orig_cache = kmem_cache_create("batadv_tt_orig_cache", tt_orig_size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!batadv_tt_orig_cache) goto err_tt_tg_destroy; batadv_tt_change_cache = kmem_cache_create("batadv_tt_change_cache", tt_change_size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!batadv_tt_change_cache) goto err_tt_orig_destroy; batadv_tt_req_cache = kmem_cache_create("batadv_tt_req_cache", tt_req_size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!batadv_tt_req_cache) goto err_tt_change_destroy; batadv_tt_roam_cache = kmem_cache_create("batadv_tt_roam_cache", tt_roam_size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!batadv_tt_roam_cache) goto err_tt_req_destroy; return 0; err_tt_req_destroy: kmem_cache_destroy(batadv_tt_req_cache); batadv_tt_req_cache = NULL; err_tt_change_destroy: kmem_cache_destroy(batadv_tt_change_cache); batadv_tt_change_cache = NULL; err_tt_orig_destroy: kmem_cache_destroy(batadv_tt_orig_cache); batadv_tt_orig_cache = NULL; err_tt_tg_destroy: kmem_cache_destroy(batadv_tg_cache); batadv_tg_cache = NULL; err_tt_tl_destroy: kmem_cache_destroy(batadv_tl_cache); batadv_tl_cache = NULL; return -ENOMEM; } /** * batadv_tt_cache_destroy() - Destroy tt memory object cache */ void batadv_tt_cache_destroy(void) { kmem_cache_destroy(batadv_tl_cache); kmem_cache_destroy(batadv_tg_cache); kmem_cache_destroy(batadv_tt_orig_cache); kmem_cache_destroy(batadv_tt_change_cache); kmem_cache_destroy(batadv_tt_req_cache); kmem_cache_destroy(batadv_tt_roam_cache); } |
| 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 | // SPDX-License-Identifier: GPL-2.0 /* * io_misc.c - fallocate, fpunch, truncate: */ #include "bcachefs.h" #include "alloc_foreground.h" #include "bkey_buf.h" #include "btree_update.h" #include "buckets.h" #include "clock.h" #include "error.h" #include "extents.h" #include "extent_update.h" #include "inode.h" #include "io_misc.h" #include "io_write.h" #include "logged_ops.h" #include "rebalance.h" #include "subvolume.h" /* Overwrites whatever was present with zeroes: */ int bch2_extent_fallocate(struct btree_trans *trans, subvol_inum inum, struct btree_iter *iter, u64 sectors, struct bch_io_opts opts, s64 *i_sectors_delta, struct write_point_specifier write_point) { struct bch_fs *c = trans->c; struct disk_reservation disk_res = { 0 }; struct closure cl; struct open_buckets open_buckets = { 0 }; struct bkey_s_c k; struct bkey_buf old, new; unsigned sectors_allocated = 0, new_replicas; bool unwritten = opts.nocow && c->sb.version >= bcachefs_metadata_version_unwritten_extents; int ret; bch2_bkey_buf_init(&old); bch2_bkey_buf_init(&new); closure_init_stack(&cl); k = bch2_btree_iter_peek_slot(iter); ret = bkey_err(k); if (ret) return ret; sectors = min_t(u64, sectors, k.k->p.offset - iter->pos.offset); new_replicas = max(0, (int) opts.data_replicas - (int) bch2_bkey_nr_ptrs_fully_allocated(k)); /* * Get a disk reservation before (in the nocow case) calling * into the allocator: */ ret = bch2_disk_reservation_get(c, &disk_res, sectors, new_replicas, 0); if (unlikely(ret)) goto err_noprint; bch2_bkey_buf_reassemble(&old, c, k); if (!unwritten) { struct bkey_i_reservation *reservation; bch2_bkey_buf_realloc(&new, c, sizeof(*reservation) / sizeof(u64)); reservation = bkey_reservation_init(new.k); reservation->k.p = iter->pos; bch2_key_resize(&reservation->k, sectors); reservation->v.nr_replicas = opts.data_replicas; } else { struct bkey_i_extent *e; struct bch_devs_list devs_have; struct write_point *wp; devs_have.nr = 0; bch2_bkey_buf_realloc(&new, c, BKEY_EXTENT_U64s_MAX); e = bkey_extent_init(new.k); e->k.p = iter->pos; ret = bch2_alloc_sectors_start_trans(trans, opts.foreground_target, false, write_point, &devs_have, opts.data_replicas, opts.data_replicas, BCH_WATERMARK_normal, 0, &cl, &wp); if (bch2_err_matches(ret, BCH_ERR_operation_blocked)) ret = -BCH_ERR_transaction_restart_nested; if (ret) goto err; sectors = min_t(u64, sectors, wp->sectors_free); sectors_allocated = sectors; bch2_key_resize(&e->k, sectors); bch2_open_bucket_get(c, wp, &open_buckets); bch2_alloc_sectors_append_ptrs(c, wp, &e->k_i, sectors, false); bch2_alloc_sectors_done(c, wp); extent_for_each_ptr(extent_i_to_s(e), ptr) ptr->unwritten = true; } ret = bch2_extent_update(trans, inum, iter, new.k, &disk_res, 0, i_sectors_delta, true); err: if (!ret && sectors_allocated) bch2_increment_clock(c, sectors_allocated, WRITE); if (should_print_err(ret)) bch_err_inum_offset_ratelimited(c, inum.inum, iter->pos.offset << 9, "%s(): error: %s", __func__, bch2_err_str(ret)); err_noprint: bch2_open_buckets_put(c, &open_buckets); bch2_disk_reservation_put(c, &disk_res); bch2_bkey_buf_exit(&new, c); bch2_bkey_buf_exit(&old, c); if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); bch2_wait_on_allocator(c, &cl); } return ret; } /* * Returns -BCH_ERR_transacton_restart if we had to drop locks: */ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter, subvol_inum inum, u64 end, s64 *i_sectors_delta) { struct bch_fs *c = trans->c; unsigned max_sectors = KEY_SIZE_MAX & (~0 << c->block_bits); struct bpos end_pos = POS(inum.inum, end); struct bkey_s_c k; int ret = 0, ret2 = 0; u32 snapshot; while (!ret || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete; if (ret) ret2 = ret; bch2_trans_begin(trans); ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) continue; bch2_btree_iter_set_snapshot(iter, snapshot); /* * peek_upto() doesn't have ideal semantics for extents: */ k = bch2_btree_iter_peek_upto(iter, end_pos); if (!k.k) break; ret = bkey_err(k); if (ret) continue; bkey_init(&delete.k); delete.k.p = iter->pos; /* create the biggest key we can */ bch2_key_resize(&delete.k, max_sectors); bch2_cut_back(end_pos, &delete); ret = bch2_extent_update(trans, inum, iter, &delete, &disk_res, 0, i_sectors_delta, false); bch2_disk_reservation_put(c, &disk_res); } return ret ?: ret2; } int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end, s64 *i_sectors_delta) { struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter; int ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, start), BTREE_ITER_intent); ret = bch2_fpunch_at(trans, &iter, inum, end, i_sectors_delta); bch2_trans_iter_exit(trans, &iter); bch2_trans_put(trans); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; return ret; } /* truncate: */ void bch2_logged_op_truncate_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_logged_op_truncate op = bkey_s_c_to_logged_op_truncate(k); prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol)); prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum)); prt_printf(out, " new_i_size=%llu", le64_to_cpu(op.v->new_i_size)); } static int truncate_set_isize(struct btree_trans *trans, subvol_inum inum, u64 new_i_size) { struct btree_iter iter = { NULL }; struct bch_inode_unpacked inode_u; int ret; ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent) ?: (inode_u.bi_size = new_i_size, 0) ?: bch2_inode_write(trans, &iter, &inode_u); bch2_trans_iter_exit(trans, &iter); return ret; } static int __bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k, u64 *i_sectors_delta) { struct bch_fs *c = trans->c; struct btree_iter fpunch_iter; struct bkey_i_logged_op_truncate *op = bkey_i_to_logged_op_truncate(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; u64 new_i_size = le64_to_cpu(op->v.new_i_size); int ret; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, truncate_set_isize(trans, inum, new_i_size)); if (ret) goto err; bch2_trans_iter_init(trans, &fpunch_iter, BTREE_ID_extents, POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9), BTREE_ITER_intent); ret = bch2_fpunch_at(trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta); bch2_trans_iter_exit(trans, &fpunch_iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) ret = 0; err: bch2_logged_op_finish(trans, op_k); bch_err_fn(c, ret); return ret; } int bch2_resume_logged_op_truncate(struct btree_trans *trans, struct bkey_i *op_k) { return __bch2_resume_logged_op_truncate(trans, op_k, NULL); } int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta) { struct bkey_i_logged_op_truncate op; bkey_logged_op_truncate_init(&op.k_i); op.v.subvol = cpu_to_le32(inum.subvol); op.v.inum = cpu_to_le64(inum.inum); op.v.new_i_size = cpu_to_le64(new_i_size); /* * Logged ops aren't atomic w.r.t. snapshot creation: creating a * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); int ret = bch2_trans_run(c, bch2_logged_op_start(trans, &op.k_i) ?: __bch2_resume_logged_op_truncate(trans, &op.k_i, i_sectors_delta)); up_read(&c->snapshot_create_lock); return ret; } /* finsert/fcollapse: */ void bch2_logged_op_finsert_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { struct bkey_s_c_logged_op_finsert op = bkey_s_c_to_logged_op_finsert(k); prt_printf(out, "subvol=%u", le32_to_cpu(op.v->subvol)); prt_printf(out, " inum=%llu", le64_to_cpu(op.v->inum)); prt_printf(out, " dst_offset=%lli", le64_to_cpu(op.v->dst_offset)); prt_printf(out, " src_offset=%llu", le64_to_cpu(op.v->src_offset)); } static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len) { struct btree_iter iter; struct bch_inode_unpacked inode_u; int ret; offset <<= 9; len <<= 9; ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_intent); if (ret) return ret; if (len > 0) { if (MAX_LFS_FILESIZE - inode_u.bi_size < len) { ret = -EFBIG; goto err; } if (offset >= inode_u.bi_size) { ret = -EINVAL; goto err; } } inode_u.bi_size += len; inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c); ret = bch2_inode_write(trans, &iter, &inode_u); err: bch2_trans_iter_exit(trans, &iter); return ret; } static int __bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k, u64 *i_sectors_delta) { struct bch_fs *c = trans->c; struct btree_iter iter; struct bkey_i_logged_op_finsert *op = bkey_i_to_logged_op_finsert(op_k); subvol_inum inum = { le32_to_cpu(op->v.subvol), le64_to_cpu(op->v.inum) }; struct bch_io_opts opts; u64 dst_offset = le64_to_cpu(op->v.dst_offset); u64 src_offset = le64_to_cpu(op->v.src_offset); s64 shift = dst_offset - src_offset; u64 len = abs(shift); u64 pos = le64_to_cpu(op->v.pos); bool insert = shift > 0; int ret = 0; ret = bch2_inum_opts_get(trans, inum, &opts); if (ret) return ret; bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, POS(inum.inum, 0), BTREE_ITER_intent); switch (op->v.state) { case LOGGED_OP_FINSERT_start: op->v.state = LOGGED_OP_FINSERT_shift_extents; if (insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, src_offset, len) ?: bch2_logged_op_update(trans, &op->k_i)); if (ret) goto err; } else { bch2_btree_iter_set_pos(&iter, POS(inum.inum, src_offset)); ret = bch2_fpunch_at(trans, &iter, inum, src_offset + len, i_sectors_delta); if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart)) goto err; ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, bch2_logged_op_update(trans, &op->k_i)); } fallthrough; case LOGGED_OP_FINSERT_shift_extents: while (1) { struct disk_reservation disk_res = bch2_disk_reservation_init(c, 0); struct bkey_i delete, *copy; struct bkey_s_c k; struct bpos src_pos = POS(inum.inum, src_offset); u32 snapshot; bch2_trans_begin(trans); ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot); if (ret) goto btree_err; bch2_btree_iter_set_snapshot(&iter, snapshot); bch2_btree_iter_set_pos(&iter, SPOS(inum.inum, pos, snapshot)); k = insert ? bch2_btree_iter_peek_prev(&iter) : bch2_btree_iter_peek_upto(&iter, POS(inum.inum, U64_MAX)); if ((ret = bkey_err(k))) goto btree_err; if (!k.k || k.k->p.inode != inum.inum || bkey_le(k.k->p, POS(inum.inum, src_offset))) break; copy = bch2_bkey_make_mut_noupdate(trans, k); if ((ret = PTR_ERR_OR_ZERO(copy))) goto btree_err; if (insert && bkey_lt(bkey_start_pos(k.k), src_pos)) { bch2_cut_front(src_pos, copy); /* Splitting compressed extent? */ bch2_disk_reservation_add(c, &disk_res, copy->k.size * bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy)), BCH_DISK_RESERVATION_NOFAIL); } bkey_init(&delete.k); delete.k.p = copy->k.p; delete.k.p.snapshot = snapshot; delete.k.size = copy->k.size; copy->k.p.offset += shift; copy->k.p.snapshot = snapshot; op->v.pos = cpu_to_le64(insert ? bkey_start_offset(&delete.k) : delete.k.p.offset); ret = bch2_bkey_set_needs_rebalance(c, copy, &opts) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, &delete, 0) ?: bch2_btree_insert_trans(trans, BTREE_ID_extents, copy, 0) ?: bch2_logged_op_update(trans, &op->k_i) ?: bch2_trans_commit(trans, &disk_res, NULL, BCH_TRANS_COMMIT_no_enospc); btree_err: bch2_disk_reservation_put(c, &disk_res); if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) continue; if (ret) goto err; pos = le64_to_cpu(op->v.pos); } op->v.state = LOGGED_OP_FINSERT_finish; if (!insert) { ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, src_offset, shift) ?: bch2_logged_op_update(trans, &op->k_i)); } else { /* We need an inode update to update bi_journal_seq for fsync: */ ret = commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, adjust_i_size(trans, inum, 0, 0) ?: bch2_logged_op_update(trans, &op->k_i)); } break; case LOGGED_OP_FINSERT_finish: break; } err: bch_err_fn(c, ret); bch2_logged_op_finish(trans, op_k); bch2_trans_iter_exit(trans, &iter); return ret; } int bch2_resume_logged_op_finsert(struct btree_trans *trans, struct bkey_i *op_k) { return __bch2_resume_logged_op_finsert(trans, op_k, NULL); } int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum, u64 offset, u64 len, bool insert, s64 *i_sectors_delta) { struct bkey_i_logged_op_finsert op; s64 shift = insert ? len : -len; bkey_logged_op_finsert_init(&op.k_i); op.v.subvol = cpu_to_le32(inum.subvol); op.v.inum = cpu_to_le64(inum.inum); op.v.dst_offset = cpu_to_le64(offset + shift); op.v.src_offset = cpu_to_le64(offset); op.v.pos = cpu_to_le64(insert ? U64_MAX : offset); /* * Logged ops aren't atomic w.r.t. snapshot creation: creating a * snapshot while they're in progress, then crashing, will result in the * resume only proceeding in one of the snapshots */ down_read(&c->snapshot_create_lock); int ret = bch2_trans_run(c, bch2_logged_op_start(trans, &op.k_i) ?: __bch2_resume_logged_op_finsert(trans, &op.k_i, i_sectors_delta)); up_read(&c->snapshot_create_lock); return ret; } |
| 23 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 | // SPDX-License-Identifier: GPL-2.0-only /* * vivid-cec.c - A Virtual Video Test Driver, cec emulation * * Copyright 2016 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/delay.h> #include <media/cec.h> #include "vivid-core.h" #include "vivid-cec.h" #define CEC_START_BIT_US 4500 #define CEC_DATA_BIT_US 2400 #define CEC_MARGIN_US 350 struct xfer_on_bus { struct cec_adapter *adap; u8 status; }; static bool find_dest_adap(struct vivid_dev *dev, struct cec_adapter *adap, u8 dest) { unsigned int i, j; if (dest >= 0xf) return false; if (adap != dev->cec_rx_adap && dev->cec_rx_adap && dev->cec_rx_adap->is_configured && cec_has_log_addr(dev->cec_rx_adap, dest)) return true; for (i = 0, j = 0; i < dev->num_inputs; i++) { unsigned int menu_idx = dev->input_is_connected_to_output[i]; if (dev->input_type[i] != HDMI) continue; j++; if (menu_idx < FIXED_MENU_ITEMS) continue; struct vivid_dev *dev_tx = vivid_ctrl_hdmi_to_output_instance[menu_idx]; unsigned int output = vivid_ctrl_hdmi_to_output_index[menu_idx]; if (!dev_tx) continue; unsigned int hdmi_output = dev_tx->output_to_iface_index[output]; if (adap == dev_tx->cec_tx_adap[hdmi_output]) continue; if (!dev_tx->cec_tx_adap[hdmi_output]->is_configured) continue; if (cec_has_log_addr(dev_tx->cec_tx_adap[hdmi_output], dest)) return true; } return false; } static bool xfer_ready(struct vivid_dev *dev) { unsigned int i; bool ready = false; spin_lock(&dev->cec_xfers_slock); for (i = 0; i < ARRAY_SIZE(dev->xfers); i++) { if (dev->xfers[i].sft && dev->xfers[i].sft <= dev->cec_sft) { ready = true; break; } } spin_unlock(&dev->cec_xfers_slock); return ready; } /* * If an adapter tries to send successive messages, it must wait for the * longest signal-free time between its transmissions. But, if another * adapter sends a message in the interim, then the wait can be reduced * because the messages are no longer successive. Make these adjustments * if necessary. Should be called holding cec_xfers_slock. */ static void adjust_sfts(struct vivid_dev *dev) { unsigned int i; u8 initiator; for (i = 0; i < ARRAY_SIZE(dev->xfers); i++) { if (dev->xfers[i].sft <= CEC_SIGNAL_FREE_TIME_RETRY) continue; initiator = dev->xfers[i].msg[0] >> 4; if (initiator == dev->last_initiator) dev->xfers[i].sft = CEC_SIGNAL_FREE_TIME_NEXT_XFER; else dev->xfers[i].sft = CEC_SIGNAL_FREE_TIME_NEW_INITIATOR; } } /* * The main emulation of the bus on which CEC adapters attempt to send * messages to each other. The bus keeps track of how long it has been * signal-free and accepts a pending transmission only if the state of * the bus matches the transmission's signal-free requirements. It calls * cec_transmit_attempt_done() for all transmits that enter the bus and * cec_received_msg() for successful transmits. */ int vivid_cec_bus_thread(void *_dev) { u32 last_sft; unsigned int i, j; unsigned int dest; ktime_t start, end; s64 delta_us, retry_us; struct vivid_dev *dev = _dev; dev->cec_sft = CEC_SIGNAL_FREE_TIME_NEXT_XFER; for (;;) { bool first = true; int wait_xfer_us = 0; bool valid_dest = false; int wait_arb_lost_us = 0; unsigned int first_idx = 0; unsigned int first_status = 0; struct cec_msg first_msg = {}; struct xfer_on_bus xfers_on_bus[MAX_OUTPUTS] = {}; wait_event_interruptible(dev->kthread_waitq_cec, xfer_ready(dev) || kthread_should_stop()); if (kthread_should_stop()) break; last_sft = dev->cec_sft; dev->cec_sft = 0; /* * Move the messages that are ready onto the bus. The adapter with * the most leading zeros will win control of the bus and any other * adapters will lose arbitration. */ spin_lock(&dev->cec_xfers_slock); for (i = 0; i < ARRAY_SIZE(dev->xfers); i++) { if (!dev->xfers[i].sft || dev->xfers[i].sft > last_sft) continue; if (first) { first = false; first_idx = i; xfers_on_bus[first_idx].adap = dev->xfers[i].adap; memcpy(first_msg.msg, dev->xfers[i].msg, dev->xfers[i].len); first_msg.len = dev->xfers[i].len; } else { xfers_on_bus[i].adap = dev->xfers[i].adap; xfers_on_bus[i].status = CEC_TX_STATUS_ARB_LOST; /* * For simplicity wait for all 4 bits of the initiator's * address even though HDMI specification uses bit-level * precision. */ wait_arb_lost_us = 4 * CEC_DATA_BIT_US + CEC_START_BIT_US; } dev->xfers[i].sft = 0; } dev->last_initiator = cec_msg_initiator(&first_msg); adjust_sfts(dev); spin_unlock(&dev->cec_xfers_slock); dest = cec_msg_destination(&first_msg); valid_dest = cec_msg_is_broadcast(&first_msg); if (!valid_dest) valid_dest = find_dest_adap(dev, xfers_on_bus[first_idx].adap, dest); if (valid_dest) { first_status = CEC_TX_STATUS_OK; /* * Message length is in bytes, but each byte is transmitted in * a block of 10 bits. */ wait_xfer_us = first_msg.len * 10 * CEC_DATA_BIT_US; } else { first_status = CEC_TX_STATUS_NACK; /* * A message that is not acknowledged stops transmitting after * the header block of 10 bits. */ wait_xfer_us = 10 * CEC_DATA_BIT_US; } wait_xfer_us += CEC_START_BIT_US; xfers_on_bus[first_idx].status = first_status; /* Sleep as if sending messages on a real hardware bus. */ start = ktime_get(); if (wait_arb_lost_us) { usleep_range(wait_arb_lost_us - CEC_MARGIN_US, wait_arb_lost_us); for (i = 0; i < ARRAY_SIZE(xfers_on_bus); i++) { if (xfers_on_bus[i].status != CEC_TX_STATUS_ARB_LOST) continue; cec_transmit_attempt_done(xfers_on_bus[i].adap, CEC_TX_STATUS_ARB_LOST); } if (kthread_should_stop()) break; } wait_xfer_us -= wait_arb_lost_us; usleep_range(wait_xfer_us - CEC_MARGIN_US, wait_xfer_us); cec_transmit_attempt_done(xfers_on_bus[first_idx].adap, first_status); if (kthread_should_stop()) break; if (first_status == CEC_TX_STATUS_OK) { if (xfers_on_bus[first_idx].adap != dev->cec_rx_adap) cec_received_msg(dev->cec_rx_adap, &first_msg); for (i = 0, j = 0; i < dev->num_inputs; i++) { unsigned int menu_idx = dev->input_is_connected_to_output[i]; if (dev->input_type[i] != HDMI) continue; j++; if (menu_idx < FIXED_MENU_ITEMS) continue; struct vivid_dev *dev_tx = vivid_ctrl_hdmi_to_output_instance[menu_idx]; unsigned int output = vivid_ctrl_hdmi_to_output_index[menu_idx]; if (!dev_tx) continue; unsigned int hdmi_output = dev_tx->output_to_iface_index[output]; if (xfers_on_bus[first_idx].adap != dev_tx->cec_tx_adap[hdmi_output]) cec_received_msg(dev_tx->cec_tx_adap[hdmi_output], &first_msg); } } end = ktime_get(); /* * If the emulated transfer took more or less time than it should * have, then compensate by adjusting the wait time needed for the * bus to be signal-free for 3 bit periods (the retry time). */ delta_us = div_s64(end - start, 1000); delta_us -= wait_xfer_us + wait_arb_lost_us; retry_us = CEC_SIGNAL_FREE_TIME_RETRY * CEC_DATA_BIT_US - delta_us; if (retry_us > CEC_MARGIN_US) usleep_range(retry_us - CEC_MARGIN_US, retry_us); dev->cec_sft = CEC_SIGNAL_FREE_TIME_RETRY; /* * If there are no messages that need to be retried, check if any * adapters that did not just transmit a message are ready to * transmit. If none of these adapters are ready, then increase * the signal-free time so that the bus is available to all * adapters and go back to waiting for a transmission. */ while (dev->cec_sft >= CEC_SIGNAL_FREE_TIME_RETRY && dev->cec_sft < CEC_SIGNAL_FREE_TIME_NEXT_XFER && !xfer_ready(dev) && !kthread_should_stop()) { usleep_range(2 * CEC_DATA_BIT_US - CEC_MARGIN_US, 2 * CEC_DATA_BIT_US); dev->cec_sft += 2; } } return 0; } static int vivid_cec_adap_enable(struct cec_adapter *adap, bool enable) { adap->cec_pin_is_high = true; return 0; } static int vivid_cec_adap_log_addr(struct cec_adapter *adap, u8 log_addr) { return 0; } static int vivid_cec_adap_transmit(struct cec_adapter *adap, u8 attempts, u32 signal_free_time, struct cec_msg *msg) { struct vivid_dev *dev = cec_get_drvdata(adap); struct vivid_dev *dev_rx = dev; u8 idx = cec_msg_initiator(msg); u8 output = 0; if (dev->cec_rx_adap != adap) { int i; for (i = 0; i < dev->num_hdmi_outputs; i++) if (dev->cec_tx_adap[i] == adap) break; if (i == dev->num_hdmi_outputs) return -ENONET; output = dev->hdmi_index_to_output_index[i]; dev_rx = dev->output_to_input_instance[output]; if (!dev_rx) return -ENONET; } spin_lock(&dev_rx->cec_xfers_slock); dev_rx->xfers[idx].adap = adap; memcpy(dev_rx->xfers[idx].msg, msg->msg, CEC_MAX_MSG_SIZE); dev_rx->xfers[idx].len = msg->len; dev_rx->xfers[idx].sft = CEC_SIGNAL_FREE_TIME_RETRY; if (signal_free_time > CEC_SIGNAL_FREE_TIME_RETRY) { if (idx == dev_rx->last_initiator) dev_rx->xfers[idx].sft = CEC_SIGNAL_FREE_TIME_NEXT_XFER; else dev_rx->xfers[idx].sft = CEC_SIGNAL_FREE_TIME_NEW_INITIATOR; } spin_unlock(&dev_rx->cec_xfers_slock); wake_up_interruptible(&dev_rx->kthread_waitq_cec); return 0; } static int vivid_received(struct cec_adapter *adap, struct cec_msg *msg) { struct vivid_dev *dev = cec_get_drvdata(adap); struct cec_msg reply; u8 dest = cec_msg_destination(msg); u8 disp_ctl; char osd[14]; if (cec_msg_is_broadcast(msg)) dest = adap->log_addrs.log_addr[0]; cec_msg_init(&reply, dest, cec_msg_initiator(msg)); switch (cec_msg_opcode(msg)) { case CEC_MSG_SET_OSD_STRING: if (!cec_is_sink(adap)) return -ENOMSG; cec_ops_set_osd_string(msg, &disp_ctl, osd); switch (disp_ctl) { case CEC_OP_DISP_CTL_DEFAULT: strscpy(dev->osd, osd, sizeof(dev->osd)); dev->osd_jiffies = jiffies; break; case CEC_OP_DISP_CTL_UNTIL_CLEARED: strscpy(dev->osd, osd, sizeof(dev->osd)); dev->osd_jiffies = 0; break; case CEC_OP_DISP_CTL_CLEAR: dev->osd[0] = 0; dev->osd_jiffies = 0; break; default: cec_msg_feature_abort(&reply, cec_msg_opcode(msg), CEC_OP_ABORT_INVALID_OP); cec_transmit_msg(adap, &reply, false); break; } break; default: return -ENOMSG; } return 0; } static const struct cec_adap_ops vivid_cec_adap_ops = { .adap_enable = vivid_cec_adap_enable, .adap_log_addr = vivid_cec_adap_log_addr, .adap_transmit = vivid_cec_adap_transmit, .received = vivid_received, }; struct cec_adapter *vivid_cec_alloc_adap(struct vivid_dev *dev, unsigned int idx, bool is_source) { u32 caps = CEC_CAP_DEFAULTS | CEC_CAP_MONITOR_ALL | CEC_CAP_MONITOR_PIN; char name[32]; snprintf(name, sizeof(name), "vivid-%03d-vid-%s%d", dev->inst, is_source ? "out" : "cap", idx); return cec_allocate_adapter(&vivid_cec_adap_ops, dev, name, caps, CEC_MAX_LOG_ADDRS); } |
| 58 3 58 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_TTY_FLIP_H #define _LINUX_TTY_FLIP_H #include <linux/tty_buffer.h> #include <linux/tty_port.h> struct tty_ldisc; int tty_buffer_set_limit(struct tty_port *port, int limit); unsigned int tty_buffer_space_avail(struct tty_port *port); int tty_buffer_request_room(struct tty_port *port, size_t size); size_t __tty_insert_flip_string_flags(struct tty_port *port, const u8 *chars, const u8 *flags, bool mutable_flags, size_t size); size_t tty_prepare_flip_string(struct tty_port *port, u8 **chars, size_t size); void tty_flip_buffer_push(struct tty_port *port); /** * tty_insert_flip_string_fixed_flag - add characters to the tty buffer * @port: tty port * @chars: characters * @flag: flag value for each character * @size: size * * Queue a series of bytes to the tty buffering. All the characters passed are * marked with the supplied flag. * * Returns: the number added. */ static inline size_t tty_insert_flip_string_fixed_flag(struct tty_port *port, const u8 *chars, u8 flag, size_t size) { return __tty_insert_flip_string_flags(port, chars, &flag, false, size); } /** * tty_insert_flip_string_flags - add characters to the tty buffer * @port: tty port * @chars: characters * @flags: flag bytes * @size: size * * Queue a series of bytes to the tty buffering. For each character the flags * array indicates the status of the character. * * Returns: the number added. */ static inline size_t tty_insert_flip_string_flags(struct tty_port *port, const u8 *chars, const u8 *flags, size_t size) { return __tty_insert_flip_string_flags(port, chars, flags, true, size); } /** * tty_insert_flip_char - add one character to the tty buffer * @port: tty port * @ch: character * @flag: flag byte * * Queue a single byte @ch to the tty buffering, with an optional flag. */ static inline size_t tty_insert_flip_char(struct tty_port *port, u8 ch, u8 flag) { struct tty_buffer *tb = port->buf.tail; int change; change = !tb->flags && (flag != TTY_NORMAL); if (!change && tb->used < tb->size) { if (tb->flags) *flag_buf_ptr(tb, tb->used) = flag; *char_buf_ptr(tb, tb->used++) = ch; return 1; } return __tty_insert_flip_string_flags(port, &ch, &flag, false, 1); } static inline size_t tty_insert_flip_string(struct tty_port *port, const u8 *chars, size_t size) { return tty_insert_flip_string_fixed_flag(port, chars, TTY_NORMAL, size); } size_t tty_ldisc_receive_buf(struct tty_ldisc *ld, const u8 *p, const u8 *f, size_t count); void tty_buffer_lock_exclusive(struct tty_port *port); void tty_buffer_unlock_exclusive(struct tty_port *port); #endif /* _LINUX_TTY_FLIP_H */ |
| 1 1 1 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" #include "bitset.h" struct privflags_req_info { struct ethnl_req_info base; }; struct privflags_reply_data { struct ethnl_reply_data base; const char (*priv_flag_names)[ETH_GSTRING_LEN]; unsigned int n_priv_flags; u32 priv_flags; }; #define PRIVFLAGS_REPDATA(__reply_base) \ container_of(__reply_base, struct privflags_reply_data, base) const struct nla_policy ethnl_privflags_get_policy[] = { [ETHTOOL_A_PRIVFLAGS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int ethnl_get_priv_flags_info(struct net_device *dev, unsigned int *count, const char (**names)[ETH_GSTRING_LEN]) { const struct ethtool_ops *ops = dev->ethtool_ops; int nflags; nflags = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); if (nflags < 0) return nflags; if (names) { *names = kcalloc(nflags, ETH_GSTRING_LEN, GFP_KERNEL); if (!*names) return -ENOMEM; ops->get_strings(dev, ETH_SS_PRIV_FLAGS, (u8 *)*names); } /* We can pass more than 32 private flags to userspace via netlink but * we cannot get more with ethtool_ops::get_priv_flags(). Note that we * must not adjust nflags before allocating the space for flag names * as the buffer must be large enough for all flags. */ if (WARN_ONCE(nflags > 32, "device %s reports more than 32 private flags (%d)\n", netdev_name(dev), nflags)) nflags = 32; *count = nflags; return 0; } static int privflags_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); struct net_device *dev = reply_base->dev; const char (*names)[ETH_GSTRING_LEN]; const struct ethtool_ops *ops; unsigned int nflags; int ret; ops = dev->ethtool_ops; if (!ops->get_priv_flags || !ops->get_sset_count || !ops->get_strings) return -EOPNOTSUPP; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; ret = ethnl_get_priv_flags_info(dev, &nflags, &names); if (ret < 0) goto out_ops; data->priv_flags = ops->get_priv_flags(dev); data->priv_flag_names = names; data->n_priv_flags = nflags; out_ops: ethnl_ops_complete(dev); return ret; } static int privflags_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const u32 all_flags = ~(u32)0 >> (32 - data->n_priv_flags); return ethnl_bitset32_size(&data->priv_flags, &all_flags, data->n_priv_flags, data->priv_flag_names, compact); } static int privflags_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; const u32 all_flags = ~(u32)0 >> (32 - data->n_priv_flags); return ethnl_put_bitset32(skb, ETHTOOL_A_PRIVFLAGS_FLAGS, &data->priv_flags, &all_flags, data->n_priv_flags, data->priv_flag_names, compact); } static void privflags_cleanup_data(struct ethnl_reply_data *reply_data) { struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_data); kfree(data->priv_flag_names); } /* PRIVFLAGS_SET */ const struct nla_policy ethnl_privflags_set_policy[] = { [ETHTOOL_A_PRIVFLAGS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_PRIVFLAGS_FLAGS] = { .type = NLA_NESTED }, }; static int ethnl_set_privflags_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; if (!info->attrs[ETHTOOL_A_PRIVFLAGS_FLAGS]) return -EINVAL; if (!ops->get_priv_flags || !ops->set_priv_flags || !ops->get_sset_count || !ops->get_strings) return -EOPNOTSUPP; return 1; } static int ethnl_set_privflags(struct ethnl_req_info *req_info, struct genl_info *info) { const char (*names)[ETH_GSTRING_LEN] = NULL; struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; unsigned int nflags; bool mod = false; bool compact; u32 flags; int ret; ret = ethnl_bitset_is_compact(tb[ETHTOOL_A_PRIVFLAGS_FLAGS], &compact); if (ret < 0) return ret; ret = ethnl_get_priv_flags_info(dev, &nflags, compact ? NULL : &names); if (ret < 0) return ret; flags = dev->ethtool_ops->get_priv_flags(dev); ret = ethnl_update_bitset32(&flags, nflags, tb[ETHTOOL_A_PRIVFLAGS_FLAGS], names, info->extack, &mod); if (ret < 0 || !mod) goto out_free; ret = dev->ethtool_ops->set_priv_flags(dev, flags); if (ret < 0) goto out_free; ret = 1; out_free: kfree(names); return ret; } const struct ethnl_request_ops ethnl_privflags_request_ops = { .request_cmd = ETHTOOL_MSG_PRIVFLAGS_GET, .reply_cmd = ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, .hdr_attr = ETHTOOL_A_PRIVFLAGS_HEADER, .req_info_size = sizeof(struct privflags_req_info), .reply_data_size = sizeof(struct privflags_reply_data), .prepare_data = privflags_prepare_data, .reply_size = privflags_reply_size, .fill_reply = privflags_fill_reply, .cleanup_data = privflags_cleanup_data, .set_validate = ethnl_set_privflags_validate, .set = ethnl_set_privflags, .set_ntf_cmd = ETHTOOL_MSG_PRIVFLAGS_NTF, }; |
| 3 3 11 11 6 4 2 7 4 2 2 3 1 2 2 1 1 6 1 1 3 2 1 1 2 4 3 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * Copyright (c) 2012-2014 Pablo Neira Ayuso <pablo@netfilter.org> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <linux/audit.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/ipv6.h> #include <net/ip.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_log.h> #include <linux/netdevice.h> static const char *nft_log_null_prefix = ""; struct nft_log { struct nf_loginfo loginfo; char *prefix; }; static bool audit_ip4(struct audit_buffer *ab, struct sk_buff *skb) { struct iphdr _iph; const struct iphdr *ih; ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_iph), &_iph); if (!ih) return false; audit_log_format(ab, " saddr=%pI4 daddr=%pI4 proto=%hhu", &ih->saddr, &ih->daddr, ih->protocol); return true; } static bool audit_ip6(struct audit_buffer *ab, struct sk_buff *skb) { struct ipv6hdr _ip6h; const struct ipv6hdr *ih; u8 nexthdr; __be16 frag_off; ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); if (!ih) return false; nexthdr = ih->nexthdr; ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), &nexthdr, &frag_off); audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu", &ih->saddr, &ih->daddr, nexthdr); return true; } static void nft_log_eval_audit(const struct nft_pktinfo *pkt) { struct sk_buff *skb = pkt->skb; struct audit_buffer *ab; int fam = -1; if (!audit_enabled) return; ab = audit_log_start(NULL, GFP_ATOMIC, AUDIT_NETFILTER_PKT); if (!ab) return; audit_log_format(ab, "mark=%#x", skb->mark); switch (nft_pf(pkt)) { case NFPROTO_BRIDGE: switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1; break; case htons(ETH_P_IPV6): fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1; break; } break; case NFPROTO_IPV4: fam = audit_ip4(ab, skb) ? NFPROTO_IPV4 : -1; break; case NFPROTO_IPV6: fam = audit_ip6(ab, skb) ? NFPROTO_IPV6 : -1; break; } if (fam == -1) audit_log_format(ab, " saddr=? daddr=? proto=-1"); audit_log_end(ab); } static void nft_log_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_log *priv = nft_expr_priv(expr); if (priv->loginfo.type == NF_LOG_TYPE_LOG && priv->loginfo.u.log.level == NFT_LOGLEVEL_AUDIT) { nft_log_eval_audit(pkt); return; } nf_log_packet(nft_net(pkt), nft_pf(pkt), nft_hook(pkt), pkt->skb, nft_in(pkt), nft_out(pkt), &priv->loginfo, "%s", priv->prefix); } static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { [NFTA_LOG_GROUP] = { .type = NLA_U16 }, [NFTA_LOG_PREFIX] = { .type = NLA_STRING, .len = NF_LOG_PREFIXLEN - 1 }, [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, [NFTA_LOG_LEVEL] = { .type = NLA_U32 }, [NFTA_LOG_FLAGS] = { .type = NLA_U32 }, }; static int nft_log_modprobe(struct net *net, enum nf_log_type t) { switch (t) { case NF_LOG_TYPE_LOG: return nft_request_module(net, "%s", "nf_log_syslog"); case NF_LOG_TYPE_ULOG: return nft_request_module(net, "%s", "nfnetlink_log"); case NF_LOG_TYPE_MAX: break; } return -ENOENT; } static int nft_log_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; const struct nlattr *nla; int err; li->type = NF_LOG_TYPE_LOG; if (tb[NFTA_LOG_LEVEL] != NULL && tb[NFTA_LOG_GROUP] != NULL) return -EINVAL; if (tb[NFTA_LOG_GROUP] != NULL) { li->type = NF_LOG_TYPE_ULOG; if (tb[NFTA_LOG_FLAGS] != NULL) return -EINVAL; } nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL); if (priv->prefix == NULL) return -ENOMEM; nla_strscpy(priv->prefix, nla, nla_len(nla) + 1); } else { priv->prefix = (char *)nft_log_null_prefix; } switch (li->type) { case NF_LOG_TYPE_LOG: if (tb[NFTA_LOG_LEVEL] != NULL) { li->u.log.level = ntohl(nla_get_be32(tb[NFTA_LOG_LEVEL])); } else { li->u.log.level = NFT_LOGLEVEL_WARNING; } if (li->u.log.level > NFT_LOGLEVEL_AUDIT) { err = -EINVAL; goto err1; } if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); if (li->u.log.logflags & ~NF_LOG_MASK) { err = -EINVAL; goto err1; } } break; case NF_LOG_TYPE_ULOG: li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP])); if (tb[NFTA_LOG_SNAPLEN] != NULL) { li->u.ulog.flags |= NF_LOG_F_COPY_LEN; li->u.ulog.copy_len = ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN])); } if (tb[NFTA_LOG_QTHRESHOLD] != NULL) { li->u.ulog.qthreshold = ntohs(nla_get_be16(tb[NFTA_LOG_QTHRESHOLD])); } break; } if (li->u.log.level == NFT_LOGLEVEL_AUDIT) return 0; err = nf_logger_find_get(ctx->family, li->type); if (err < 0) { if (nft_log_modprobe(ctx->net, li->type) == -EAGAIN) err = -EAGAIN; goto err1; } return 0; err1: if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); return err; } static void nft_log_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) { struct nft_log *priv = nft_expr_priv(expr); struct nf_loginfo *li = &priv->loginfo; if (priv->prefix != nft_log_null_prefix) kfree(priv->prefix); if (li->u.log.level == NFT_LOGLEVEL_AUDIT) return; nf_logger_put(ctx->family, li->type); } static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_log *priv = nft_expr_priv(expr); const struct nf_loginfo *li = &priv->loginfo; if (priv->prefix != nft_log_null_prefix) if (nla_put_string(skb, NFTA_LOG_PREFIX, priv->prefix)) goto nla_put_failure; switch (li->type) { case NF_LOG_TYPE_LOG: if (nla_put_be32(skb, NFTA_LOG_LEVEL, htonl(li->u.log.level))) goto nla_put_failure; if (li->u.log.logflags) { if (nla_put_be32(skb, NFTA_LOG_FLAGS, htonl(li->u.log.logflags))) goto nla_put_failure; } break; case NF_LOG_TYPE_ULOG: if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group))) goto nla_put_failure; if (li->u.ulog.flags & NF_LOG_F_COPY_LEN) { if (nla_put_be32(skb, NFTA_LOG_SNAPLEN, htonl(li->u.ulog.copy_len))) goto nla_put_failure; } if (li->u.ulog.qthreshold) { if (nla_put_be16(skb, NFTA_LOG_QTHRESHOLD, htons(li->u.ulog.qthreshold))) goto nla_put_failure; } break; } return 0; nla_put_failure: return -1; } static struct nft_expr_type nft_log_type; static const struct nft_expr_ops nft_log_ops = { .type = &nft_log_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_log)), .eval = nft_log_eval, .init = nft_log_init, .destroy = nft_log_destroy, .dump = nft_log_dump, .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_log_type __read_mostly = { .name = "log", .ops = &nft_log_ops, .policy = nft_log_policy, .maxattr = NFTA_LOG_MAX, .owner = THIS_MODULE, }; static int __init nft_log_module_init(void) { return nft_register_expr(&nft_log_type); } static void __exit nft_log_module_exit(void) { nft_unregister_expr(&nft_log_type); } module_init(nft_log_module_init); module_exit(nft_log_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("log"); MODULE_DESCRIPTION("Netfilter nf_tables log module"); |
| 63 63 44 41 40 5 5 3 45 46 46 44 4 4 4 1 17 17 3 3 3 14 14 15 3 1 1 2 1 4 4 2 2 4 2 2 2 2 2 2 2 1 11 9 3 6 1 1 11 11 7 1 1 3 1 1 5 5 1 1 4 24 25 10 9 6 4 5 5 6 4 10 2 2 22 20 2 22 3 3 4 2 1 1 12 1 11 22 2 3 2 2 2 21 21 6 1 1 10 6 9 1 5 12 12 11 2 12 2 14 1 1 15 15 4 1 3 2 9 15 14 12 11 2 1 12 12 3 4 12 1 10 10 1 1 3 1 5 6 2 1 2 1 3 1 3 3 1 1 2 9 1 1 14 14 14 13 1 1 13 8 8 8 3 6 5 1 4 8 8 5 3 8 4 5 8 8 5 8 5 4 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 | // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * "Ping" sockets * * Based on ipv4/udp.c code. * * Authors: Vasiliy Kulikov / Openwall (for Linux 2.6), * Pavel Kankovsky (for Linux 2.4.32) * * Pavel gave all rights to bugs to Vasiliy, * none of the bugs are Pavel's now. */ #include <linux/uaccess.h> #include <linux/types.h> #include <linux/fcntl.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/in.h> #include <linux/errno.h> #include <linux/timer.h> #include <linux/mm.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <net/snmp.h> #include <net/ip.h> #include <net/icmp.h> #include <net/protocol.h> #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/export.h> #include <linux/bpf-cgroup.h> #include <net/sock.h> #include <net/ping.h> #include <net/udp.h> #include <net/route.h> #include <net/inet_common.h> #include <net/checksum.h> #if IS_ENABLED(CONFIG_IPV6) #include <linux/in6.h> #include <linux/icmpv6.h> #include <net/addrconf.h> #include <net/ipv6.h> #include <net/transp_v6.h> #endif struct ping_table { struct hlist_head hash[PING_HTABLE_SIZE]; spinlock_t lock; }; static struct ping_table ping_table; struct pingv6_ops pingv6_ops; EXPORT_SYMBOL_GPL(pingv6_ops); static u16 ping_port_rover; static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask) { u32 res = (num + net_hash_mix(net)) & mask; pr_debug("hash(%u) = %u\n", num, res); return res; } EXPORT_SYMBOL_GPL(ping_hash); static inline struct hlist_head *ping_hashslot(struct ping_table *table, struct net *net, unsigned int num) { return &table->hash[ping_hashfn(net, num, PING_HTABLE_MASK)]; } int ping_get_port(struct sock *sk, unsigned short ident) { struct inet_sock *isk, *isk2; struct hlist_head *hlist; struct sock *sk2 = NULL; isk = inet_sk(sk); spin_lock(&ping_table.lock); if (ident == 0) { u32 i; u16 result = ping_port_rover + 1; for (i = 0; i < (1L << 16); i++, result++) { if (!result) result++; /* avoid zero */ hlist = ping_hashslot(&ping_table, sock_net(sk), result); sk_for_each(sk2, hlist) { isk2 = inet_sk(sk2); if (isk2->inet_num == result) goto next_port; } /* found */ ping_port_rover = ident = result; break; next_port: ; } if (i >= (1L << 16)) goto fail; } else { hlist = ping_hashslot(&ping_table, sock_net(sk), ident); sk_for_each(sk2, hlist) { isk2 = inet_sk(sk2); /* BUG? Why is this reuse and not reuseaddr? ping.c * doesn't turn off SO_REUSEADDR, and it doesn't expect * that other ping processes can steal its packets. */ if ((isk2->inet_num == ident) && (sk2 != sk) && (!sk2->sk_reuse || !sk->sk_reuse)) goto fail; } } pr_debug("found port/ident = %d\n", ident); isk->inet_num = ident; if (sk_unhashed(sk)) { pr_debug("was not hashed\n"); sk_add_node_rcu(sk, hlist); sock_set_flag(sk, SOCK_RCU_FREE); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } spin_unlock(&ping_table.lock); return 0; fail: spin_unlock(&ping_table.lock); return -EADDRINUSE; } EXPORT_SYMBOL_GPL(ping_get_port); int ping_hash(struct sock *sk) { pr_debug("ping_hash(sk->port=%u)\n", inet_sk(sk)->inet_num); BUG(); /* "Please do not press this button again." */ return 0; } void ping_unhash(struct sock *sk) { struct inet_sock *isk = inet_sk(sk); pr_debug("ping_unhash(isk=%p,isk->num=%u)\n", isk, isk->inet_num); spin_lock(&ping_table.lock); if (sk_del_node_init_rcu(sk)) { isk->inet_num = 0; isk->inet_sport = 0; sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); } spin_unlock(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_unhash); /* Called under rcu_read_lock() */ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) { struct hlist_head *hslot = ping_hashslot(&ping_table, net, ident); struct sock *sk = NULL; struct inet_sock *isk; int dif, sdif; if (skb->protocol == htons(ETH_P_IP)) { dif = inet_iif(skb); sdif = inet_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", (int)ident, &ip_hdr(skb)->daddr, dif); #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6)) { dif = inet6_iif(skb); sdif = inet6_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", (int)ident, &ipv6_hdr(skb)->daddr, dif); #endif } else { return NULL; } sk_for_each_rcu(sk, hslot) { isk = inet_sk(sk); pr_debug("iterate\n"); if (isk->inet_num != ident) continue; if (skb->protocol == htons(ETH_P_IP) && sk->sk_family == AF_INET) { pr_debug("found: %p: num=%d, daddr=%pI4, dif=%d\n", sk, (int) isk->inet_num, &isk->inet_rcv_saddr, sk->sk_bound_dev_if); if (isk->inet_rcv_saddr && isk->inet_rcv_saddr != ip_hdr(skb)->daddr) continue; #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6) && sk->sk_family == AF_INET6) { pr_debug("found: %p: num=%d, daddr=%pI6c, dif=%d\n", sk, (int) isk->inet_num, &sk->sk_v6_rcv_saddr, sk->sk_bound_dev_if); if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, &ipv6_hdr(skb)->daddr)) continue; #endif } else { continue; } if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && sk->sk_bound_dev_if != sdif) continue; goto exit; } sk = NULL; exit: return sk; } static void inet_get_ping_group_range_net(struct net *net, kgid_t *low, kgid_t *high) { kgid_t *data = net->ipv4.ping_group_range.range; unsigned int seq; do { seq = read_seqbegin(&net->ipv4.ping_group_range.lock); *low = data[0]; *high = data[1]; } while (read_seqretry(&net->ipv4.ping_group_range.lock, seq)); } int ping_init_sock(struct sock *sk) { struct net *net = sock_net(sk); kgid_t group = current_egid(); struct group_info *group_info; int i; kgid_t low, high; int ret = 0; if (sk->sk_family == AF_INET6) sk->sk_ipv6only = 1; inet_get_ping_group_range_net(net, &low, &high); if (gid_lte(low, group) && gid_lte(group, high)) return 0; group_info = get_current_groups(); for (i = 0; i < group_info->ngroups; i++) { kgid_t gid = group_info->gid[i]; if (gid_lte(low, gid) && gid_lte(gid, high)) goto out_release_group; } ret = -EACCES; out_release_group: put_group_info(group_info); return ret; } EXPORT_SYMBOL_GPL(ping_init_sock); void ping_close(struct sock *sk, long timeout) { pr_debug("ping_close(sk=%p,sk->num=%u)\n", inet_sk(sk), inet_sk(sk)->inet_num); pr_debug("isk->refcnt = %d\n", refcount_read(&sk->sk_refcnt)); sk_common_release(sk); } EXPORT_SYMBOL_GPL(ping_close); static int ping_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { /* This check is replicated from __ip4_datagram_connect() and * intended to prevent BPF program called below from accessing bytes * that are out of the bound specified by user in addr_len. */ if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } /* Checks the bind address and possibly modifies sk->sk_bound_dev_if. */ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, struct sockaddr *uaddr, int addr_len) { struct net *net = sock_net(sk); if (sk->sk_family == AF_INET) { struct sockaddr_in *addr = (struct sockaddr_in *) uaddr; u32 tb_id = RT_TABLE_LOCAL; int chk_addr_ret; if (addr_len < sizeof(*addr)) return -EINVAL; if (addr->sin_family != AF_INET && !(addr->sin_family == AF_UNSPEC && addr->sin_addr.s_addr == htonl(INADDR_ANY))) return -EAFNOSUPPORT; pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); if (addr->sin_addr.s_addr == htonl(INADDR_ANY)) return 0; tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id; chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id); if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST || (chk_addr_ret != RTN_LOCAL && !inet_can_nonlocal_bind(net, isk))) return -EADDRNOTAVAIL; #if IS_ENABLED(CONFIG_IPV6) } else if (sk->sk_family == AF_INET6) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *) uaddr; int addr_type, scoped, has_addr; struct net_device *dev = NULL; if (addr_len < sizeof(*addr)) return -EINVAL; if (addr->sin6_family != AF_INET6) return -EAFNOSUPPORT; pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); addr_type = ipv6_addr_type(&addr->sin6_addr); scoped = __ipv6_addr_needs_scope_id(addr_type); if ((addr_type != IPV6_ADDR_ANY && !(addr_type & IPV6_ADDR_UNICAST)) || (scoped && !addr->sin6_scope_id)) return -EINVAL; rcu_read_lock(); if (addr->sin6_scope_id) { dev = dev_get_by_index_rcu(net, addr->sin6_scope_id); if (!dev) { rcu_read_unlock(); return -ENODEV; } } if (!dev && sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { rcu_read_unlock(); return -ENODEV; } } has_addr = pingv6_ops.ipv6_chk_addr(net, &addr->sin6_addr, dev, scoped); rcu_read_unlock(); if (!(ipv6_can_nonlocal_bind(net, isk) || has_addr || addr_type == IPV6_ADDR_ANY)) return -EADDRNOTAVAIL; if (scoped) sk->sk_bound_dev_if = addr->sin6_scope_id; #endif } else { return -EAFNOSUPPORT; } return 0; } static void ping_set_saddr(struct sock *sk, struct sockaddr *saddr) { if (saddr->sa_family == AF_INET) { struct inet_sock *isk = inet_sk(sk); struct sockaddr_in *addr = (struct sockaddr_in *) saddr; isk->inet_rcv_saddr = isk->inet_saddr = addr->sin_addr.s_addr; #if IS_ENABLED(CONFIG_IPV6) } else if (saddr->sa_family == AF_INET6) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *) saddr; struct ipv6_pinfo *np = inet6_sk(sk); sk->sk_v6_rcv_saddr = np->saddr = addr->sin6_addr; #endif } } /* * We need our own bind because there are no privileged id's == local ports. * Moreover, we don't allow binding to multi- and broadcast addresses. */ int ping_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_sock *isk = inet_sk(sk); unsigned short snum; int err; int dif = sk->sk_bound_dev_if; err = ping_check_bind_addr(sk, isk, uaddr, addr_len); if (err) return err; lock_sock(sk); err = -EINVAL; if (isk->inet_num != 0) goto out; err = -EADDRINUSE; snum = ntohs(((struct sockaddr_in *)uaddr)->sin_port); if (ping_get_port(sk, snum) != 0) { /* Restore possibly modified sk->sk_bound_dev_if by ping_check_bind_addr(). */ sk->sk_bound_dev_if = dif; goto out; } ping_set_saddr(sk, uaddr); pr_debug("after bind(): num = %hu, dif = %d\n", isk->inet_num, sk->sk_bound_dev_if); err = 0; if (sk->sk_family == AF_INET && isk->inet_rcv_saddr) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6 && !ipv6_addr_any(&sk->sk_v6_rcv_saddr)) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; #endif if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; isk->inet_sport = htons(isk->inet_num); isk->inet_daddr = 0; isk->inet_dport = 0; #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) memset(&sk->sk_v6_daddr, 0, sizeof(sk->sk_v6_daddr)); #endif sk_dst_reset(sk); out: release_sock(sk); pr_debug("ping_v4_bind -> %d\n", err); return err; } EXPORT_SYMBOL_GPL(ping_bind); /* * Is this a supported type of ICMP message? */ static inline int ping_supported(int family, int type, int code) { return (family == AF_INET && type == ICMP_ECHO && code == 0) || (family == AF_INET && type == ICMP_EXT_ECHO && code == 0) || (family == AF_INET6 && type == ICMPV6_ECHO_REQUEST && code == 0) || (family == AF_INET6 && type == ICMPV6_EXT_ECHO_REQUEST && code == 0); } /* * This routine is called by the ICMP module when it gets some * sort of error condition. */ void ping_err(struct sk_buff *skb, int offset, u32 info) { int family; struct icmphdr *icmph; struct inet_sock *inet_sock; int type; int code; struct net *net = dev_net(skb->dev); struct sock *sk; int harderr; int err; if (skb->protocol == htons(ETH_P_IP)) { family = AF_INET; type = icmp_hdr(skb)->type; code = icmp_hdr(skb)->code; icmph = (struct icmphdr *)(skb->data + offset); } else if (skb->protocol == htons(ETH_P_IPV6)) { family = AF_INET6; type = icmp6_hdr(skb)->icmp6_type; code = icmp6_hdr(skb)->icmp6_code; icmph = (struct icmphdr *) (skb->data + offset); } else { BUG(); } /* We assume the packet has already been checked by icmp_unreach */ if (!ping_supported(family, icmph->type, icmph->code)) return; pr_debug("ping_err(proto=0x%x,type=%d,code=%d,id=%04x,seq=%04x)\n", skb->protocol, type, code, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (!sk) { pr_debug("no socket, dropping\n"); return; /* No socket for error */ } pr_debug("err on socket %p\n", sk); err = 0; harderr = 0; inet_sock = inet_sk(sk); if (skb->protocol == htons(ETH_P_IP)) { switch (type) { default: case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; break; case ICMP_SOURCE_QUENCH: /* This is not a real error but ping wants to see it. * Report it with some fake errno. */ err = EREMOTEIO; break; case ICMP_PARAMETERPROB: err = EPROTO; harderr = 1; break; case ICMP_DEST_UNREACH: if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ ipv4_sk_update_pmtu(skb, sk, info); if (READ_ONCE(inet_sock->pmtudisc) != IP_PMTUDISC_DONT) { err = EMSGSIZE; harderr = 1; break; } goto out; } err = EHOSTUNREACH; if (code <= NR_ICMP_UNREACH) { harderr = icmp_err_convert[code].fatal; err = icmp_err_convert[code].errno; } break; case ICMP_REDIRECT: /* See ICMP_SOURCE_QUENCH */ ipv4_sk_redirect(skb, sk); err = EREMOTEIO; break; } #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6)) { harderr = pingv6_ops.icmpv6_err_convert(type, code, &err); #endif } /* * RFC1122: OK. Passes ICMP errors back to application, as per * 4.1.3.3. */ if ((family == AF_INET && !inet_test_bit(RECVERR, sk)) || (family == AF_INET6 && !inet6_test_bit(RECVERR6, sk))) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) goto out; } else { if (family == AF_INET) { ip_icmp_error(sk, skb, err, 0 /* no remote port */, info, (u8 *)icmph); #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { pingv6_ops.ipv6_icmp_error(sk, skb, err, 0, info, (u8 *)icmph); #endif } } sk->sk_err = err; sk_error_report(sk); out: return; } EXPORT_SYMBOL_GPL(ping_err); /* * Copy and checksum an ICMP Echo packet from user space into a buffer * starting from the payload. */ int ping_getfrag(void *from, char *to, int offset, int fraglen, int odd, struct sk_buff *skb) { struct pingfakehdr *pfh = from; if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck, &pfh->msg->msg_iter)) return -EFAULT; #if IS_ENABLED(CONFIG_IPV6) /* For IPv6, checksum each skb as we go along, as expected by * icmpv6_push_pending_frames. For IPv4, accumulate the checksum in * wcheck, it will be finalized in ping_v4_push_pending_frames. */ if (pfh->family == AF_INET6) { skb->csum = csum_block_add(skb->csum, pfh->wcheck, odd); skb->ip_summed = CHECKSUM_NONE; pfh->wcheck = 0; } #endif return 0; } EXPORT_SYMBOL_GPL(ping_getfrag); static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, struct flowi4 *fl4) { struct sk_buff *skb = skb_peek(&sk->sk_write_queue); if (!skb) return 0; pfh->wcheck = csum_partial((char *)&pfh->icmph, sizeof(struct icmphdr), pfh->wcheck); pfh->icmph.checksum = csum_fold(pfh->wcheck); memcpy(icmp_hdr(skb), &pfh->icmph, sizeof(struct icmphdr)); skb->ip_summed = CHECKSUM_NONE; return ip_push_pending_frames(sk, fl4); } int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, void *user_icmph, size_t icmph_len) { u8 type, code; if (len > 0xFFFF) return -EMSGSIZE; /* Must have at least a full ICMP header. */ if (len < icmph_len) return -EINVAL; /* * Check the flags. */ /* Mirror BSD error message compatibility */ if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; /* * Fetch the ICMP header provided by the userland. * iovec is modified! The ICMP header is consumed. */ if (memcpy_from_msg(user_icmph, msg, icmph_len)) return -EFAULT; if (family == AF_INET) { type = ((struct icmphdr *) user_icmph)->type; code = ((struct icmphdr *) user_icmph)->code; #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { type = ((struct icmp6hdr *) user_icmph)->icmp6_type; code = ((struct icmp6hdr *) user_icmph)->icmp6_code; #endif } else { BUG(); } if (!ping_supported(family, type, code)) return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(ping_common_sendmsg); static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct net *net = sock_net(sk); struct flowi4 fl4; struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct icmphdr user_icmph; struct pingfakehdr pfh; struct rtable *rt = NULL; struct ip_options_data opt_copy; int free = 0; __be32 saddr, daddr, faddr; u8 tos, scope; int err; pr_debug("ping_v4_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num); err = ping_common_sendmsg(AF_INET, msg, len, &user_icmph, sizeof(user_icmph)); if (err) return err; /* * Get and verify the address. */ if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) return -EAFNOSUPPORT; daddr = usin->sin_addr.s_addr; /* no remote port */ } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; daddr = inet->inet_daddr; /* no remote port */ } ipcm_init_sk(&ipc, inet); if (msg->msg_controllen) { err = ip_cmsg_send(sk, msg, &ipc, false); if (unlikely(err)) { kfree(ipc.opt); return err; } if (ipc.opt) free = 1; } if (!ipc.opt) { struct ip_options_rcu *inet_opt; rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt) { memcpy(&opt_copy, inet_opt, sizeof(*inet_opt) + inet_opt->opt.optlen); ipc.opt = &opt_copy.opt; } rcu_read_unlock(); } saddr = ipc.addr; ipc.addr = faddr = daddr; if (ipc.opt && ipc.opt->opt.srr) { if (!daddr) { err = -EINVAL; goto out_free; } faddr = ipc.opt->opt.faddr; } tos = get_rttos(&ipc, inet); scope = ip_sendmsg_scope(inet, &ipc, msg); if (ipv4_is_multicast(daddr)) { if (!ipc.oif || netif_index_is_l3_master(sock_net(sk), ipc.oif)) ipc.oif = READ_ONCE(inet->mc_index); if (!saddr) saddr = READ_ONCE(inet->mc_addr); } else if (!ipc.oif) ipc.oif = READ_ONCE(inet->uc_index); flowi4_init_output(&fl4, ipc.oif, ipc.sockc.mark, tos, scope, sk->sk_protocol, inet_sk_flowi_flags(sk), faddr, saddr, 0, 0, sk->sk_uid); fl4.fl4_icmp_type = user_icmph.type; fl4.fl4_icmp_code = user_icmph.code; security_sk_classify_flow(sk, flowi4_to_flowi_common(&fl4)); rt = ip_route_output_flow(net, &fl4, sk); if (IS_ERR(rt)) { err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } err = -EACCES; if ((rt->rt_flags & RTCF_BROADCAST) && !sock_flag(sk, SOCK_BROADCAST)) goto out; if (msg->msg_flags & MSG_CONFIRM) goto do_confirm; back_from_confirm: if (!ipc.addr) ipc.addr = fl4.daddr; lock_sock(sk); pfh.icmph.type = user_icmph.type; /* already checked */ pfh.icmph.code = user_icmph.code; /* ditto */ pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; pfh.msg = msg; pfh.wcheck = 0; pfh.family = AF_INET; err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, sizeof(struct icmphdr), &ipc, &rt, msg->msg_flags); if (err) ip_flush_pending_frames(sk); else err = ping_v4_push_pending_frames(sk, &pfh, &fl4); release_sock(sk); out: ip_rt_put(rt); out_free: if (free) kfree(ipc.opt); if (!err) { icmp_out_count(sock_net(sk), user_icmph.type); return len; } return err; do_confirm: if (msg->msg_flags & MSG_PROBE) dst_confirm_neigh(&rt->dst, &fl4.daddr); if (!(msg->msg_flags & MSG_PROBE) || len) goto back_from_confirm; err = 0; goto out; } int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); int family = sk->sk_family; struct sk_buff *skb; int copied, err; pr_debug("ping_recvmsg(sk=%p,sk->num=%u)\n", isk, isk->inet_num); err = -EOPNOTSUPP; if (flags & MSG_OOB) goto out; if (flags & MSG_ERRQUEUE) return inet_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, &err); if (!skb) goto out; copied = skb->len; if (copied > len) { msg->msg_flags |= MSG_TRUNC; copied = len; } /* Don't bother checking the checksum */ err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto done; sock_recv_timestamp(msg, sk, skb); /* Copy the address and add cmsg data. */ if (family == AF_INET) { DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); if (sin) { sin->sin_family = AF_INET; sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); *addr_len = sizeof(*sin); } if (inet_cmsg_flags(isk)) ip_cmsg_recv(msg, skb); #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { struct ipv6hdr *ip6 = ipv6_hdr(skb); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); if (sin6) { sin6->sin6_family = AF_INET6; sin6->sin6_port = 0; sin6->sin6_addr = ip6->saddr; sin6->sin6_flowinfo = 0; if (inet6_test_bit(SNDFLOW, sk)) sin6->sin6_flowinfo = ip6_flowinfo(ip6); sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); *addr_len = sizeof(*sin6); } if (inet6_sk(sk)->rxopt.all) pingv6_ops.ip6_datagram_recv_common_ctl(sk, msg, skb); if (skb->protocol == htons(ETH_P_IPV6) && inet6_sk(sk)->rxopt.all) pingv6_ops.ip6_datagram_recv_specific_ctl(sk, msg, skb); else if (skb->protocol == htons(ETH_P_IP) && inet_cmsg_flags(isk)) ip_cmsg_recv(msg, skb); #endif } else { BUG(); } err = copied; done: skb_free_datagram(sk, skb); out: pr_debug("ping_recvmsg -> %d\n", err); return err; } EXPORT_SYMBOL_GPL(ping_recvmsg); static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason reason; pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { sk_skb_reason_drop(sk, skb, reason); pr_debug("ping_queue_rcv_skb -> failed\n"); return reason; } return SKB_NOT_DROPPED_YET; } int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { return __ping_queue_rcv_skb(sk, skb) ? -1 : 0; } EXPORT_SYMBOL_GPL(ping_queue_rcv_skb); /* * All we need to do is get the socket. */ enum skb_drop_reason ping_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NO_SOCKET; struct sock *sk; struct net *net = dev_net(skb->dev); struct icmphdr *icmph = icmp_hdr(skb); /* We assume the packet has already been checked by icmp_rcv */ pr_debug("ping_rcv(skb=%p,id=%04x,seq=%04x)\n", skb, ntohs(icmph->un.echo.id), ntohs(icmph->un.echo.sequence)); /* Push ICMP header back */ skb_push(skb, skb->data - (u8 *)icmph); sk = ping_lookup(net, skb, ntohs(icmph->un.echo.id)); if (sk) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); pr_debug("rcv on socket %p\n", sk); if (skb2) reason = __ping_queue_rcv_skb(sk, skb2); else reason = SKB_DROP_REASON_NOMEM; } if (reason) pr_debug("no socket, dropping\n"); return reason; } EXPORT_SYMBOL_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, .pre_connect = ping_pre_connect, .connect = ip4_datagram_connect, .disconnect = __udp_disconnect, .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, .sendmsg = ping_v4_sendmsg, .recvmsg = ping_recvmsg, .bind = ping_bind, .backlog_rcv = ping_queue_rcv_skb, .release_cb = ip4_datagram_release_cb, .hash = ping_hash, .unhash = ping_unhash, .get_port = ping_get_port, .put_port = ping_unhash, .obj_size = sizeof(struct inet_sock), }; EXPORT_SYMBOL(ping_prot); #ifdef CONFIG_PROC_FS static struct sock *ping_get_first(struct seq_file *seq, int start) { struct sock *sk; struct ping_iter_state *state = seq->private; struct net *net = seq_file_net(seq); for (state->bucket = start; state->bucket < PING_HTABLE_SIZE; ++state->bucket) { struct hlist_head *hslot; hslot = &ping_table.hash[state->bucket]; if (hlist_empty(hslot)) continue; sk_for_each(sk, hslot) { if (net_eq(sock_net(sk), net) && sk->sk_family == state->family) goto found; } } sk = NULL; found: return sk; } static struct sock *ping_get_next(struct seq_file *seq, struct sock *sk) { struct ping_iter_state *state = seq->private; struct net *net = seq_file_net(seq); do { sk = sk_next(sk); } while (sk && (!net_eq(sock_net(sk), net))); if (!sk) return ping_get_first(seq, state->bucket + 1); return sk; } static struct sock *ping_get_idx(struct seq_file *seq, loff_t pos) { struct sock *sk = ping_get_first(seq, 0); if (sk) while (pos && (sk = ping_get_next(seq, sk)) != NULL) --pos; return pos ? NULL : sk; } void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) __acquires(ping_table.lock) { struct ping_iter_state *state = seq->private; state->bucket = 0; state->family = family; spin_lock(&ping_table.lock); return *pos ? ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } EXPORT_SYMBOL_GPL(ping_seq_start); static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) { return ping_seq_start(seq, pos, AF_INET); } void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock *sk; if (v == SEQ_START_TOKEN) sk = ping_get_idx(seq, 0); else sk = ping_get_next(seq, v); ++*pos; return sk; } EXPORT_SYMBOL_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) __releases(ping_table.lock) { spin_unlock(&ping_table.lock); } EXPORT_SYMBOL_GPL(ping_seq_stop); static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket) { struct inet_sock *inet = inet_sk(sp); __be32 dest = inet->inet_daddr; __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); __u16 srcp = ntohs(inet->inet_sport); seq_printf(f, "%5d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u", bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), 0, sock_i_ino(sp), refcount_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } static int ping_v4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) seq_puts(seq, " sl local_address rem_address st tx_queue " "rx_queue tr tm->when retrnsmt uid timeout " "inode ref pointer drops"); else { struct ping_iter_state *state = seq->private; ping_v4_format_sock(v, seq, state->bucket); } seq_pad(seq, '\n'); return 0; } static const struct seq_operations ping_v4_seq_ops = { .start = ping_v4_seq_start, .show = ping_v4_seq_show, .next = ping_seq_next, .stop = ping_seq_stop, }; static int __net_init ping_v4_proc_init_net(struct net *net) { if (!proc_create_net("icmp", 0444, net->proc_net, &ping_v4_seq_ops, sizeof(struct ping_iter_state))) return -ENOMEM; return 0; } static void __net_exit ping_v4_proc_exit_net(struct net *net) { remove_proc_entry("icmp", net->proc_net); } static struct pernet_operations ping_v4_net_ops = { .init = ping_v4_proc_init_net, .exit = ping_v4_proc_exit_net, }; int __init ping_proc_init(void) { return register_pernet_subsys(&ping_v4_net_ops); } void ping_proc_exit(void) { unregister_pernet_subsys(&ping_v4_net_ops); } #endif void __init ping_init(void) { int i; for (i = 0; i < PING_HTABLE_SIZE; i++) INIT_HLIST_HEAD(&ping_table.hash[i]); spin_lock_init(&ping_table.lock); } |
| 29 1 26 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2011 Florian Westphal <fw@strlen.de> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/route.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <linux/netfilter/xt_rpfilter.h> #include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_DESCRIPTION("Xtables: IPv6 reverse path filter match"); static bool rpfilter_addr_unicast(const struct in6_addr *addr) { int addr_type = ipv6_addr_type(addr); return addr_type & IPV6_ADDR_UNICAST; } static bool rpfilter_addr_linklocal(const struct in6_addr *addr) { int addr_type = ipv6_addr_type(addr); return addr_type & IPV6_ADDR_LINKLOCAL; } static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, const struct net_device *dev, u8 flags) { struct rt6_info *rt; struct ipv6hdr *iph = ipv6_hdr(skb); bool ret = false; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev), .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_proto = iph->nexthdr, .flowi6_uid = sock_net_uid(net, NULL), .daddr = iph->saddr, }; int lookup_flags; if (rpfilter_addr_unicast(&iph->daddr)) { memcpy(&fl6.saddr, &iph->daddr, sizeof(struct in6_addr)); lookup_flags = RT6_LOOKUP_F_HAS_SADDR; } else { lookup_flags = 0; } fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; if (rpfilter_addr_linklocal(&iph->saddr)) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6.flowi6_oif = dev->ifindex; } else if ((flags & XT_RPFILTER_LOOSE) == 0) fl6.flowi6_oif = dev->ifindex; rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags); if (rt->dst.error) goto out; if (rt->rt6i_flags & (RTF_REJECT|RTF_ANYCAST)) goto out; if (rt->rt6i_flags & RTF_LOCAL) { ret = flags & XT_RPFILTER_ACCEPT_LOCAL; goto out; } if (rt->rt6i_idev->dev == dev || l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex || (flags & XT_RPFILTER_LOOSE)) ret = true; out: ip6_rt_put(rt); return ret; } static bool rpfilter_is_loopback(const struct sk_buff *skb, const struct net_device *in) { return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; int saddrtype; struct ipv6hdr *iph; bool invert = info->flags & XT_RPFILTER_INVERT; if (rpfilter_is_loopback(skb, xt_in(par))) return true ^ invert; iph = ipv6_hdr(skb); saddrtype = ipv6_addr_type(&iph->saddr); if (unlikely(saddrtype == IPV6_ADDR_ANY)) return true ^ invert; /* not routable: forward path will drop it */ return rpfilter_lookup_reverse6(xt_net(par), skb, xt_in(par), info->flags) ^ invert; } static int rpfilter_check(const struct xt_mtchk_param *par) { const struct xt_rpfilter_info *info = par->matchinfo; unsigned int options = ~XT_RPFILTER_OPTION_MASK; if (info->flags & options) { pr_info_ratelimited("unknown options\n"); return -EINVAL; } if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "raw") != 0) { pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n", par->table); return -EINVAL; } return 0; } static struct xt_match rpfilter_mt_reg __read_mostly = { .name = "rpfilter", .family = NFPROTO_IPV6, .checkentry = rpfilter_check, .match = rpfilter_mt, .matchsize = sizeof(struct xt_rpfilter_info), .hooks = (1 << NF_INET_PRE_ROUTING), .me = THIS_MODULE }; static int __init rpfilter_mt_init(void) { return xt_register_match(&rpfilter_mt_reg); } static void __exit rpfilter_mt_exit(void) { xt_unregister_match(&rpfilter_mt_reg); } module_init(rpfilter_mt_init); module_exit(rpfilter_mt_exit); |
| 4 2 15 39 13 4 41 41 1 2 33 16 2 40 29 40 4 30 29 30 15 2 17 27 22 11 11 2 1 1 17 1 24 55 18 42 42 25 37 13 2 10 28 12 11 1 4 26 19 3 2 19 1 20 2 20 2 5 1 3 5 2 2 1 1 4 6 29 1 2 4 1 1 3 1 1 3 56 16 16 24 8 7 7 1 1 7 7 5 5 1 4 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/isofs/rock.c * * (C) 1992, 1993 Eric Youngdale * * Rock Ridge Extensions to iso9660 */ #include <linux/slab.h> #include <linux/pagemap.h> #include "isofs.h" #include "rock.h" /* * These functions are designed to read the system areas of a directory record * and extract relevant information. There are different functions provided * depending upon what information we need at the time. One function fills * out an inode structure, a second one extracts a filename, a third one * returns a symbolic link name, and a fourth one returns the extent number * for the file. */ #define SIG(A,B) ((A) | ((B) << 8)) /* isonum_721() */ struct rock_state { void *buffer; unsigned char *chr; int len; int cont_size; int cont_extent; int cont_offset; int cont_loops; struct inode *inode; }; /* * This is a way of ensuring that we have something in the system * use fields that is compatible with Rock Ridge. Return zero on success. */ static int check_sp(struct rock_ridge *rr, struct inode *inode) { if (rr->u.SP.magic[0] != 0xbe) return -1; if (rr->u.SP.magic[1] != 0xef) return -1; ISOFS_SB(inode->i_sb)->s_rock_offset = rr->u.SP.skip; return 0; } static void setup_rock_ridge(struct iso_directory_record *de, struct inode *inode, struct rock_state *rs) { rs->len = sizeof(struct iso_directory_record) + de->name_len[0]; if (rs->len & 1) (rs->len)++; rs->chr = (unsigned char *)de + rs->len; rs->len = *((unsigned char *)de) - rs->len; if (rs->len < 0) rs->len = 0; if (ISOFS_SB(inode->i_sb)->s_rock_offset != -1) { rs->len -= ISOFS_SB(inode->i_sb)->s_rock_offset; rs->chr += ISOFS_SB(inode->i_sb)->s_rock_offset; if (rs->len < 0) rs->len = 0; } } static void init_rock_state(struct rock_state *rs, struct inode *inode) { memset(rs, 0, sizeof(*rs)); rs->inode = inode; } /* Maximum number of Rock Ridge continuation entries */ #define RR_MAX_CE_ENTRIES 32 /* * Returns 0 if the caller should continue scanning, 1 if the scan must end * and -ve on error. */ static int rock_continue(struct rock_state *rs) { int ret = 1; int blocksize = 1 << rs->inode->i_blkbits; const int min_de_size = offsetof(struct rock_ridge, u); kfree(rs->buffer); rs->buffer = NULL; if ((unsigned)rs->cont_offset > blocksize - min_de_size || (unsigned)rs->cont_size > blocksize || (unsigned)(rs->cont_offset + rs->cont_size) > blocksize) { printk(KERN_NOTICE "rock: corrupted directory entry. " "extent=%d, offset=%d, size=%d\n", rs->cont_extent, rs->cont_offset, rs->cont_size); ret = -EIO; goto out; } if (rs->cont_extent) { struct buffer_head *bh; rs->buffer = kmalloc(rs->cont_size, GFP_KERNEL); if (!rs->buffer) { ret = -ENOMEM; goto out; } ret = -EIO; if (++rs->cont_loops >= RR_MAX_CE_ENTRIES) goto out; bh = sb_bread(rs->inode->i_sb, rs->cont_extent); if (bh) { memcpy(rs->buffer, bh->b_data + rs->cont_offset, rs->cont_size); put_bh(bh); rs->chr = rs->buffer; rs->len = rs->cont_size; rs->cont_extent = 0; rs->cont_size = 0; rs->cont_offset = 0; return 0; } printk("Unable to read rock-ridge attributes\n"); } out: kfree(rs->buffer); rs->buffer = NULL; return ret; } /* * We think there's a record of type `sig' at rs->chr. Parse the signature * and make sure that there's really room for a record of that type. */ static int rock_check_overflow(struct rock_state *rs, int sig) { int len; switch (sig) { case SIG('S', 'P'): len = sizeof(struct SU_SP_s); break; case SIG('C', 'E'): len = sizeof(struct SU_CE_s); break; case SIG('E', 'R'): len = sizeof(struct SU_ER_s); break; case SIG('R', 'R'): len = sizeof(struct RR_RR_s); break; case SIG('P', 'X'): len = sizeof(struct RR_PX_s); break; case SIG('P', 'N'): len = sizeof(struct RR_PN_s); break; case SIG('S', 'L'): len = sizeof(struct RR_SL_s); break; case SIG('N', 'M'): len = sizeof(struct RR_NM_s); break; case SIG('C', 'L'): len = sizeof(struct RR_CL_s); break; case SIG('P', 'L'): len = sizeof(struct RR_PL_s); break; case SIG('T', 'F'): len = sizeof(struct RR_TF_s); break; case SIG('Z', 'F'): len = sizeof(struct RR_ZF_s); break; default: len = 0; break; } len += offsetof(struct rock_ridge, u); if (len > rs->len) { printk(KERN_NOTICE "rock: directory entry would overflow " "storage\n"); printk(KERN_NOTICE "rock: sig=0x%02x, size=%d, remaining=%d\n", sig, len, rs->len); return -EIO; } return 0; } /* * return length of name field; 0: not found, -1: to be ignored */ int get_rock_ridge_filename(struct iso_directory_record *de, char *retname, struct inode *inode) { struct rock_state rs; struct rock_ridge *rr; int sig; int retnamlen = 0; int truncate = 0; int ret = 0; char *p; int len; if (!ISOFS_SB(inode->i_sb)->s_rock) return 0; *retname = 0; init_rock_state(&rs, inode); setup_rock_ridge(de, inode, &rs); repeat: while (rs.len > 2) { /* There may be one byte for padding somewhere */ rr = (struct rock_ridge *)rs.chr; /* * Ignore rock ridge info if rr->len is out of range, but * don't return -EIO because that would make the file * invisible. */ if (rr->len < 3) goto out; /* Something got screwed up here */ sig = isonum_721(rs.chr); if (rock_check_overflow(&rs, sig)) goto eio; rs.chr += rr->len; rs.len -= rr->len; /* * As above, just ignore the rock ridge info if rr->len * is bogus. */ if (rs.len < 0) goto out; /* Something got screwed up here */ switch (sig) { case SIG('R', 'R'): if ((rr->u.RR.flags[0] & RR_NM) == 0) goto out; break; case SIG('S', 'P'): if (check_sp(rr, inode)) goto out; break; case SIG('C', 'E'): rs.cont_extent = isonum_733(rr->u.CE.extent); rs.cont_offset = isonum_733(rr->u.CE.offset); rs.cont_size = isonum_733(rr->u.CE.size); break; case SIG('N', 'M'): if (truncate) break; if (rr->len < 5) break; /* * If the flags are 2 or 4, this indicates '.' or '..'. * We don't want to do anything with this, because it * screws up the code that calls us. We don't really * care anyways, since we can just use the non-RR * name. */ if (rr->u.NM.flags & 6) break; if (rr->u.NM.flags & ~1) { printk("Unsupported NM flag settings (%d)\n", rr->u.NM.flags); break; } len = rr->len - 5; if (retnamlen + len >= 254) { truncate = 1; break; } p = memchr(rr->u.NM.name, '\0', len); if (unlikely(p)) len = p - rr->u.NM.name; memcpy(retname + retnamlen, rr->u.NM.name, len); retnamlen += len; retname[retnamlen] = '\0'; break; case SIG('R', 'E'): kfree(rs.buffer); return -1; default: break; } } ret = rock_continue(&rs); if (ret == 0) goto repeat; if (ret == 1) return retnamlen; /* If 0, this file did not have a NM field */ out: kfree(rs.buffer); return ret; eio: ret = -EIO; goto out; } #define RR_REGARD_XA 1 #define RR_RELOC_DE 2 static int parse_rock_ridge_inode_internal(struct iso_directory_record *de, struct inode *inode, int flags) { int symlink_len = 0; int cnt, sig; unsigned int reloc_block; struct inode *reloc; struct rock_ridge *rr; int rootflag; struct rock_state rs; int ret = 0; if (!ISOFS_SB(inode->i_sb)->s_rock) return 0; init_rock_state(&rs, inode); setup_rock_ridge(de, inode, &rs); if (flags & RR_REGARD_XA) { rs.chr += 14; rs.len -= 14; if (rs.len < 0) rs.len = 0; } repeat: while (rs.len > 2) { /* There may be one byte for padding somewhere */ rr = (struct rock_ridge *)rs.chr; /* * Ignore rock ridge info if rr->len is out of range, but * don't return -EIO because that would make the file * invisible. */ if (rr->len < 3) goto out; /* Something got screwed up here */ sig = isonum_721(rs.chr); if (rock_check_overflow(&rs, sig)) goto eio; rs.chr += rr->len; rs.len -= rr->len; /* * As above, just ignore the rock ridge info if rr->len * is bogus. */ if (rs.len < 0) goto out; /* Something got screwed up here */ switch (sig) { #ifndef CONFIG_ZISOFS /* No flag for SF or ZF */ case SIG('R', 'R'): if ((rr->u.RR.flags[0] & (RR_PX | RR_TF | RR_SL | RR_CL)) == 0) goto out; break; #endif case SIG('S', 'P'): if (check_sp(rr, inode)) goto out; break; case SIG('C', 'E'): rs.cont_extent = isonum_733(rr->u.CE.extent); rs.cont_offset = isonum_733(rr->u.CE.offset); rs.cont_size = isonum_733(rr->u.CE.size); break; case SIG('E', 'R'): /* Invalid length of ER tag id? */ if (rr->u.ER.len_id + offsetof(struct rock_ridge, u.ER.data) > rr->len) goto out; ISOFS_SB(inode->i_sb)->s_rock = 1; printk(KERN_DEBUG "ISO 9660 Extensions: "); { int p; for (p = 0; p < rr->u.ER.len_id; p++) printk(KERN_CONT "%c", rr->u.ER.data[p]); } printk(KERN_CONT "\n"); break; case SIG('P', 'X'): inode->i_mode = isonum_733(rr->u.PX.mode); set_nlink(inode, isonum_733(rr->u.PX.n_links)); i_uid_write(inode, isonum_733(rr->u.PX.uid)); i_gid_write(inode, isonum_733(rr->u.PX.gid)); break; case SIG('P', 'N'): { int high, low; high = isonum_733(rr->u.PN.dev_high); low = isonum_733(rr->u.PN.dev_low); /* * The Rock Ridge standard specifies that if * sizeof(dev_t) <= 4, then the high field is * unused, and the device number is completely * stored in the low field. Some writers may * ignore this subtlety, * and as a result we test to see if the entire * device number is * stored in the low field, and use that. */ if ((low & ~0xff) && high == 0) { inode->i_rdev = MKDEV(low >> 8, low & 0xff); } else { inode->i_rdev = MKDEV(high, low); } } break; case SIG('T', 'F'): /* * Some RRIP writers incorrectly place ctime in the * TF_CREATE field. Try to handle this correctly for * either case. */ /* Rock ridge never appears on a High Sierra disk */ cnt = 0; if (rr->u.TF.flags & TF_CREATE) { inode_set_ctime(inode, iso_date(rr->u.TF.times[cnt++].time, 0), 0); } if (rr->u.TF.flags & TF_MODIFY) { inode_set_mtime(inode, iso_date(rr->u.TF.times[cnt++].time, 0), 0); } if (rr->u.TF.flags & TF_ACCESS) { inode_set_atime(inode, iso_date(rr->u.TF.times[cnt++].time, 0), 0); } if (rr->u.TF.flags & TF_ATTRIBUTES) { inode_set_ctime(inode, iso_date(rr->u.TF.times[cnt++].time, 0), 0); } break; case SIG('S', 'L'): { int slen; struct SL_component *slp; struct SL_component *oldslp; slen = rr->len - 5; slp = &rr->u.SL.link; inode->i_size = symlink_len; while (slen > 1) { rootflag = 0; switch (slp->flags & ~1) { case 0: inode->i_size += slp->len; break; case 2: inode->i_size += 1; break; case 4: inode->i_size += 2; break; case 8: rootflag = 1; inode->i_size += 1; break; default: printk("Symlink component flag " "not implemented\n"); } slen -= slp->len + 2; oldslp = slp; slp = (struct SL_component *) (((char *)slp) + slp->len + 2); if (slen < 2) { if (((rr->u.SL. flags & 1) != 0) && ((oldslp-> flags & 1) == 0)) inode->i_size += 1; break; } /* * If this component record isn't * continued, then append a '/'. */ if (!rootflag && (oldslp->flags & 1) == 0) inode->i_size += 1; } } symlink_len = inode->i_size; break; case SIG('R', 'E'): printk(KERN_WARNING "Attempt to read inode for " "relocated directory\n"); goto out; case SIG('C', 'L'): if (flags & RR_RELOC_DE) { printk(KERN_ERR "ISOFS: Recursive directory relocation " "is not supported\n"); goto eio; } reloc_block = isonum_733(rr->u.CL.location); if (reloc_block == ISOFS_I(inode)->i_iget5_block && ISOFS_I(inode)->i_iget5_offset == 0) { printk(KERN_ERR "ISOFS: Directory relocation points to " "itself\n"); goto eio; } ISOFS_I(inode)->i_first_extent = reloc_block; reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0); if (IS_ERR(reloc)) { ret = PTR_ERR(reloc); goto out; } inode->i_mode = reloc->i_mode; set_nlink(inode, reloc->i_nlink); inode->i_uid = reloc->i_uid; inode->i_gid = reloc->i_gid; inode->i_rdev = reloc->i_rdev; inode->i_size = reloc->i_size; inode->i_blocks = reloc->i_blocks; inode_set_atime_to_ts(inode, inode_get_atime(reloc)); inode_set_ctime_to_ts(inode, inode_get_ctime(reloc)); inode_set_mtime_to_ts(inode, inode_get_mtime(reloc)); iput(reloc); break; #ifdef CONFIG_ZISOFS case SIG('Z', 'F'): { int algo; if (ISOFS_SB(inode->i_sb)->s_nocompress) break; algo = isonum_721(rr->u.ZF.algorithm); if (algo == SIG('p', 'z')) { int block_shift = isonum_711(&rr->u.ZF.parms[1]); if (block_shift > 17) { printk(KERN_WARNING "isofs: " "Can't handle ZF block " "size of 2^%d\n", block_shift); } else { /* * Note: we don't change * i_blocks here */ ISOFS_I(inode)->i_file_format = isofs_file_compressed; /* * Parameters to compression * algorithm (header size, * block size) */ ISOFS_I(inode)->i_format_parm[0] = isonum_711(&rr->u.ZF.parms[0]); ISOFS_I(inode)->i_format_parm[1] = isonum_711(&rr->u.ZF.parms[1]); inode->i_size = isonum_733(rr->u.ZF. real_size); } } else { printk(KERN_WARNING "isofs: Unknown ZF compression " "algorithm: %c%c\n", rr->u.ZF.algorithm[0], rr->u.ZF.algorithm[1]); } break; } #endif default: break; } } ret = rock_continue(&rs); if (ret == 0) goto repeat; if (ret == 1) ret = 0; out: kfree(rs.buffer); return ret; eio: ret = -EIO; goto out; } static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit) { int slen; int rootflag; struct SL_component *oldslp; struct SL_component *slp; slen = rr->len - 5; slp = &rr->u.SL.link; while (slen > 1) { rootflag = 0; switch (slp->flags & ~1) { case 0: if (slp->len > plimit - rpnt) return NULL; memcpy(rpnt, slp->text, slp->len); rpnt += slp->len; break; case 2: if (rpnt >= plimit) return NULL; *rpnt++ = '.'; break; case 4: if (2 > plimit - rpnt) return NULL; *rpnt++ = '.'; *rpnt++ = '.'; break; case 8: if (rpnt >= plimit) return NULL; rootflag = 1; *rpnt++ = '/'; break; default: printk("Symlink component flag not implemented (%d)\n", slp->flags); } slen -= slp->len + 2; oldslp = slp; slp = (struct SL_component *)((char *)slp + slp->len + 2); if (slen < 2) { /* * If there is another SL record, and this component * record isn't continued, then add a slash. */ if ((!rootflag) && (rr->u.SL.flags & 1) && !(oldslp->flags & 1)) { if (rpnt >= plimit) return NULL; *rpnt++ = '/'; } break; } /* * If this component record isn't continued, then append a '/'. */ if (!rootflag && !(oldslp->flags & 1)) { if (rpnt >= plimit) return NULL; *rpnt++ = '/'; } } return rpnt; } int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode, int relocated) { int flags = relocated ? RR_RELOC_DE : 0; int result = parse_rock_ridge_inode_internal(de, inode, flags); /* * if rockridge flag was reset and we didn't look for attributes * behind eventual XA attributes, have a look there */ if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1) && (ISOFS_SB(inode->i_sb)->s_rock == 2)) { result = parse_rock_ridge_inode_internal(de, inode, flags | RR_REGARD_XA); } return result; } /* * read_folio() for symlinks: reads symlink contents into the folio and either * makes it uptodate and returns 0 or returns error (-EIO) */ static int rock_ridge_symlink_read_folio(struct file *file, struct folio *folio) { struct inode *inode = folio->mapping->host; struct iso_inode_info *ei = ISOFS_I(inode); struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); char *link = folio_address(folio); unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); struct buffer_head *bh; char *rpnt = link; unsigned char *pnt; struct iso_directory_record *raw_de; unsigned long block, offset; int sig; struct rock_ridge *rr; struct rock_state rs; int ret; if (!sbi->s_rock) goto error; init_rock_state(&rs, inode); block = ei->i_iget5_block; bh = sb_bread(inode->i_sb, block); if (!bh) goto out_noread; offset = ei->i_iget5_offset; pnt = (unsigned char *)bh->b_data + offset; raw_de = (struct iso_directory_record *)pnt; /* * If we go past the end of the buffer, there is some sort of error. */ if (offset + *pnt > bufsize) goto out_bad_span; /* * Now test for possible Rock Ridge extensions which will override * some of these numbers in the inode structure. */ setup_rock_ridge(raw_de, inode, &rs); repeat: while (rs.len > 2) { /* There may be one byte for padding somewhere */ rr = (struct rock_ridge *)rs.chr; if (rr->len < 3) goto out; /* Something got screwed up here */ sig = isonum_721(rs.chr); if (rock_check_overflow(&rs, sig)) goto out; rs.chr += rr->len; rs.len -= rr->len; if (rs.len < 0) goto out; /* corrupted isofs */ switch (sig) { case SIG('R', 'R'): if ((rr->u.RR.flags[0] & RR_SL) == 0) goto out; break; case SIG('S', 'P'): if (check_sp(rr, inode)) goto out; break; case SIG('S', 'L'): rpnt = get_symlink_chunk(rpnt, rr, link + (PAGE_SIZE - 1)); if (rpnt == NULL) goto out; break; case SIG('C', 'E'): /* This tells is if there is a continuation record */ rs.cont_extent = isonum_733(rr->u.CE.extent); rs.cont_offset = isonum_733(rr->u.CE.offset); rs.cont_size = isonum_733(rr->u.CE.size); break; default: break; } } ret = rock_continue(&rs); if (ret == 0) goto repeat; if (ret < 0) goto fail; if (rpnt == link) goto fail; brelse(bh); *rpnt = '\0'; ret = 0; end: folio_end_read(folio, ret == 0); return ret; /* error exit from macro */ out: kfree(rs.buffer); goto fail; out_noread: printk("unable to read i-node block"); goto fail; out_bad_span: printk("symlink spans iso9660 blocks\n"); fail: brelse(bh); error: ret = -EIO; goto end; } const struct address_space_operations isofs_symlink_aops = { .read_folio = rock_ridge_symlink_read_folio }; |
| 143 125 141 141 12 4 36 13 20 15 9 9 9 1 128 128 117 132 4 140 137 4 51 3 39 21 142 135 2 34 7 9 13 1 8 42 43 2 4 139 142 3 143 139 137 35 7 50 129 139 6 115 23 121 11 2 11 125 126 36 36 18 2 25 18 13 13 2 1 12 12 1 5 1 4 4 6 4 1 2 2 1 25 25 15 18 25 2 6 2 2 2 1 2 1 1 1 1 1 5 4 1 5 4 4 1 3 3 2 2 14 4 4 4 13 21 13 3 25 8 6 1 5 1 6 5 4 3 3 25 21 13 24 21 13 16 17 25 25 25 25 9 8 7 6 8 1 4 1 1 1 1 1 3 2 1 4 4 4 11 3 3 8 8 1 3 8 4 7 5 9 9 8 4 11 11 1 11 11 9 13 13 13 11 3 2 9 2 9 10 5 2 3 3 5 5 12 2 1 5 2 6 8 13 13 13 13 8 8 8 8 5 3 8 5 5 5 3 2 2 11 14 6 8 14 13 10 10 3 9 7 8 8 7 8 8 1 4 10 10 8 7 1 4 8 10 12 8 4 4 10 12 12 12 12 12 12 10 10 10 10 10 10 31 18 18 18 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 | // SPDX-License-Identifier: GPL-2.0+ /* * NILFS B-tree. * * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. * * Written by Koji Sato. */ #include <linux/slab.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/pagevec.h> #include "nilfs.h" #include "page.h" #include "btnode.h" #include "btree.h" #include "alloc.h" #include "dat.h" static void __nilfs_btree_init(struct nilfs_bmap *bmap); static struct nilfs_btree_path *nilfs_btree_alloc_path(void) { struct nilfs_btree_path *path; int level = NILFS_BTREE_LEVEL_DATA; path = kmem_cache_alloc(nilfs_btree_path_cache, GFP_NOFS); if (path == NULL) goto out; for (; level < NILFS_BTREE_LEVEL_MAX; level++) { path[level].bp_bh = NULL; path[level].bp_sib_bh = NULL; path[level].bp_index = 0; path[level].bp_oldreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; path[level].bp_newreq.bpr_ptr = NILFS_BMAP_INVALID_PTR; path[level].bp_op = NULL; } out: return path; } static void nilfs_btree_free_path(struct nilfs_btree_path *path) { int level = NILFS_BTREE_LEVEL_DATA; for (; level < NILFS_BTREE_LEVEL_MAX; level++) brelse(path[level].bp_bh); kmem_cache_free(nilfs_btree_path_cache, path); } /* * B-tree node operations */ static int nilfs_btree_get_new_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh; bh = nilfs_btnode_create_block(btnc, ptr); if (IS_ERR(bh)) return PTR_ERR(bh); set_buffer_nilfs_volatile(bh); *bhp = bh; return 0; } static int nilfs_btree_node_get_flags(const struct nilfs_btree_node *node) { return node->bn_flags; } static void nilfs_btree_node_set_flags(struct nilfs_btree_node *node, int flags) { node->bn_flags = flags; } static int nilfs_btree_node_root(const struct nilfs_btree_node *node) { return nilfs_btree_node_get_flags(node) & NILFS_BTREE_NODE_ROOT; } static int nilfs_btree_node_get_level(const struct nilfs_btree_node *node) { return node->bn_level; } static void nilfs_btree_node_set_level(struct nilfs_btree_node *node, int level) { node->bn_level = level; } static int nilfs_btree_node_get_nchildren(const struct nilfs_btree_node *node) { return le16_to_cpu(node->bn_nchildren); } static void nilfs_btree_node_set_nchildren(struct nilfs_btree_node *node, int nchildren) { node->bn_nchildren = cpu_to_le16(nchildren); } static int nilfs_btree_node_size(const struct nilfs_bmap *btree) { return i_blocksize(btree->b_inode); } static int nilfs_btree_nchildren_per_block(const struct nilfs_bmap *btree) { return btree->b_nchildren_per_block; } static __le64 * nilfs_btree_node_dkeys(const struct nilfs_btree_node *node) { return (__le64 *)((char *)(node + 1) + (nilfs_btree_node_root(node) ? 0 : NILFS_BTREE_NODE_EXTRA_PAD_SIZE)); } static __le64 * nilfs_btree_node_dptrs(const struct nilfs_btree_node *node, int ncmax) { return (__le64 *)(nilfs_btree_node_dkeys(node) + ncmax); } static __u64 nilfs_btree_node_get_key(const struct nilfs_btree_node *node, int index) { return le64_to_cpu(*(nilfs_btree_node_dkeys(node) + index)); } static void nilfs_btree_node_set_key(struct nilfs_btree_node *node, int index, __u64 key) { *(nilfs_btree_node_dkeys(node) + index) = cpu_to_le64(key); } static __u64 nilfs_btree_node_get_ptr(const struct nilfs_btree_node *node, int index, int ncmax) { return le64_to_cpu(*(nilfs_btree_node_dptrs(node, ncmax) + index)); } static void nilfs_btree_node_set_ptr(struct nilfs_btree_node *node, int index, __u64 ptr, int ncmax) { *(nilfs_btree_node_dptrs(node, ncmax) + index) = cpu_to_le64(ptr); } static void nilfs_btree_node_init(struct nilfs_btree_node *node, int flags, int level, int nchildren, int ncmax, const __u64 *keys, const __u64 *ptrs) { __le64 *dkeys; __le64 *dptrs; int i; nilfs_btree_node_set_flags(node, flags); nilfs_btree_node_set_level(node, level); nilfs_btree_node_set_nchildren(node, nchildren); dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); for (i = 0; i < nchildren; i++) { dkeys[i] = cpu_to_le64(keys[i]); dptrs[i] = cpu_to_le64(ptrs[i]); } } /* Assume the buffer heads corresponding to left and right are locked. */ static void nilfs_btree_node_move_left(struct nilfs_btree_node *left, struct nilfs_btree_node *right, int n, int lncmax, int rncmax) { __le64 *ldkeys, *rdkeys; __le64 *ldptrs, *rdptrs; int lnchildren, rnchildren; ldkeys = nilfs_btree_node_dkeys(left); ldptrs = nilfs_btree_node_dptrs(left, lncmax); lnchildren = nilfs_btree_node_get_nchildren(left); rdkeys = nilfs_btree_node_dkeys(right); rdptrs = nilfs_btree_node_dptrs(right, rncmax); rnchildren = nilfs_btree_node_get_nchildren(right); memcpy(ldkeys + lnchildren, rdkeys, n * sizeof(*rdkeys)); memcpy(ldptrs + lnchildren, rdptrs, n * sizeof(*rdptrs)); memmove(rdkeys, rdkeys + n, (rnchildren - n) * sizeof(*rdkeys)); memmove(rdptrs, rdptrs + n, (rnchildren - n) * sizeof(*rdptrs)); lnchildren += n; rnchildren -= n; nilfs_btree_node_set_nchildren(left, lnchildren); nilfs_btree_node_set_nchildren(right, rnchildren); } /* Assume that the buffer heads corresponding to left and right are locked. */ static void nilfs_btree_node_move_right(struct nilfs_btree_node *left, struct nilfs_btree_node *right, int n, int lncmax, int rncmax) { __le64 *ldkeys, *rdkeys; __le64 *ldptrs, *rdptrs; int lnchildren, rnchildren; ldkeys = nilfs_btree_node_dkeys(left); ldptrs = nilfs_btree_node_dptrs(left, lncmax); lnchildren = nilfs_btree_node_get_nchildren(left); rdkeys = nilfs_btree_node_dkeys(right); rdptrs = nilfs_btree_node_dptrs(right, rncmax); rnchildren = nilfs_btree_node_get_nchildren(right); memmove(rdkeys + n, rdkeys, rnchildren * sizeof(*rdkeys)); memmove(rdptrs + n, rdptrs, rnchildren * sizeof(*rdptrs)); memcpy(rdkeys, ldkeys + lnchildren - n, n * sizeof(*rdkeys)); memcpy(rdptrs, ldptrs + lnchildren - n, n * sizeof(*rdptrs)); lnchildren -= n; rnchildren += n; nilfs_btree_node_set_nchildren(left, lnchildren); nilfs_btree_node_set_nchildren(right, rnchildren); } /* Assume that the buffer head corresponding to node is locked. */ static void nilfs_btree_node_insert(struct nilfs_btree_node *node, int index, __u64 key, __u64 ptr, int ncmax) { __le64 *dkeys; __le64 *dptrs; int nchildren; dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); nchildren = nilfs_btree_node_get_nchildren(node); if (index < nchildren) { memmove(dkeys + index + 1, dkeys + index, (nchildren - index) * sizeof(*dkeys)); memmove(dptrs + index + 1, dptrs + index, (nchildren - index) * sizeof(*dptrs)); } dkeys[index] = cpu_to_le64(key); dptrs[index] = cpu_to_le64(ptr); nchildren++; nilfs_btree_node_set_nchildren(node, nchildren); } /* Assume that the buffer head corresponding to node is locked. */ static void nilfs_btree_node_delete(struct nilfs_btree_node *node, int index, __u64 *keyp, __u64 *ptrp, int ncmax) { __u64 key; __u64 ptr; __le64 *dkeys; __le64 *dptrs; int nchildren; dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); key = le64_to_cpu(dkeys[index]); ptr = le64_to_cpu(dptrs[index]); nchildren = nilfs_btree_node_get_nchildren(node); if (keyp != NULL) *keyp = key; if (ptrp != NULL) *ptrp = ptr; if (index < nchildren - 1) { memmove(dkeys + index, dkeys + index + 1, (nchildren - index - 1) * sizeof(*dkeys)); memmove(dptrs + index, dptrs + index + 1, (nchildren - index - 1) * sizeof(*dptrs)); } nchildren--; nilfs_btree_node_set_nchildren(node, nchildren); } static int nilfs_btree_node_lookup(const struct nilfs_btree_node *node, __u64 key, int *indexp) { __u64 nkey; int index, low, high, s; /* binary search */ low = 0; high = nilfs_btree_node_get_nchildren(node) - 1; index = 0; s = 0; while (low <= high) { index = (low + high) / 2; nkey = nilfs_btree_node_get_key(node, index); if (nkey == key) { s = 0; goto out; } else if (nkey < key) { low = index + 1; s = -1; } else { high = index - 1; s = 1; } } /* adjust index */ if (nilfs_btree_node_get_level(node) > NILFS_BTREE_LEVEL_NODE_MIN) { if (s > 0 && index > 0) index--; } else if (s < 0) index++; out: *indexp = index; return s == 0; } /** * nilfs_btree_node_broken - verify consistency of btree node * @node: btree node block to be examined * @size: node size (in bytes) * @inode: host inode of btree * @blocknr: block number * * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. */ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node, size_t size, struct inode *inode, sector_t blocknr) { int level, flags, nchildren; int ret = 0; level = nilfs_btree_node_get_level(node); flags = nilfs_btree_node_get_flags(node); nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || (flags & NILFS_BTREE_NODE_ROOT) || nchildren < 0 || nchildren > NILFS_BTREE_NODE_NCHILDREN_MAX(size))) { nilfs_crit(inode->i_sb, "bad btree node (ino=%lu, blocknr=%llu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, (unsigned long long)blocknr, level, flags, nchildren); ret = 1; } return ret; } /** * nilfs_btree_root_broken - verify consistency of btree root node * @node: btree root node to be examined * @inode: host inode of btree * * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned. */ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, struct inode *inode) { int level, flags, nchildren; int ret = 0; level = nilfs_btree_node_get_level(node); flags = nilfs_btree_node_get_flags(node); nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { nilfs_crit(inode->i_sb, "bad btree root (ino=%lu): level = %d, flags = 0x%x, nchildren = %d", inode->i_ino, level, flags, nchildren); ret = 1; } return ret; } int nilfs_btree_broken_node_block(struct buffer_head *bh) { struct inode *inode; int ret; if (buffer_nilfs_checked(bh)) return 0; inode = bh->b_folio->mapping->host; ret = nilfs_btree_node_broken((struct nilfs_btree_node *)bh->b_data, bh->b_size, inode, bh->b_blocknr); if (likely(!ret)) set_buffer_nilfs_checked(bh); return ret; } static struct nilfs_btree_node * nilfs_btree_get_root(const struct nilfs_bmap *btree) { return (struct nilfs_btree_node *)btree->b_u.u_data; } static struct nilfs_btree_node * nilfs_btree_get_nonroot_node(const struct nilfs_btree_path *path, int level) { return (struct nilfs_btree_node *)path[level].bp_bh->b_data; } static struct nilfs_btree_node * nilfs_btree_get_sib_node(const struct nilfs_btree_path *path, int level) { return (struct nilfs_btree_node *)path[level].bp_sib_bh->b_data; } static int nilfs_btree_height(const struct nilfs_bmap *btree) { return nilfs_btree_node_get_level(nilfs_btree_get_root(btree)) + 1; } static struct nilfs_btree_node * nilfs_btree_get_node(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, int level, int *ncmaxp) { struct nilfs_btree_node *node; if (level == nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_root(btree); *ncmaxp = NILFS_BTREE_ROOT_NCHILDREN_MAX; } else { node = nilfs_btree_get_nonroot_node(path, level); *ncmaxp = nilfs_btree_nchildren_per_block(btree); } return node; } static int nilfs_btree_bad_node(const struct nilfs_bmap *btree, struct nilfs_btree_node *node, int level) { if (unlikely(nilfs_btree_node_get_level(node) != level)) { dump_stack(); nilfs_crit(btree->b_inode->i_sb, "btree level mismatch (ino=%lu): %d != %d", btree->b_inode->i_ino, nilfs_btree_node_get_level(node), level); return 1; } return 0; } struct nilfs_btree_readahead_info { struct nilfs_btree_node *node; /* parent node */ int max_ra_blocks; /* max nof blocks to read ahead */ int index; /* current index on the parent node */ int ncmax; /* nof children in the parent node */ }; static int __nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp, const struct nilfs_btree_readahead_info *ra) { struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; struct address_space *btnc = btnc_inode->i_mapping; struct buffer_head *bh, *ra_bh; sector_t submit_ptr = 0; int ret; ret = nilfs_btnode_submit_block(btnc, ptr, 0, REQ_OP_READ, &bh, &submit_ptr); if (ret) { if (likely(ret == -EEXIST)) goto out_check; if (ret == -ENOENT) { /* * Block address translation failed due to invalid * value of 'ptr'. In this case, return internal code * -EINVAL (broken bmap) to notify bmap layer of fatal * metadata corruption. */ ret = -EINVAL; } return ret; } if (ra) { int i, n; __u64 ptr2; /* read ahead sibling nodes */ for (n = ra->max_ra_blocks, i = ra->index + 1; n > 0 && i < ra->ncmax; n--, i++) { ptr2 = nilfs_btree_node_get_ptr(ra->node, i, ra->ncmax); ret = nilfs_btnode_submit_block(btnc, ptr2, 0, REQ_OP_READ | REQ_RAHEAD, &ra_bh, &submit_ptr); if (likely(!ret || ret == -EEXIST)) brelse(ra_bh); else if (ret != -EBUSY) break; if (!buffer_locked(bh)) goto out_no_wait; } } wait_on_buffer(bh); out_no_wait: if (!buffer_uptodate(bh)) { nilfs_err(btree->b_inode->i_sb, "I/O error reading b-tree node block (ino=%lu, blocknr=%llu)", btree->b_inode->i_ino, (unsigned long long)ptr); brelse(bh); return -EIO; } out_check: if (nilfs_btree_broken_node_block(bh)) { clear_buffer_uptodate(bh); brelse(bh); return -EINVAL; } *bhp = bh; return 0; } static int nilfs_btree_get_block(const struct nilfs_bmap *btree, __u64 ptr, struct buffer_head **bhp) { return __nilfs_btree_get_block(btree, ptr, bhp, NULL); } static int nilfs_btree_do_lookup(const struct nilfs_bmap *btree, struct nilfs_btree_path *path, __u64 key, __u64 *ptrp, int minlevel, int readahead) { struct nilfs_btree_node *node; struct nilfs_btree_readahead_info p, *ra; __u64 ptr; int level, index, found, ncmax, ret; node = nilfs_btree_get_root(btree); level = nilfs_btree_node_get_level(node); if (level < minlevel || nilfs_btree_node_get_nchildren(node) <= 0) return -ENOENT; found = nilfs_btree_node_lookup(node, key, &index); ptr = nilfs_btree_node_get_ptr(node, index, NILFS_BTREE_ROOT_NCHILDREN_MAX); path[level].bp_bh = NULL; path[level].bp_index = index; ncmax = nilfs_btree_nchildren_per_block(btree); while (--level >= minlevel) { ra = NULL; if (level == NILFS_BTREE_LEVEL_NODE_MIN && readahead) { p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); p.index = index; p.max_ra_blocks = 7; ra = &p; } ret = __nilfs_btree_get_block(btree, ptr, &path[level].bp_bh, ra); if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, level); if (nilfs_btree_bad_node(btree, node, level)) return -EINVAL; if (!found) found = nilfs_btree_node_lookup(node, key, &index); else index = 0; if (index < ncmax) { ptr = nilfs_btree_node_get_ptr(node, index, ncmax); } else { WARN_ON(found || level != NILFS_BTREE_LEVEL_NODE_MIN); /* insert */ ptr = NILFS_BMAP_INVALID_PTR; } path[level].bp_index = index; } if (!found) return -ENOENT; if (ptrp != NULL) *ptrp = ptr; return 0; } static int nilfs_btree_do_lookup_last(const struct nilfs_bmap *btree, struct nilfs_btree_path *path, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; __u64 ptr; int index, level, ncmax, ret; node = nilfs_btree_get_root(btree); index = nilfs_btree_node_get_nchildren(node) - 1; if (index < 0) return -ENOENT; level = nilfs_btree_node_get_level(node); ptr = nilfs_btree_node_get_ptr(node, index, NILFS_BTREE_ROOT_NCHILDREN_MAX); path[level].bp_bh = NULL; path[level].bp_index = index; ncmax = nilfs_btree_nchildren_per_block(btree); for (level--; level > 0; level--) { ret = nilfs_btree_get_block(btree, ptr, &path[level].bp_bh); if (ret < 0) return ret; node = nilfs_btree_get_nonroot_node(path, level); if (nilfs_btree_bad_node(btree, node, level)) return -EINVAL; index = nilfs_btree_node_get_nchildren(node) - 1; ptr = nilfs_btree_node_get_ptr(node, index, ncmax); path[level].bp_index = index; } if (keyp != NULL) *keyp = nilfs_btree_node_get_key(node, index); if (ptrp != NULL) *ptrp = ptr; return 0; } /** * nilfs_btree_get_next_key - get next valid key from btree path array * @btree: bmap struct of btree * @path: array of nilfs_btree_path struct * @minlevel: start level * @nextkey: place to store the next valid key * * Return Value: If a next key was found, 0 is returned. Otherwise, * -ENOENT is returned. */ static int nilfs_btree_get_next_key(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, int minlevel, __u64 *nextkey) { struct nilfs_btree_node *node; int maxlevel = nilfs_btree_height(btree) - 1; int index, next_adj, level; /* Next index is already set to bp_index for leaf nodes. */ next_adj = 0; for (level = minlevel; level <= maxlevel; level++) { if (level == maxlevel) node = nilfs_btree_get_root(btree); else node = nilfs_btree_get_nonroot_node(path, level); index = path[level].bp_index + next_adj; if (index < nilfs_btree_node_get_nchildren(node)) { /* Next key is in this node */ *nextkey = nilfs_btree_node_get_key(node, index); return 0; } /* For non-leaf nodes, next index is stored at bp_index + 1. */ next_adj = 1; } return -ENOENT; } static int nilfs_btree_lookup(const struct nilfs_bmap *btree, __u64 key, int level, __u64 *ptrp) { struct nilfs_btree_path *path; int ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, ptrp, level, 0); nilfs_btree_free_path(path); return ret; } static int nilfs_btree_lookup_contig(const struct nilfs_bmap *btree, __u64 key, __u64 *ptrp, unsigned int maxblocks) { struct nilfs_btree_path *path; struct nilfs_btree_node *node; struct inode *dat = NULL; __u64 ptr, ptr2; sector_t blocknr; int level = NILFS_BTREE_LEVEL_NODE_MIN; int ret, cnt, index, maxlevel, ncmax; struct nilfs_btree_readahead_info p; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level, 1); if (ret < 0) goto out; if (NILFS_BMAP_USE_VBN(btree)) { dat = nilfs_bmap_get_dat(btree); ret = nilfs_dat_translate(dat, ptr, &blocknr); if (ret < 0) goto dat_error; ptr = blocknr; } cnt = 1; if (cnt == maxblocks) goto end; maxlevel = nilfs_btree_height(btree) - 1; node = nilfs_btree_get_node(btree, path, level, &ncmax); index = path[level].bp_index + 1; for (;;) { while (index < nilfs_btree_node_get_nchildren(node)) { if (nilfs_btree_node_get_key(node, index) != key + cnt) goto end; ptr2 = nilfs_btree_node_get_ptr(node, index, ncmax); if (dat) { ret = nilfs_dat_translate(dat, ptr2, &blocknr); if (ret < 0) goto dat_error; ptr2 = blocknr; } if (ptr2 != ptr + cnt || ++cnt == maxblocks) goto end; index++; } if (level == maxlevel) break; /* look-up right sibling node */ p.node = nilfs_btree_get_node(btree, path, level + 1, &p.ncmax); p.index = path[level + 1].bp_index + 1; p.max_ra_blocks = 7; if (p.index >= nilfs_btree_node_get_nchildren(p.node) || nilfs_btree_node_get_key(p.node, p.index) != key + cnt) break; ptr2 = nilfs_btree_node_get_ptr(p.node, p.index, p.ncmax); path[level + 1].bp_index = p.index; brelse(path[level].bp_bh); path[level].bp_bh = NULL; ret = __nilfs_btree_get_block(btree, ptr2, &path[level].bp_bh, &p); if (ret < 0) goto out; node = nilfs_btree_get_nonroot_node(path, level); ncmax = nilfs_btree_nchildren_per_block(btree); index = 0; path[level].bp_index = index; } end: *ptrp = ptr; ret = cnt; out: nilfs_btree_free_path(path); return ret; dat_error: if (ret == -ENOENT) ret = -EINVAL; /* Notify bmap layer of metadata corruption */ goto out; } static void nilfs_btree_promote_key(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 key) { if (level < nilfs_btree_height(btree) - 1) { do { nilfs_btree_node_set_key( nilfs_btree_get_nonroot_node(path, level), path[level].bp_index, key); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); } while ((path[level].bp_index == 0) && (++level < nilfs_btree_height(btree) - 1)); } /* root */ if (level == nilfs_btree_height(btree) - 1) { nilfs_btree_node_set_key(nilfs_btree_get_root(btree), path[level].bp_index, key); } } static void nilfs_btree_do_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; int ncblk; if (level < nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_nonroot_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); nilfs_btree_node_insert(node, path[level].bp_index, *keyp, *ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); } else { node = nilfs_btree_get_root(btree); nilfs_btree_node_insert(node, path[level].bp_index, *keyp, *ptrp, NILFS_BTREE_ROOT_NCHILDREN_MAX); } } static void nilfs_btree_carry_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; int nchildren, lnchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); lnchildren = nilfs_btree_node_get_nchildren(left); ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + lnchildren + 1) / 2 - lnchildren; if (n > path[level].bp_index) { /* move insert point */ n--; move = 1; } nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); if (move) { brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index += lnchildren; path[level + 1].bp_index--; } else { brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index -= n; } nilfs_btree_do_insert(btree, path, level, keyp, ptrp); } static void nilfs_btree_carry_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; int nchildren, rnchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); rnchildren = nilfs_btree_node_get_nchildren(right); ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + rnchildren + 1) / 2 - rnchildren; if (n > nchildren - path[level].bp_index) { /* move insert point */ n--; move = 1; } nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(right, 0)); path[level + 1].bp_index--; if (move) { brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index -= nilfs_btree_node_get_nchildren(node); path[level + 1].bp_index++; } else { brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } nilfs_btree_do_insert(btree, path, level, keyp, ptrp); } static void nilfs_btree_split(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; int nchildren, n, move, ncblk; node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); ncblk = nilfs_btree_nchildren_per_block(btree); move = 0; n = (nchildren + 1) / 2; if (n > nchildren - path[level].bp_index) { n--; move = 1; } nilfs_btree_node_move_right(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); if (move) { path[level].bp_index -= nilfs_btree_node_get_nchildren(node); nilfs_btree_node_insert(right, path[level].bp_index, *keyp, *ptrp, ncblk); *keyp = nilfs_btree_node_get_key(right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; brelse(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; } else { nilfs_btree_do_insert(btree, path, level, keyp, ptrp); *keyp = nilfs_btree_node_get_key(right, 0); *ptrp = path[level].bp_newreq.bpr_ptr; brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } path[level + 1].bp_index++; } static void nilfs_btree_grow(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *root, *child; int n, ncblk; root = nilfs_btree_get_root(btree); child = nilfs_btree_get_sib_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); n = nilfs_btree_node_get_nchildren(root); nilfs_btree_node_move_right(root, child, n, NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); nilfs_btree_node_set_level(root, level + 1); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; nilfs_btree_do_insert(btree, path, level, keyp, ptrp); *keyp = nilfs_btree_node_get_key(child, 0); *ptrp = path[level].bp_newreq.bpr_ptr; } static __u64 nilfs_btree_find_near(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path) { struct nilfs_btree_node *node; int level, ncmax; if (path == NULL) return NILFS_BMAP_INVALID_PTR; /* left sibling */ level = NILFS_BTREE_LEVEL_NODE_MIN; if (path[level].bp_index > 0) { node = nilfs_btree_get_node(btree, path, level, &ncmax); return nilfs_btree_node_get_ptr(node, path[level].bp_index - 1, ncmax); } /* parent */ level = NILFS_BTREE_LEVEL_NODE_MIN + 1; if (level <= nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_node(btree, path, level, &ncmax); return nilfs_btree_node_get_ptr(node, path[level].bp_index, ncmax); } return NILFS_BMAP_INVALID_PTR; } static __u64 nilfs_btree_find_target_v(const struct nilfs_bmap *btree, const struct nilfs_btree_path *path, __u64 key) { __u64 ptr; ptr = nilfs_bmap_find_target_seq(btree, key); if (ptr != NILFS_BMAP_INVALID_PTR) /* sequential access */ return ptr; ptr = nilfs_btree_find_near(btree, path); if (ptr != NILFS_BMAP_INVALID_PTR) /* near */ return ptr; /* block group */ return nilfs_bmap_find_target_in_group(btree); } static int nilfs_btree_prepare_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int *levelp, __u64 key, __u64 ptr, struct nilfs_bmap_stats *stats) { struct buffer_head *bh; struct nilfs_btree_node *node, *parent, *sib; __u64 sibptr; int pindex, level, ncmax, ncblk, ret; struct inode *dat = NULL; stats->bs_nblocks = 0; level = NILFS_BTREE_LEVEL_DATA; /* allocate a new ptr for data block */ if (NILFS_BMAP_USE_VBN(btree)) { path[level].bp_newreq.bpr_ptr = nilfs_btree_find_target_v(btree, path, key); dat = nilfs_bmap_get_dat(btree); } ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_data; ncblk = nilfs_btree_nchildren_per_block(btree); for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < nilfs_btree_height(btree) - 1; level++) { node = nilfs_btree_get_nonroot_node(path, level); if (nilfs_btree_node_get_nchildren(node) < ncblk) { path[level].bp_op = nilfs_btree_do_insert; stats->bs_nblocks++; goto out; } parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); pindex = path[level + 1].bp_index; /* left sibling */ if (pindex > 0) { sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; if (nilfs_btree_node_get_nchildren(sib) < ncblk) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_carry_left; stats->bs_nblocks++; goto out; } else { brelse(bh); } } /* right sibling */ if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_child_node; sib = (struct nilfs_btree_node *)bh->b_data; if (nilfs_btree_node_get_nchildren(sib) < ncblk) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_carry_right; stats->bs_nblocks++; goto out; } else { brelse(bh); } } /* split */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_child_node; ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, &bh); if (ret < 0) goto err_out_curr_node; stats->bs_nblocks++; sib = (struct nilfs_btree_node *)bh->b_data; nilfs_btree_node_init(sib, 0, level, 0, ncblk, NULL, NULL); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_split; } /* root */ node = nilfs_btree_get_root(btree); if (nilfs_btree_node_get_nchildren(node) < NILFS_BTREE_ROOT_NCHILDREN_MAX) { path[level].bp_op = nilfs_btree_do_insert; stats->bs_nblocks++; goto out; } /* grow */ path[level].bp_newreq.bpr_ptr = path[level - 1].bp_newreq.bpr_ptr + 1; ret = nilfs_bmap_prepare_alloc_ptr(btree, &path[level].bp_newreq, dat); if (ret < 0) goto err_out_child_node; ret = nilfs_btree_get_new_block(btree, path[level].bp_newreq.bpr_ptr, &bh); if (ret < 0) goto err_out_curr_node; nilfs_btree_node_init((struct nilfs_btree_node *)bh->b_data, 0, level, 0, ncblk, NULL, NULL); path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_grow; level++; path[level].bp_op = nilfs_btree_do_insert; /* a newly-created node block and a data block are added */ stats->bs_nblocks += 2; /* success */ out: *levelp = level; return ret; /* error */ err_out_curr_node: nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); err_out_child_node: for (level--; level > NILFS_BTREE_LEVEL_DATA; level--) { nilfs_btnode_delete(path[level].bp_sib_bh); nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); } nilfs_bmap_abort_alloc_ptr(btree, &path[level].bp_newreq, dat); err_out_data: *levelp = level; stats->bs_nblocks = 0; return ret; } static void nilfs_btree_commit_insert(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int maxlevel, __u64 key, __u64 ptr) { struct inode *dat = NULL; int level; set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); ptr = path[NILFS_BTREE_LEVEL_DATA].bp_newreq.bpr_ptr; if (NILFS_BMAP_USE_VBN(btree)) { nilfs_bmap_set_target_v(btree, key, ptr); dat = nilfs_bmap_get_dat(btree); } for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { nilfs_bmap_commit_alloc_ptr(btree, &path[level - 1].bp_newreq, dat); path[level].bp_op(btree, path, level, &key, &ptr); } if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); } static int nilfs_btree_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr) { struct nilfs_btree_path *path; struct nilfs_bmap_stats stats; int level, ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, NULL, NILFS_BTREE_LEVEL_NODE_MIN, 0); if (ret != -ENOENT) { if (ret == 0) ret = -EEXIST; goto out; } ret = nilfs_btree_prepare_insert(btree, path, &level, key, ptr, &stats); if (ret < 0) goto out; nilfs_btree_commit_insert(btree, path, level, key, ptr); nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); out: nilfs_btree_free_path(path); return ret; } static void nilfs_btree_do_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node; int ncblk; if (level < nilfs_btree_height(btree) - 1) { node = nilfs_btree_get_nonroot_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); nilfs_btree_node_delete(node, path[level].bp_index, keyp, ptrp, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (path[level].bp_index == 0) nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); } else { node = nilfs_btree_get_root(btree); nilfs_btree_node_delete(node, path[level].bp_index, keyp, ptrp, NILFS_BTREE_ROOT_NCHILDREN_MAX); } } static void nilfs_btree_borrow_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; int nchildren, lnchildren, n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); lnchildren = nilfs_btree_node_get_nchildren(left); ncblk = nilfs_btree_nchildren_per_block(btree); n = (nchildren + lnchildren) / 2 - nchildren; nilfs_btree_node_move_right(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(node, 0)); brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level].bp_index += n; } static void nilfs_btree_borrow_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; int nchildren, rnchildren, n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); nchildren = nilfs_btree_node_get_nchildren(node); rnchildren = nilfs_btree_node_get_nchildren(right); ncblk = nilfs_btree_nchildren_per_block(btree); n = (nchildren + rnchildren) / 2 - nchildren; nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); path[level + 1].bp_index++; nilfs_btree_promote_key(btree, path, level + 1, nilfs_btree_node_get_key(right, 0)); path[level + 1].bp_index--; brelse(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; } static void nilfs_btree_concat_left(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *left; int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); node = nilfs_btree_get_nonroot_node(path, level); left = nilfs_btree_get_sib_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); n = nilfs_btree_node_get_nchildren(node); nilfs_btree_node_move_left(left, node, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_sib_bh)) mark_buffer_dirty(path[level].bp_sib_bh); nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = path[level].bp_sib_bh; path[level].bp_sib_bh = NULL; path[level].bp_index += nilfs_btree_node_get_nchildren(left); } static void nilfs_btree_concat_right(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *node, *right; int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); node = nilfs_btree_get_nonroot_node(path, level); right = nilfs_btree_get_sib_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); n = nilfs_btree_node_get_nchildren(right); nilfs_btree_node_move_left(node, right, n, ncblk, ncblk); if (!buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); nilfs_btnode_delete(path[level].bp_sib_bh); path[level].bp_sib_bh = NULL; path[level + 1].bp_index++; } static void nilfs_btree_shrink(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { struct nilfs_btree_node *root, *child; int n, ncblk; nilfs_btree_do_delete(btree, path, level, keyp, ptrp); root = nilfs_btree_get_root(btree); child = nilfs_btree_get_nonroot_node(path, level); ncblk = nilfs_btree_nchildren_per_block(btree); nilfs_btree_node_delete(root, 0, NULL, NULL, NILFS_BTREE_ROOT_NCHILDREN_MAX); nilfs_btree_node_set_level(root, level); n = nilfs_btree_node_get_nchildren(child); nilfs_btree_node_move_left(root, child, n, NILFS_BTREE_ROOT_NCHILDREN_MAX, ncblk); nilfs_btnode_delete(path[level].bp_bh); path[level].bp_bh = NULL; } static void nilfs_btree_nop(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, __u64 *keyp, __u64 *ptrp) { } static int nilfs_btree_prepare_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int *levelp, struct nilfs_bmap_stats *stats, struct inode *dat) { struct buffer_head *bh; struct nilfs_btree_node *node, *parent, *sib; __u64 sibptr; int pindex, dindex, level, ncmin, ncmax, ncblk, ret; ret = 0; stats->bs_nblocks = 0; ncmin = NILFS_BTREE_NODE_NCHILDREN_MIN(nilfs_btree_node_size(btree)); ncblk = nilfs_btree_nchildren_per_block(btree); for (level = NILFS_BTREE_LEVEL_NODE_MIN, dindex = path[level].bp_index; level < nilfs_btree_height(btree) - 1; level++) { node = nilfs_btree_get_nonroot_node(path, level); path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(node, dindex, ncblk); ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); if (ret < 0) goto err_out_child_node; if (nilfs_btree_node_get_nchildren(node) > ncmin) { path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; goto out; } parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); pindex = path[level + 1].bp_index; dindex = pindex; if (pindex > 0) { /* left sibling */ sibptr = nilfs_btree_node_get_ptr(parent, pindex - 1, ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; if (nilfs_btree_node_get_nchildren(sib) > ncmin) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_borrow_left; stats->bs_nblocks++; goto out; } else { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_concat_left; stats->bs_nblocks++; /* continue; */ } } else if (pindex < nilfs_btree_node_get_nchildren(parent) - 1) { /* right sibling */ sibptr = nilfs_btree_node_get_ptr(parent, pindex + 1, ncmax); ret = nilfs_btree_get_block(btree, sibptr, &bh); if (ret < 0) goto err_out_curr_node; sib = (struct nilfs_btree_node *)bh->b_data; if (nilfs_btree_node_get_nchildren(sib) > ncmin) { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_borrow_right; stats->bs_nblocks++; goto out; } else { path[level].bp_sib_bh = bh; path[level].bp_op = nilfs_btree_concat_right; stats->bs_nblocks++; /* * When merging right sibling node * into the current node, pointer to * the right sibling node must be * terminated instead. The adjustment * below is required for that. */ dindex = pindex + 1; /* continue; */ } } else { /* no siblings */ /* the only child of the root node */ WARN_ON(level != nilfs_btree_height(btree) - 2); if (nilfs_btree_node_get_nchildren(node) - 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { path[level].bp_op = nilfs_btree_shrink; stats->bs_nblocks += 2; level++; path[level].bp_op = nilfs_btree_nop; goto shrink_root_child; } else { path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; goto out; } } } /* child of the root node is deleted */ path[level].bp_op = nilfs_btree_do_delete; stats->bs_nblocks++; shrink_root_child: node = nilfs_btree_get_root(btree); path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(node, dindex, NILFS_BTREE_ROOT_NCHILDREN_MAX); ret = nilfs_bmap_prepare_end_ptr(btree, &path[level].bp_oldreq, dat); if (ret < 0) goto err_out_child_node; /* success */ out: *levelp = level; return ret; /* error */ err_out_curr_node: nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); err_out_child_node: for (level--; level >= NILFS_BTREE_LEVEL_NODE_MIN; level--) { brelse(path[level].bp_sib_bh); nilfs_bmap_abort_end_ptr(btree, &path[level].bp_oldreq, dat); } *levelp = level; stats->bs_nblocks = 0; return ret; } static void nilfs_btree_commit_delete(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int maxlevel, struct inode *dat) { int level; for (level = NILFS_BTREE_LEVEL_NODE_MIN; level <= maxlevel; level++) { nilfs_bmap_commit_end_ptr(btree, &path[level].bp_oldreq, dat); path[level].bp_op(btree, path, level, NULL, NULL); } if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); } static int nilfs_btree_delete(struct nilfs_bmap *btree, __u64 key) { struct nilfs_btree_path *path; struct nilfs_bmap_stats stats; struct inode *dat; int level, ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, NULL, NILFS_BTREE_LEVEL_NODE_MIN, 0); if (ret < 0) goto out; dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; ret = nilfs_btree_prepare_delete(btree, path, &level, &stats, dat); if (ret < 0) goto out; nilfs_btree_commit_delete(btree, path, level, dat); nilfs_inode_sub_blocks(btree->b_inode, stats.bs_nblocks); out: nilfs_btree_free_path(path); return ret; } static int nilfs_btree_seek_key(const struct nilfs_bmap *btree, __u64 start, __u64 *keyp) { struct nilfs_btree_path *path; const int minlevel = NILFS_BTREE_LEVEL_NODE_MIN; int ret; path = nilfs_btree_alloc_path(); if (!path) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, start, NULL, minlevel, 0); if (!ret) *keyp = start; else if (ret == -ENOENT) ret = nilfs_btree_get_next_key(btree, path, minlevel, keyp); nilfs_btree_free_path(path); return ret; } static int nilfs_btree_last_key(const struct nilfs_bmap *btree, __u64 *keyp) { struct nilfs_btree_path *path; int ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup_last(btree, path, keyp, NULL); nilfs_btree_free_path(path); return ret; } static int nilfs_btree_check_delete(struct nilfs_bmap *btree, __u64 key) { struct buffer_head *bh; struct nilfs_btree_node *root, *node; __u64 maxkey, nextmaxkey; __u64 ptr; int nchildren, ret; root = nilfs_btree_get_root(btree); switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; break; case 3: nchildren = nilfs_btree_node_get_nchildren(root); if (nchildren > 1) return 0; ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, NILFS_BTREE_ROOT_NCHILDREN_MAX); ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; break; default: return 0; } nchildren = nilfs_btree_node_get_nchildren(node); maxkey = nilfs_btree_node_get_key(node, nchildren - 1); nextmaxkey = (nchildren > 1) ? nilfs_btree_node_get_key(node, nchildren - 2) : 0; brelse(bh); return (maxkey == key) && (nextmaxkey < NILFS_BMAP_LARGE_LOW); } static int nilfs_btree_gather_data(struct nilfs_bmap *btree, __u64 *keys, __u64 *ptrs, int nitems) { struct buffer_head *bh; struct nilfs_btree_node *node, *root; __le64 *dkeys; __le64 *dptrs; __u64 ptr; int nchildren, ncmax, i, ret; root = nilfs_btree_get_root(btree); switch (nilfs_btree_height(btree)) { case 2: bh = NULL; node = root; ncmax = NILFS_BTREE_ROOT_NCHILDREN_MAX; break; case 3: nchildren = nilfs_btree_node_get_nchildren(root); WARN_ON(nchildren > 1); ptr = nilfs_btree_node_get_ptr(root, nchildren - 1, NILFS_BTREE_ROOT_NCHILDREN_MAX); ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) return ret; node = (struct nilfs_btree_node *)bh->b_data; ncmax = nilfs_btree_nchildren_per_block(btree); break; default: node = NULL; return -EINVAL; } nchildren = nilfs_btree_node_get_nchildren(node); if (nchildren < nitems) nitems = nchildren; dkeys = nilfs_btree_node_dkeys(node); dptrs = nilfs_btree_node_dptrs(node, ncmax); for (i = 0; i < nitems; i++) { keys[i] = le64_to_cpu(dkeys[i]); ptrs[i] = le64_to_cpu(dptrs[i]); } brelse(bh); return nitems; } static int nilfs_btree_prepare_convert_and_insert(struct nilfs_bmap *btree, __u64 key, union nilfs_bmap_ptr_req *dreq, union nilfs_bmap_ptr_req *nreq, struct buffer_head **bhp, struct nilfs_bmap_stats *stats) { struct buffer_head *bh; struct inode *dat = NULL; int ret; stats->bs_nblocks = 0; /* for data */ /* cannot find near ptr */ if (NILFS_BMAP_USE_VBN(btree)) { dreq->bpr_ptr = nilfs_btree_find_target_v(btree, NULL, key); dat = nilfs_bmap_get_dat(btree); } ret = nilfs_attach_btree_node_cache(&NILFS_BMAP_I(btree)->vfs_inode); if (ret < 0) return ret; ret = nilfs_bmap_prepare_alloc_ptr(btree, dreq, dat); if (ret < 0) return ret; *bhp = NULL; stats->bs_nblocks++; if (nreq != NULL) { nreq->bpr_ptr = dreq->bpr_ptr + 1; ret = nilfs_bmap_prepare_alloc_ptr(btree, nreq, dat); if (ret < 0) goto err_out_dreq; ret = nilfs_btree_get_new_block(btree, nreq->bpr_ptr, &bh); if (ret < 0) goto err_out_nreq; *bhp = bh; stats->bs_nblocks++; } /* success */ return 0; /* error */ err_out_nreq: nilfs_bmap_abort_alloc_ptr(btree, nreq, dat); err_out_dreq: nilfs_bmap_abort_alloc_ptr(btree, dreq, dat); stats->bs_nblocks = 0; return ret; } static void nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, int n, union nilfs_bmap_ptr_req *dreq, union nilfs_bmap_ptr_req *nreq, struct buffer_head *bh) { struct nilfs_btree_node *node; struct inode *dat; __u64 tmpptr; int ncblk; /* free resources */ if (btree->b_ops->bop_clear != NULL) btree->b_ops->bop_clear(btree); /* ptr must be a pointer to a buffer head. */ set_buffer_nilfs_volatile((struct buffer_head *)((unsigned long)ptr)); /* convert and insert */ dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL; __nilfs_btree_init(btree); if (nreq != NULL) { nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); nilfs_bmap_commit_alloc_ptr(btree, nreq, dat); /* create child node at level 1 */ node = (struct nilfs_btree_node *)bh->b_data; ncblk = nilfs_btree_nchildren_per_block(btree); nilfs_btree_node_init(node, 0, 1, n, ncblk, keys, ptrs); nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, ncblk); if (!buffer_dirty(bh)) mark_buffer_dirty(bh); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); brelse(bh); /* create root node at level 2 */ node = nilfs_btree_get_root(btree); tmpptr = nreq->bpr_ptr; nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 2, 1, NILFS_BTREE_ROOT_NCHILDREN_MAX, &keys[0], &tmpptr); } else { nilfs_bmap_commit_alloc_ptr(btree, dreq, dat); /* create root node at level 1 */ node = nilfs_btree_get_root(btree); nilfs_btree_node_init(node, NILFS_BTREE_NODE_ROOT, 1, n, NILFS_BTREE_ROOT_NCHILDREN_MAX, keys, ptrs); nilfs_btree_node_insert(node, n, key, dreq->bpr_ptr, NILFS_BTREE_ROOT_NCHILDREN_MAX); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); } if (NILFS_BMAP_USE_VBN(btree)) nilfs_bmap_set_target_v(btree, key, dreq->bpr_ptr); } /** * nilfs_btree_convert_and_insert - Convert and insert entries into a B-tree * @btree: NILFS B-tree structure * @key: Key of the new entry to be inserted * @ptr: Pointer (block number) associated with the key to be inserted * @keys: Array of keys to be inserted in addition to @key * @ptrs: Array of pointers associated with @keys * @n: Number of keys and pointers in @keys and @ptrs * * This function is used to insert a new entry specified by @key and @ptr, * along with additional entries specified by @keys and @ptrs arrays, into a * NILFS B-tree. * It prepares the necessary changes by allocating the required blocks and any * necessary intermediate nodes. It converts configurations from other forms of * block mapping (the one that currently exists is direct mapping) to a B-tree. * * Return: 0 on success or a negative error code on failure. */ int nilfs_btree_convert_and_insert(struct nilfs_bmap *btree, __u64 key, __u64 ptr, const __u64 *keys, const __u64 *ptrs, int n) { struct buffer_head *bh = NULL; union nilfs_bmap_ptr_req dreq, nreq, *di, *ni; struct nilfs_bmap_stats stats; int ret; if (n + 1 <= NILFS_BTREE_ROOT_NCHILDREN_MAX) { di = &dreq; ni = NULL; } else if ((n + 1) <= NILFS_BTREE_NODE_NCHILDREN_MAX( nilfs_btree_node_size(btree))) { di = &dreq; ni = &nreq; } else { di = NULL; ni = NULL; BUG(); } ret = nilfs_btree_prepare_convert_and_insert(btree, key, di, ni, &bh, &stats); if (ret < 0) return ret; nilfs_btree_commit_convert_and_insert(btree, key, ptr, keys, ptrs, n, di, ni, bh); nilfs_inode_add_blocks(btree->b_inode, stats.bs_nblocks); return 0; } static int nilfs_btree_propagate_p(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head *bh) { while ((++level < nilfs_btree_height(btree) - 1) && !buffer_dirty(path[level].bp_bh)) mark_buffer_dirty(path[level].bp_bh); return 0; } static int nilfs_btree_prepare_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct inode *dat) { struct nilfs_btree_node *parent; int ncmax, ret; parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); path[level].bp_oldreq.bpr_ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, ncmax); path[level].bp_newreq.bpr_ptr = path[level].bp_oldreq.bpr_ptr + 1; ret = nilfs_dat_prepare_update(dat, &path[level].bp_oldreq.bpr_req, &path[level].bp_newreq.bpr_req); if (ret < 0) return ret; if (buffer_nilfs_node(path[level].bp_bh)) { path[level].bp_ctxt.oldkey = path[level].bp_oldreq.bpr_ptr; path[level].bp_ctxt.newkey = path[level].bp_newreq.bpr_ptr; path[level].bp_ctxt.bh = path[level].bp_bh; ret = nilfs_btnode_prepare_change_key( NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) { nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, &path[level].bp_newreq.bpr_req); return ret; } } return 0; } static void nilfs_btree_commit_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct inode *dat) { struct nilfs_btree_node *parent; int ncmax; nilfs_dat_commit_update(dat, &path[level].bp_oldreq.bpr_req, &path[level].bp_newreq.bpr_req, btree->b_ptr_type == NILFS_BMAP_PTR_VS); if (buffer_nilfs_node(path[level].bp_bh)) { nilfs_btnode_commit_change_key( NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); path[level].bp_bh = path[level].bp_ctxt.bh; } set_buffer_nilfs_volatile(path[level].bp_bh); parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, path[level].bp_newreq.bpr_ptr, ncmax); } static void nilfs_btree_abort_update_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct inode *dat) { nilfs_dat_abort_update(dat, &path[level].bp_oldreq.bpr_req, &path[level].bp_newreq.bpr_req); if (buffer_nilfs_node(path[level].bp_bh)) nilfs_btnode_abort_change_key( NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); } static int nilfs_btree_prepare_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int minlevel, int *maxlevelp, struct inode *dat) { int level, ret; level = minlevel; if (!buffer_nilfs_volatile(path[level].bp_bh)) { ret = nilfs_btree_prepare_update_v(btree, path, level, dat); if (ret < 0) return ret; } while ((++level < nilfs_btree_height(btree) - 1) && !buffer_dirty(path[level].bp_bh)) { WARN_ON(buffer_nilfs_volatile(path[level].bp_bh)); ret = nilfs_btree_prepare_update_v(btree, path, level, dat); if (ret < 0) goto out; } /* success */ *maxlevelp = level - 1; return 0; /* error */ out: while (--level > minlevel) nilfs_btree_abort_update_v(btree, path, level, dat); if (!buffer_nilfs_volatile(path[level].bp_bh)) nilfs_btree_abort_update_v(btree, path, level, dat); return ret; } static void nilfs_btree_commit_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int minlevel, int maxlevel, struct buffer_head *bh, struct inode *dat) { int level; if (!buffer_nilfs_volatile(path[minlevel].bp_bh)) nilfs_btree_commit_update_v(btree, path, minlevel, dat); for (level = minlevel + 1; level <= maxlevel; level++) nilfs_btree_commit_update_v(btree, path, level, dat); } static int nilfs_btree_propagate_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head *bh) { int maxlevel = 0, ret; struct nilfs_btree_node *parent; struct inode *dat = nilfs_bmap_get_dat(btree); __u64 ptr; int ncmax; get_bh(bh); path[level].bp_bh = bh; ret = nilfs_btree_prepare_propagate_v(btree, path, level, &maxlevel, dat); if (ret < 0) goto out; if (buffer_nilfs_volatile(path[level].bp_bh)) { parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, ncmax); ret = nilfs_dat_mark_dirty(dat, ptr); if (ret < 0) goto out; } nilfs_btree_commit_propagate_v(btree, path, level, maxlevel, bh, dat); out: brelse(path[level].bp_bh); path[level].bp_bh = NULL; return ret; } static int nilfs_btree_propagate(struct nilfs_bmap *btree, struct buffer_head *bh) { struct nilfs_btree_path *path; struct nilfs_btree_node *node; __u64 key; int level, ret; WARN_ON(!buffer_dirty(bh)); path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; if (buffer_nilfs_node(bh)) { node = (struct nilfs_btree_node *)bh->b_data; key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); } else { key = nilfs_bmap_data_get_key(btree, bh); level = NILFS_BTREE_LEVEL_DATA; } ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { if (unlikely(ret == -ENOENT)) nilfs_crit(btree->b_inode->i_sb, "writing node/leaf block does not appear in b-tree (ino=%lu) at key=%llu, level=%d", btree->b_inode->i_ino, (unsigned long long)key, level); goto out; } ret = NILFS_BMAP_USE_VBN(btree) ? nilfs_btree_propagate_v(btree, path, level, bh) : nilfs_btree_propagate_p(btree, path, level, bh); out: nilfs_btree_free_path(path); return ret; } static int nilfs_btree_propagate_gc(struct nilfs_bmap *btree, struct buffer_head *bh) { return nilfs_dat_mark_dirty(nilfs_bmap_get_dat(btree), bh->b_blocknr); } static void nilfs_btree_add_dirty_buffer(struct nilfs_bmap *btree, struct list_head *lists, struct buffer_head *bh) { struct list_head *head; struct buffer_head *cbh; struct nilfs_btree_node *node, *cnode; __u64 key, ckey; int level; get_bh(bh); node = (struct nilfs_btree_node *)bh->b_data; key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); if (level < NILFS_BTREE_LEVEL_NODE_MIN || level >= NILFS_BTREE_LEVEL_MAX) { dump_stack(); nilfs_warn(btree->b_inode->i_sb, "invalid btree level: %d (key=%llu, ino=%lu, blocknr=%llu)", level, (unsigned long long)key, btree->b_inode->i_ino, (unsigned long long)bh->b_blocknr); return; } list_for_each(head, &lists[level]) { cbh = list_entry(head, struct buffer_head, b_assoc_buffers); cnode = (struct nilfs_btree_node *)cbh->b_data; ckey = nilfs_btree_node_get_key(cnode, 0); if (key < ckey) break; } list_add_tail(&bh->b_assoc_buffers, head); } static void nilfs_btree_lookup_dirty_buffers(struct nilfs_bmap *btree, struct list_head *listp) { struct inode *btnc_inode = NILFS_BMAP_I(btree)->i_assoc_inode; struct address_space *btcache = btnc_inode->i_mapping; struct list_head lists[NILFS_BTREE_LEVEL_MAX]; struct folio_batch fbatch; struct buffer_head *bh, *head; pgoff_t index = 0; int level, i; for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < NILFS_BTREE_LEVEL_MAX; level++) INIT_LIST_HEAD(&lists[level]); folio_batch_init(&fbatch); while (filemap_get_folios_tag(btcache, &index, (pgoff_t)-1, PAGECACHE_TAG_DIRTY, &fbatch)) { for (i = 0; i < folio_batch_count(&fbatch); i++) { bh = head = folio_buffers(fbatch.folios[i]); do { if (buffer_dirty(bh)) nilfs_btree_add_dirty_buffer(btree, lists, bh); } while ((bh = bh->b_this_page) != head); } folio_batch_release(&fbatch); cond_resched(); } for (level = NILFS_BTREE_LEVEL_NODE_MIN; level < NILFS_BTREE_LEVEL_MAX; level++) list_splice_tail(&lists[level], listp); } static int nilfs_btree_assign_p(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { struct nilfs_btree_node *parent; __u64 key; __u64 ptr; int ncmax, ret; parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, ncmax); if (buffer_nilfs_node(*bh)) { path[level].bp_ctxt.oldkey = ptr; path[level].bp_ctxt.newkey = blocknr; path[level].bp_ctxt.bh = *bh; ret = nilfs_btnode_prepare_change_key( NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); if (ret < 0) return ret; nilfs_btnode_commit_change_key( NILFS_BMAP_I(btree)->i_assoc_inode->i_mapping, &path[level].bp_ctxt); *bh = path[level].bp_ctxt.bh; } nilfs_btree_node_set_ptr(parent, path[level + 1].bp_index, blocknr, ncmax); key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); /* on-disk format */ binfo->bi_dat.bi_blkoff = cpu_to_le64(key); binfo->bi_dat.bi_level = level; memset(binfo->bi_dat.bi_pad, 0, sizeof(binfo->bi_dat.bi_pad)); return 0; } static int nilfs_btree_assign_v(struct nilfs_bmap *btree, struct nilfs_btree_path *path, int level, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { struct nilfs_btree_node *parent; struct inode *dat = nilfs_bmap_get_dat(btree); __u64 key; __u64 ptr; union nilfs_bmap_ptr_req req; int ncmax, ret; parent = nilfs_btree_get_node(btree, path, level + 1, &ncmax); ptr = nilfs_btree_node_get_ptr(parent, path[level + 1].bp_index, ncmax); req.bpr_ptr = ptr; ret = nilfs_dat_prepare_start(dat, &req.bpr_req); if (ret < 0) return ret; nilfs_dat_commit_start(dat, &req.bpr_req, blocknr); key = nilfs_btree_node_get_key(parent, path[level + 1].bp_index); /* on-disk format */ binfo->bi_v.bi_vblocknr = cpu_to_le64(ptr); binfo->bi_v.bi_blkoff = cpu_to_le64(key); return 0; } static int nilfs_btree_assign(struct nilfs_bmap *btree, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { struct nilfs_btree_path *path; struct nilfs_btree_node *node; __u64 key; int level, ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; if (buffer_nilfs_node(*bh)) { node = (struct nilfs_btree_node *)(*bh)->b_data; key = nilfs_btree_node_get_key(node, 0); level = nilfs_btree_node_get_level(node); } else { key = nilfs_bmap_data_get_key(btree, *bh); level = NILFS_BTREE_LEVEL_DATA; } ret = nilfs_btree_do_lookup(btree, path, key, NULL, level + 1, 0); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } ret = NILFS_BMAP_USE_VBN(btree) ? nilfs_btree_assign_v(btree, path, level, bh, blocknr, binfo) : nilfs_btree_assign_p(btree, path, level, bh, blocknr, binfo); out: nilfs_btree_free_path(path); return ret; } static int nilfs_btree_assign_gc(struct nilfs_bmap *btree, struct buffer_head **bh, sector_t blocknr, union nilfs_binfo *binfo) { struct nilfs_btree_node *node; __u64 key; int ret; ret = nilfs_dat_move(nilfs_bmap_get_dat(btree), (*bh)->b_blocknr, blocknr); if (ret < 0) return ret; if (buffer_nilfs_node(*bh)) { node = (struct nilfs_btree_node *)(*bh)->b_data; key = nilfs_btree_node_get_key(node, 0); } else key = nilfs_bmap_data_get_key(btree, *bh); /* on-disk format */ binfo->bi_v.bi_vblocknr = cpu_to_le64((*bh)->b_blocknr); binfo->bi_v.bi_blkoff = cpu_to_le64(key); return 0; } static int nilfs_btree_mark(struct nilfs_bmap *btree, __u64 key, int level) { struct buffer_head *bh; struct nilfs_btree_path *path; __u64 ptr; int ret; path = nilfs_btree_alloc_path(); if (path == NULL) return -ENOMEM; ret = nilfs_btree_do_lookup(btree, path, key, &ptr, level + 1, 0); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } ret = nilfs_btree_get_block(btree, ptr, &bh); if (ret < 0) { WARN_ON(ret == -ENOENT); goto out; } if (!buffer_dirty(bh)) mark_buffer_dirty(bh); brelse(bh); if (!nilfs_bmap_dirty(btree)) nilfs_bmap_set_dirty(btree); out: nilfs_btree_free_path(path); return ret; } static const struct nilfs_bmap_operations nilfs_btree_ops = { .bop_lookup = nilfs_btree_lookup, .bop_lookup_contig = nilfs_btree_lookup_contig, .bop_insert = nilfs_btree_insert, .bop_delete = nilfs_btree_delete, .bop_clear = NULL, .bop_propagate = nilfs_btree_propagate, .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, .bop_assign = nilfs_btree_assign, .bop_mark = nilfs_btree_mark, .bop_seek_key = nilfs_btree_seek_key, .bop_last_key = nilfs_btree_last_key, .bop_check_insert = NULL, .bop_check_delete = nilfs_btree_check_delete, .bop_gather_data = nilfs_btree_gather_data, }; static const struct nilfs_bmap_operations nilfs_btree_ops_gc = { .bop_lookup = NULL, .bop_lookup_contig = NULL, .bop_insert = NULL, .bop_delete = NULL, .bop_clear = NULL, .bop_propagate = nilfs_btree_propagate_gc, .bop_lookup_dirty_buffers = nilfs_btree_lookup_dirty_buffers, .bop_assign = nilfs_btree_assign_gc, .bop_mark = NULL, .bop_seek_key = NULL, .bop_last_key = NULL, .bop_check_insert = NULL, .bop_check_delete = NULL, .bop_gather_data = NULL, }; static void __nilfs_btree_init(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops; bmap->b_nchildren_per_block = NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); } int nilfs_btree_init(struct nilfs_bmap *bmap) { int ret = 0; __nilfs_btree_init(bmap); if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap), bmap->b_inode)) ret = -EIO; else ret = nilfs_attach_btree_node_cache( &NILFS_BMAP_I(bmap)->vfs_inode); return ret; } void nilfs_btree_init_gc(struct nilfs_bmap *bmap) { bmap->b_ops = &nilfs_btree_ops_gc; bmap->b_nchildren_per_block = NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap)); } |
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 | #ifndef INTERNAL_IO_SLIST_H #define INTERNAL_IO_SLIST_H #include <linux/io_uring_types.h> #define __wq_list_for_each(pos, head) \ for (pos = (head)->first; pos; pos = (pos)->next) #define wq_list_for_each(pos, prv, head) \ for (pos = (head)->first, prv = NULL; pos; prv = pos, pos = (pos)->next) #define wq_list_for_each_resume(pos, prv) \ for (; pos; prv = pos, pos = (pos)->next) #define wq_list_empty(list) (READ_ONCE((list)->first) == NULL) #define INIT_WQ_LIST(list) do { \ (list)->first = NULL; \ } while (0) static inline void wq_list_add_after(struct io_wq_work_node *node, struct io_wq_work_node *pos, struct io_wq_work_list *list) { struct io_wq_work_node *next = pos->next; pos->next = node; node->next = next; if (!next) list->last = node; } static inline void wq_list_add_tail(struct io_wq_work_node *node, struct io_wq_work_list *list) { node->next = NULL; if (!list->first) { list->last = node; WRITE_ONCE(list->first, node); } else { list->last->next = node; list->last = node; } } static inline void wq_list_add_head(struct io_wq_work_node *node, struct io_wq_work_list *list) { node->next = list->first; if (!node->next) list->last = node; WRITE_ONCE(list->first, node); } static inline void wq_list_cut(struct io_wq_work_list *list, struct io_wq_work_node *last, struct io_wq_work_node *prev) { /* first in the list, if prev==NULL */ if (!prev) WRITE_ONCE(list->first, last->next); else prev->next = last->next; if (last == list->last) list->last = prev; last->next = NULL; } static inline void __wq_list_splice(struct io_wq_work_list *list, struct io_wq_work_node *to) { list->last->next = to->next; to->next = list->first; INIT_WQ_LIST(list); } static inline bool wq_list_splice(struct io_wq_work_list *list, struct io_wq_work_node *to) { if (!wq_list_empty(list)) { __wq_list_splice(list, to); return true; } return false; } static inline void wq_stack_add_head(struct io_wq_work_node *node, struct io_wq_work_node *stack) { node->next = stack->next; stack->next = node; } static inline void wq_list_del(struct io_wq_work_list *list, struct io_wq_work_node *node, struct io_wq_work_node *prev) { wq_list_cut(list, node, prev); } static inline struct io_wq_work_node *wq_stack_extract(struct io_wq_work_node *stack) { struct io_wq_work_node *node = stack->next; stack->next = node->next; return node; } static inline struct io_wq_work *wq_next_work(struct io_wq_work *work) { if (!work->list.next) return NULL; return container_of(work->list.next, struct io_wq_work, list); } #endif // INTERNAL_IO_SLIST_H |
| 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 1099 1037 57 7 7 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 | // SPDX-License-Identifier: GPL-2.0-or-later /* * LAPB release 002 * * This code REQUIRES 2.1.15 or higher/ NET3.038 * * History * LAPB 001 Jonathan Naylor Started Coding * LAPB 002 Jonathan Naylor New timer architecture. * 2000-10-29 Henner Eisen lapb_data_indication() return status. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/inet.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/stat.h> #include <linux/init.h> #include <net/lapb.h> static LIST_HEAD(lapb_list); static DEFINE_RWLOCK(lapb_list_lock); /* * Free an allocated lapb control block. */ static void lapb_free_cb(struct lapb_cb *lapb) { kfree(lapb); } static __inline__ void lapb_hold(struct lapb_cb *lapb) { refcount_inc(&lapb->refcnt); } static __inline__ void lapb_put(struct lapb_cb *lapb) { if (refcount_dec_and_test(&lapb->refcnt)) lapb_free_cb(lapb); } /* * Socket removal during an interrupt is now safe. */ static void __lapb_remove_cb(struct lapb_cb *lapb) { if (lapb->node.next) { list_del(&lapb->node); lapb_put(lapb); } } /* * Add a socket to the bound sockets list. */ static void __lapb_insert_cb(struct lapb_cb *lapb) { list_add(&lapb->node, &lapb_list); lapb_hold(lapb); } static struct lapb_cb *__lapb_devtostruct(struct net_device *dev) { struct lapb_cb *lapb, *use = NULL; list_for_each_entry(lapb, &lapb_list, node) { if (lapb->dev == dev) { use = lapb; break; } } if (use) lapb_hold(use); return use; } static struct lapb_cb *lapb_devtostruct(struct net_device *dev) { struct lapb_cb *rc; read_lock_bh(&lapb_list_lock); rc = __lapb_devtostruct(dev); read_unlock_bh(&lapb_list_lock); return rc; } /* * Create an empty LAPB control block. */ static struct lapb_cb *lapb_create_cb(void) { struct lapb_cb *lapb = kzalloc(sizeof(*lapb), GFP_ATOMIC); if (!lapb) goto out; skb_queue_head_init(&lapb->write_queue); skb_queue_head_init(&lapb->ack_queue); timer_setup(&lapb->t1timer, NULL, 0); timer_setup(&lapb->t2timer, NULL, 0); lapb->t1timer_running = false; lapb->t2timer_running = false; lapb->t1 = LAPB_DEFAULT_T1; lapb->t2 = LAPB_DEFAULT_T2; lapb->n2 = LAPB_DEFAULT_N2; lapb->mode = LAPB_DEFAULT_MODE; lapb->window = LAPB_DEFAULT_WINDOW; lapb->state = LAPB_STATE_0; spin_lock_init(&lapb->lock); refcount_set(&lapb->refcnt, 1); out: return lapb; } int lapb_register(struct net_device *dev, const struct lapb_register_struct *callbacks) { struct lapb_cb *lapb; int rc = LAPB_BADTOKEN; write_lock_bh(&lapb_list_lock); lapb = __lapb_devtostruct(dev); if (lapb) { lapb_put(lapb); goto out; } lapb = lapb_create_cb(); rc = LAPB_NOMEM; if (!lapb) goto out; lapb->dev = dev; lapb->callbacks = callbacks; __lapb_insert_cb(lapb); lapb_start_t1timer(lapb); rc = LAPB_OK; out: write_unlock_bh(&lapb_list_lock); return rc; } EXPORT_SYMBOL(lapb_register); int lapb_unregister(struct net_device *dev) { struct lapb_cb *lapb; int rc = LAPB_BADTOKEN; write_lock_bh(&lapb_list_lock); lapb = __lapb_devtostruct(dev); if (!lapb) goto out; lapb_put(lapb); /* Wait for other refs to "lapb" to drop */ while (refcount_read(&lapb->refcnt) > 2) usleep_range(1, 10); spin_lock_bh(&lapb->lock); lapb_stop_t1timer(lapb); lapb_stop_t2timer(lapb); lapb_clear_queues(lapb); spin_unlock_bh(&lapb->lock); /* Wait for running timers to stop */ del_timer_sync(&lapb->t1timer); del_timer_sync(&lapb->t2timer); __lapb_remove_cb(lapb); lapb_put(lapb); rc = LAPB_OK; out: write_unlock_bh(&lapb_list_lock); return rc; } EXPORT_SYMBOL(lapb_unregister); int lapb_getparms(struct net_device *dev, struct lapb_parms_struct *parms) { int rc = LAPB_BADTOKEN; struct lapb_cb *lapb = lapb_devtostruct(dev); if (!lapb) goto out; spin_lock_bh(&lapb->lock); parms->t1 = lapb->t1 / HZ; parms->t2 = lapb->t2 / HZ; parms->n2 = lapb->n2; parms->n2count = lapb->n2count; parms->state = lapb->state; parms->window = lapb->window; parms->mode = lapb->mode; if (!timer_pending(&lapb->t1timer)) parms->t1timer = 0; else parms->t1timer = (lapb->t1timer.expires - jiffies) / HZ; if (!timer_pending(&lapb->t2timer)) parms->t2timer = 0; else parms->t2timer = (lapb->t2timer.expires - jiffies) / HZ; spin_unlock_bh(&lapb->lock); lapb_put(lapb); rc = LAPB_OK; out: return rc; } EXPORT_SYMBOL(lapb_getparms); int lapb_setparms(struct net_device *dev, struct lapb_parms_struct *parms) { int rc = LAPB_BADTOKEN; struct lapb_cb *lapb = lapb_devtostruct(dev); if (!lapb) goto out; spin_lock_bh(&lapb->lock); rc = LAPB_INVALUE; if (parms->t1 < 1 || parms->t2 < 1 || parms->n2 < 1) goto out_put; if (lapb->state == LAPB_STATE_0) { if (parms->mode & LAPB_EXTENDED) { if (parms->window < 1 || parms->window > 127) goto out_put; } else { if (parms->window < 1 || parms->window > 7) goto out_put; } lapb->mode = parms->mode; lapb->window = parms->window; } lapb->t1 = parms->t1 * HZ; lapb->t2 = parms->t2 * HZ; lapb->n2 = parms->n2; rc = LAPB_OK; out_put: spin_unlock_bh(&lapb->lock); lapb_put(lapb); out: return rc; } EXPORT_SYMBOL(lapb_setparms); int lapb_connect_request(struct net_device *dev) { struct lapb_cb *lapb = lapb_devtostruct(dev); int rc = LAPB_BADTOKEN; if (!lapb) goto out; spin_lock_bh(&lapb->lock); rc = LAPB_OK; if (lapb->state == LAPB_STATE_1) goto out_put; rc = LAPB_CONNECTED; if (lapb->state == LAPB_STATE_3 || lapb->state == LAPB_STATE_4) goto out_put; lapb_establish_data_link(lapb); lapb_dbg(0, "(%p) S0 -> S1\n", lapb->dev); lapb->state = LAPB_STATE_1; rc = LAPB_OK; out_put: spin_unlock_bh(&lapb->lock); lapb_put(lapb); out: return rc; } EXPORT_SYMBOL(lapb_connect_request); static int __lapb_disconnect_request(struct lapb_cb *lapb) { switch (lapb->state) { case LAPB_STATE_0: return LAPB_NOTCONNECTED; case LAPB_STATE_1: lapb_dbg(1, "(%p) S1 TX DISC(1)\n", lapb->dev); lapb_dbg(0, "(%p) S1 -> S0\n", lapb->dev); lapb_send_control(lapb, LAPB_DISC, LAPB_POLLON, LAPB_COMMAND); lapb->state = LAPB_STATE_0; lapb_start_t1timer(lapb); return LAPB_NOTCONNECTED; case LAPB_STATE_2: return LAPB_OK; } lapb_clear_queues(lapb); lapb->n2count = 0; lapb_send_control(lapb, LAPB_DISC, LAPB_POLLON, LAPB_COMMAND); lapb_start_t1timer(lapb); lapb_stop_t2timer(lapb); lapb->state = LAPB_STATE_2; lapb_dbg(1, "(%p) S3 DISC(1)\n", lapb->dev); lapb_dbg(0, "(%p) S3 -> S2\n", lapb->dev); return LAPB_OK; } int lapb_disconnect_request(struct net_device *dev) { struct lapb_cb *lapb = lapb_devtostruct(dev); int rc = LAPB_BADTOKEN; if (!lapb) goto out; spin_lock_bh(&lapb->lock); rc = __lapb_disconnect_request(lapb); spin_unlock_bh(&lapb->lock); lapb_put(lapb); out: return rc; } EXPORT_SYMBOL(lapb_disconnect_request); int lapb_data_request(struct net_device *dev, struct sk_buff *skb) { struct lapb_cb *lapb = lapb_devtostruct(dev); int rc = LAPB_BADTOKEN; if (!lapb) goto out; spin_lock_bh(&lapb->lock); rc = LAPB_NOTCONNECTED; if (lapb->state != LAPB_STATE_3 && lapb->state != LAPB_STATE_4) goto out_put; skb_queue_tail(&lapb->write_queue, skb); lapb_kick(lapb); rc = LAPB_OK; out_put: spin_unlock_bh(&lapb->lock); lapb_put(lapb); out: return rc; } EXPORT_SYMBOL(lapb_data_request); int lapb_data_received(struct net_device *dev, struct sk_buff *skb) { struct lapb_cb *lapb = lapb_devtostruct(dev); int rc = LAPB_BADTOKEN; if (lapb) { spin_lock_bh(&lapb->lock); lapb_data_input(lapb, skb); spin_unlock_bh(&lapb->lock); lapb_put(lapb); rc = LAPB_OK; } return rc; } EXPORT_SYMBOL(lapb_data_received); void lapb_connect_confirmation(struct lapb_cb *lapb, int reason) { if (lapb->callbacks->connect_confirmation) lapb->callbacks->connect_confirmation(lapb->dev, reason); } void lapb_connect_indication(struct lapb_cb *lapb, int reason) { if (lapb->callbacks->connect_indication) lapb->callbacks->connect_indication(lapb->dev, reason); } void lapb_disconnect_confirmation(struct lapb_cb *lapb, int reason) { if (lapb->callbacks->disconnect_confirmation) lapb->callbacks->disconnect_confirmation(lapb->dev, reason); } void lapb_disconnect_indication(struct lapb_cb *lapb, int reason) { if (lapb->callbacks->disconnect_indication) lapb->callbacks->disconnect_indication(lapb->dev, reason); } int lapb_data_indication(struct lapb_cb *lapb, struct sk_buff *skb) { if (lapb->callbacks->data_indication) return lapb->callbacks->data_indication(lapb->dev, skb); kfree_skb(skb); return NET_RX_SUCCESS; /* For now; must be != NET_RX_DROP */ } int lapb_data_transmit(struct lapb_cb *lapb, struct sk_buff *skb) { int used = 0; if (lapb->callbacks->data_transmit) { lapb->callbacks->data_transmit(lapb->dev, skb); used = 1; } return used; } /* Handle device status changes. */ static int lapb_device_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct lapb_cb *lapb; if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; if (dev->type != ARPHRD_X25) return NOTIFY_DONE; lapb = lapb_devtostruct(dev); if (!lapb) return NOTIFY_DONE; spin_lock_bh(&lapb->lock); switch (event) { case NETDEV_UP: lapb_dbg(0, "(%p) Interface up: %s\n", dev, dev->name); if (netif_carrier_ok(dev)) { lapb_dbg(0, "(%p): Carrier is already up: %s\n", dev, dev->name); if (lapb->mode & LAPB_DCE) { lapb_start_t1timer(lapb); } else { if (lapb->state == LAPB_STATE_0) { lapb->state = LAPB_STATE_1; lapb_establish_data_link(lapb); } } } break; case NETDEV_GOING_DOWN: if (netif_carrier_ok(dev)) __lapb_disconnect_request(lapb); break; case NETDEV_DOWN: lapb_dbg(0, "(%p) Interface down: %s\n", dev, dev->name); lapb_dbg(0, "(%p) S%d -> S0\n", dev, lapb->state); lapb_clear_queues(lapb); lapb->state = LAPB_STATE_0; lapb->n2count = 0; lapb_stop_t1timer(lapb); lapb_stop_t2timer(lapb); break; case NETDEV_CHANGE: if (netif_carrier_ok(dev)) { lapb_dbg(0, "(%p): Carrier detected: %s\n", dev, dev->name); if (lapb->mode & LAPB_DCE) { lapb_start_t1timer(lapb); } else { if (lapb->state == LAPB_STATE_0) { lapb->state = LAPB_STATE_1; lapb_establish_data_link(lapb); } } } else { lapb_dbg(0, "(%p) Carrier lost: %s\n", dev, dev->name); lapb_dbg(0, "(%p) S%d -> S0\n", dev, lapb->state); lapb_clear_queues(lapb); lapb->state = LAPB_STATE_0; lapb->n2count = 0; lapb_stop_t1timer(lapb); lapb_stop_t2timer(lapb); } break; } spin_unlock_bh(&lapb->lock); lapb_put(lapb); return NOTIFY_DONE; } static struct notifier_block lapb_dev_notifier = { .notifier_call = lapb_device_event, }; static int __init lapb_init(void) { return register_netdevice_notifier(&lapb_dev_notifier); } static void __exit lapb_exit(void) { WARN_ON(!list_empty(&lapb_list)); unregister_netdevice_notifier(&lapb_dev_notifier); } MODULE_AUTHOR("Jonathan Naylor <g4klx@g4klx.demon.co.uk>"); MODULE_DESCRIPTION("The X.25 Link Access Procedure B link layer protocol"); MODULE_LICENSE("GPL"); module_init(lapb_init); module_exit(lapb_exit); |
| 2 2 2 5 6 6 6 3 1 1 1 1 3 3 6 5 6 3 3 1 1 2 41 11 3 2 5 3 2 1 1 2 3 5 5 4 2 6 6 22 4 1 20 12 1 3 4 2 2 2 1 4 8 10 12 11 6 3 9 2 9 7 3 6 3 1 4 6 4 4 2 4 6 1 2 35 35 1 34 1 32 28 1 1 2 2 25 15 13 1 1 1 1 26 3 19 3 21 3 2 2 24 26 6 1 5 5 5 76 7 1 1 1 51 1 1 3 1 22 1 22 35 5 4 3 3 1 6 1 3 1 1 2 1 6 2 1092 1054 58 32 9 2 2 8 1 6 7 2 5 7 7 1 1 1 2 7 3 4 7 6 1 1 1 1 1 2 2 4 1 3 1 3 1 2 4 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 | // SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) /* * bcm.c - Broadcast Manager to filter/send (cyclic) CAN content * * Copyright (c) 2002-2017 Volkswagen Group Electronic Research * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Volkswagen nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * Alternatively, provided that this notice is retained in full, this * software may be distributed under the terms of the GNU General * Public License ("GPL") version 2, in which case the provisions of the * GPL apply INSTEAD OF those given above. * * The provided data structures and external interfaces from this code * are not restricted to be used by modules with a GPL compatible license. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH * DAMAGE. * */ #include <linux/module.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/hrtimer.h> #include <linux/list.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/uio.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/socket.h> #include <linux/if_arp.h> #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> #include <linux/can/skb.h> #include <linux/can/bcm.h> #include <linux/slab.h> #include <net/sock.h> #include <net/net_namespace.h> /* * To send multiple CAN frame content within TX_SETUP or to filter * CAN messages with multiplex index within RX_SETUP, the number of * different filters is limited to 256 due to the one byte index value. */ #define MAX_NFRAMES 256 /* limit timers to 400 days for sending/timeouts */ #define BCM_TIMER_SEC_MAX (400 * 24 * 60 * 60) /* use of last_frames[index].flags */ #define RX_LOCAL 0x10 /* frame was created on the local host */ #define RX_OWN 0x20 /* frame was sent via the socket it was received on */ #define RX_RECV 0x40 /* received data for this element */ #define RX_THR 0x80 /* element not been sent due to throttle feature */ #define BCM_CAN_FLAGS_MASK 0x0F /* to clean private flags after usage */ /* get best masking value for can_rx_register() for a given single can_id */ #define REGMASK(id) ((id & CAN_EFF_FLAG) ? \ (CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG) : \ (CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG)) MODULE_DESCRIPTION("PF_CAN broadcast manager protocol"); MODULE_LICENSE("Dual BSD/GPL"); MODULE_AUTHOR("Oliver Hartkopp <oliver.hartkopp@volkswagen.de>"); MODULE_ALIAS("can-proto-2"); #define BCM_MIN_NAMELEN CAN_REQUIRED_SIZE(struct sockaddr_can, can_ifindex) /* * easy access to the first 64 bit of can(fd)_frame payload. cp->data is * 64 bit aligned so the offset has to be multiples of 8 which is ensured * by the only callers in bcm_rx_cmp_to_index() bcm_rx_handler(). */ static inline u64 get_u64(const struct canfd_frame *cp, int offset) { return *(u64 *)(cp->data + offset); } struct bcm_op { struct list_head list; struct rcu_head rcu; int ifindex; canid_t can_id; u32 flags; unsigned long frames_abs, frames_filtered; struct bcm_timeval ival1, ival2; struct hrtimer timer, thrtimer; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; int rx_ifindex; int cfsiz; u32 count; u32 nframes; u32 currframe; /* void pointers to arrays of struct can[fd]_frame */ void *frames; void *last_frames; struct canfd_frame sframe; struct canfd_frame last_sframe; struct sock *sk; struct net_device *rx_reg_dev; }; struct bcm_sock { struct sock sk; int bound; int ifindex; struct list_head notifier; struct list_head rx_ops; struct list_head tx_ops; unsigned long dropped_usr_msgs; struct proc_dir_entry *bcm_proc_read; char procname [32]; /* inode number in decimal with \0 */ }; static LIST_HEAD(bcm_notifier_list); static DEFINE_SPINLOCK(bcm_notifier_lock); static struct bcm_sock *bcm_busy_notifier; /* Return pointer to store the extra msg flags for bcm_recvmsg(). * We use the space of one unsigned int beyond the 'struct sockaddr_can' * in skb->cb. */ static inline unsigned int *bcm_flags(struct sk_buff *skb) { /* return pointer after struct sockaddr_can */ return (unsigned int *)(&((struct sockaddr_can *)skb->cb)[1]); } static inline struct bcm_sock *bcm_sk(const struct sock *sk) { return (struct bcm_sock *)sk; } static inline ktime_t bcm_timeval_to_ktime(struct bcm_timeval tv) { return ktime_set(tv.tv_sec, tv.tv_usec * NSEC_PER_USEC); } /* check limitations for timeval provided by user */ static bool bcm_is_invalid_tv(struct bcm_msg_head *msg_head) { if ((msg_head->ival1.tv_sec < 0) || (msg_head->ival1.tv_sec > BCM_TIMER_SEC_MAX) || (msg_head->ival1.tv_usec < 0) || (msg_head->ival1.tv_usec >= USEC_PER_SEC) || (msg_head->ival2.tv_sec < 0) || (msg_head->ival2.tv_sec > BCM_TIMER_SEC_MAX) || (msg_head->ival2.tv_usec < 0) || (msg_head->ival2.tv_usec >= USEC_PER_SEC)) return true; return false; } #define CFSIZ(flags) ((flags & CAN_FD_FRAME) ? CANFD_MTU : CAN_MTU) #define OPSIZ sizeof(struct bcm_op) #define MHSIZ sizeof(struct bcm_msg_head) /* * procfs functions */ #if IS_ENABLED(CONFIG_PROC_FS) static char *bcm_proc_getifname(struct net *net, char *result, int ifindex) { struct net_device *dev; if (!ifindex) return "any"; rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifindex); if (dev) strcpy(result, dev->name); else strcpy(result, "???"); rcu_read_unlock(); return result; } static int bcm_proc_show(struct seq_file *m, void *v) { char ifname[IFNAMSIZ]; struct net *net = m->private; struct sock *sk = (struct sock *)pde_data(m->file->f_inode); struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; seq_printf(m, ">>> socket %pK", sk->sk_socket); seq_printf(m, " / sk %pK", sk); seq_printf(m, " / bo %pK", bo); seq_printf(m, " / dropped %lu", bo->dropped_usr_msgs); seq_printf(m, " / bound %s", bcm_proc_getifname(net, ifname, bo->ifindex)); seq_printf(m, " <<<\n"); list_for_each_entry(op, &bo->rx_ops, list) { unsigned long reduction; /* print only active entries & prevent division by zero */ if (!op->frames_abs) continue; seq_printf(m, "rx_op: %03X %-5s ", op->can_id, bcm_proc_getifname(net, ifname, op->ifindex)); if (op->flags & CAN_FD_FRAME) seq_printf(m, "(%u)", op->nframes); else seq_printf(m, "[%u]", op->nframes); seq_printf(m, "%c ", (op->flags & RX_CHECK_DLC) ? 'd' : ' '); if (op->kt_ival1) seq_printf(m, "timeo=%lld ", (long long)ktime_to_us(op->kt_ival1)); if (op->kt_ival2) seq_printf(m, "thr=%lld ", (long long)ktime_to_us(op->kt_ival2)); seq_printf(m, "# recv %ld (%ld) => reduction: ", op->frames_filtered, op->frames_abs); reduction = 100 - (op->frames_filtered * 100) / op->frames_abs; seq_printf(m, "%s%ld%%\n", (reduction == 100) ? "near " : "", reduction); } list_for_each_entry(op, &bo->tx_ops, list) { seq_printf(m, "tx_op: %03X %s ", op->can_id, bcm_proc_getifname(net, ifname, op->ifindex)); if (op->flags & CAN_FD_FRAME) seq_printf(m, "(%u) ", op->nframes); else seq_printf(m, "[%u] ", op->nframes); if (op->kt_ival1) seq_printf(m, "t1=%lld ", (long long)ktime_to_us(op->kt_ival1)); if (op->kt_ival2) seq_printf(m, "t2=%lld ", (long long)ktime_to_us(op->kt_ival2)); seq_printf(m, "# sent %ld\n", op->frames_abs); } seq_putc(m, '\n'); return 0; } #endif /* CONFIG_PROC_FS */ /* * bcm_can_tx - send the (next) CAN frame to the appropriate CAN interface * of the given bcm tx op */ static void bcm_can_tx(struct bcm_op *op) { struct sk_buff *skb; struct net_device *dev; struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe; int err; /* no target device? => exit */ if (!op->ifindex) return; dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (!dev) { /* RFC: should this bcm_op remove itself here? */ return; } skb = alloc_skb(op->cfsiz + sizeof(struct can_skb_priv), gfp_any()); if (!skb) goto out; can_skb_reserve(skb); can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; skb_put_data(skb, cf, op->cfsiz); /* send with loopback */ skb->dev = dev; can_skb_set_owner(skb, op->sk); err = can_send(skb, 1); if (!err) op->frames_abs++; op->currframe++; /* reached last frame? */ if (op->currframe >= op->nframes) op->currframe = 0; out: dev_put(dev); } /* * bcm_send_to_user - send a BCM message to the userspace * (consisting of bcm_msg_head + x CAN frames) */ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, struct canfd_frame *frames, int has_timestamp) { struct sk_buff *skb; struct canfd_frame *firstframe; struct sockaddr_can *addr; struct sock *sk = op->sk; unsigned int datalen = head->nframes * op->cfsiz; int err; unsigned int *pflags; skb = alloc_skb(sizeof(*head) + datalen, gfp_any()); if (!skb) return; skb_put_data(skb, head, sizeof(*head)); /* ensure space for sockaddr_can and msg flags */ sock_skb_cb_check_size(sizeof(struct sockaddr_can) + sizeof(unsigned int)); /* initialize msg flags */ pflags = bcm_flags(skb); *pflags = 0; if (head->nframes) { /* CAN frames starting here */ firstframe = (struct canfd_frame *)skb_tail_pointer(skb); skb_put_data(skb, frames, datalen); /* * the BCM uses the flags-element of the canfd_frame * structure for internal purposes. This is only * relevant for updates that are generated by the * BCM, where nframes is 1 */ if (head->nframes == 1) { if (firstframe->flags & RX_LOCAL) *pflags |= MSG_DONTROUTE; if (firstframe->flags & RX_OWN) *pflags |= MSG_CONFIRM; firstframe->flags &= BCM_CAN_FLAGS_MASK; } } if (has_timestamp) { /* restore rx timestamp */ skb->tstamp = op->rx_stamp; } /* * Put the datagram to the queue so that bcm_recvmsg() can * get it from there. We need to pass the interface index to * bcm_recvmsg(). We pass a whole struct sockaddr_can in skb->cb * containing the interface index. */ addr = (struct sockaddr_can *)skb->cb; memset(addr, 0, sizeof(*addr)); addr->can_family = AF_CAN; addr->can_ifindex = op->rx_ifindex; err = sock_queue_rcv_skb(sk, skb); if (err < 0) { struct bcm_sock *bo = bcm_sk(sk); kfree_skb(skb); /* don't care about overflows in this statistic */ bo->dropped_usr_msgs++; } } static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt) { ktime_t ival; if (op->kt_ival1 && op->count) ival = op->kt_ival1; else if (op->kt_ival2) ival = op->kt_ival2; else return false; hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival)); return true; } static void bcm_tx_start_timer(struct bcm_op *op) { if (bcm_tx_set_expiry(op, &op->timer)) hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT); } /* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */ static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; if (op->kt_ival1 && (op->count > 0)) { op->count--; if (!op->count && (op->flags & TX_COUNTEVT)) { /* create notification to user */ memset(&msg_head, 0, sizeof(msg_head)); msg_head.opcode = TX_EXPIRED; msg_head.flags = op->flags; msg_head.count = op->count; msg_head.ival1 = op->ival1; msg_head.ival2 = op->ival2; msg_head.can_id = op->can_id; msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); } bcm_can_tx(op); } else if (op->kt_ival2) { bcm_can_tx(op); } return bcm_tx_set_expiry(op, &op->timer) ? HRTIMER_RESTART : HRTIMER_NORESTART; } /* * bcm_rx_changed - create a RX_CHANGED notification due to changed content */ static void bcm_rx_changed(struct bcm_op *op, struct canfd_frame *data) { struct bcm_msg_head head; /* update statistics */ op->frames_filtered++; /* prevent statistics overflow */ if (op->frames_filtered > ULONG_MAX/100) op->frames_filtered = op->frames_abs = 0; /* this element is not throttled anymore */ data->flags &= ~RX_THR; memset(&head, 0, sizeof(head)); head.opcode = RX_CHANGED; head.flags = op->flags; head.count = op->count; head.ival1 = op->ival1; head.ival2 = op->ival2; head.can_id = op->can_id; head.nframes = 1; bcm_send_to_user(op, &head, data, 1); } /* * bcm_rx_update_and_send - process a detected relevant receive content change * 1. update the last received data * 2. send a notification to the user (if possible) */ static void bcm_rx_update_and_send(struct bcm_op *op, struct canfd_frame *lastdata, const struct canfd_frame *rxdata, unsigned char traffic_flags) { memcpy(lastdata, rxdata, op->cfsiz); /* mark as used and throttled by default */ lastdata->flags |= (RX_RECV|RX_THR); /* add own/local/remote traffic flags */ lastdata->flags |= traffic_flags; /* throttling mode inactive ? */ if (!op->kt_ival2) { /* send RX_CHANGED to the user immediately */ bcm_rx_changed(op, lastdata); return; } /* with active throttling timer we are just done here */ if (hrtimer_active(&op->thrtimer)) return; /* first reception with enabled throttling mode */ if (!op->kt_lastmsg) goto rx_changed_settime; /* got a second frame inside a potential throttle period? */ if (ktime_us_delta(ktime_get(), op->kt_lastmsg) < ktime_to_us(op->kt_ival2)) { /* do not send the saved data - only start throttle timer */ hrtimer_start(&op->thrtimer, ktime_add(op->kt_lastmsg, op->kt_ival2), HRTIMER_MODE_ABS_SOFT); return; } /* the gap was that big, that throttling was not needed here */ rx_changed_settime: bcm_rx_changed(op, lastdata); op->kt_lastmsg = ktime_get(); } /* * bcm_rx_cmp_to_index - (bit)compares the currently received data to formerly * received data stored in op->last_frames[] */ static void bcm_rx_cmp_to_index(struct bcm_op *op, unsigned int index, const struct canfd_frame *rxdata, unsigned char traffic_flags) { struct canfd_frame *cf = op->frames + op->cfsiz * index; struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; int i; /* * no one uses the MSBs of flags for comparison, * so we use it here to detect the first time of reception */ if (!(lcf->flags & RX_RECV)) { /* received data for the first time => send update to user */ bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags); return; } /* do a real check in CAN frame data section */ for (i = 0; i < rxdata->len; i += 8) { if ((get_u64(cf, i) & get_u64(rxdata, i)) != (get_u64(cf, i) & get_u64(lcf, i))) { bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags); return; } } if (op->flags & RX_CHECK_DLC) { /* do a real check in CAN frame length */ if (rxdata->len != lcf->len) { bcm_rx_update_and_send(op, lcf, rxdata, traffic_flags); return; } } } /* * bcm_rx_starttimer - enable timeout monitoring for CAN frame reception */ static void bcm_rx_starttimer(struct bcm_op *op) { if (op->flags & RX_NO_AUTOTIMER) return; if (op->kt_ival1) hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); } /* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; /* if user wants to be informed, when cyclic CAN-Messages come back */ if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { /* clear received CAN frames to indicate 'nothing received' */ memset(op->last_frames, 0, op->nframes * op->cfsiz); } /* create notification to user */ memset(&msg_head, 0, sizeof(msg_head)); msg_head.opcode = RX_TIMEOUT; msg_head.flags = op->flags; msg_head.count = op->count; msg_head.ival1 = op->ival1; msg_head.ival2 = op->ival2; msg_head.can_id = op->can_id; msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); return HRTIMER_NORESTART; } /* * bcm_rx_do_flush - helper for bcm_rx_thr_flush */ static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index) { struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; if ((op->last_frames) && (lcf->flags & RX_THR)) { bcm_rx_changed(op, lcf); return 1; } return 0; } /* * bcm_rx_thr_flush - Check for throttled data and send it to the userspace */ static int bcm_rx_thr_flush(struct bcm_op *op) { int updated = 0; if (op->nframes > 1) { unsigned int i; /* for MUX filter we start at index 1 */ for (i = 1; i < op->nframes; i++) updated += bcm_rx_do_flush(op, i); } else { /* for RX_FILTER_ID and simple filter */ updated += bcm_rx_do_flush(op, 0); } return updated; } /* * bcm_rx_thr_handler - the time for blocked content updates is over now: * Check for throttled data and send it to the userspace */ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); if (bcm_rx_thr_flush(op)) { hrtimer_forward_now(hrtimer, op->kt_ival2); return HRTIMER_RESTART; } else { /* rearm throttle handling */ op->kt_lastmsg = 0; return HRTIMER_NORESTART; } } /* * bcm_rx_handler - handle a CAN frame reception */ static void bcm_rx_handler(struct sk_buff *skb, void *data) { struct bcm_op *op = (struct bcm_op *)data; const struct canfd_frame *rxframe = (struct canfd_frame *)skb->data; unsigned int i; unsigned char traffic_flags; if (op->can_id != rxframe->can_id) return; /* make sure to handle the correct frame type (CAN / CAN FD) */ if (op->flags & CAN_FD_FRAME) { if (!can_is_canfd_skb(skb)) return; } else { if (!can_is_can_skb(skb)) return; } /* disable timeout */ hrtimer_cancel(&op->timer); /* save rx timestamp */ op->rx_stamp = skb->tstamp; /* save originator for recvfrom() */ op->rx_ifindex = skb->dev->ifindex; /* update statistics */ op->frames_abs++; if (op->flags & RX_RTR_FRAME) { /* send reply for RTR-request (placed in op->frames[0]) */ bcm_can_tx(op); return; } /* compute flags to distinguish between own/local/remote CAN traffic */ traffic_flags = 0; if (skb->sk) { traffic_flags |= RX_LOCAL; if (skb->sk == op->sk) traffic_flags |= RX_OWN; } if (op->flags & RX_FILTER_ID) { /* the easiest case */ bcm_rx_update_and_send(op, op->last_frames, rxframe, traffic_flags); goto rx_starttimer; } if (op->nframes == 1) { /* simple compare with index 0 */ bcm_rx_cmp_to_index(op, 0, rxframe, traffic_flags); goto rx_starttimer; } if (op->nframes > 1) { /* * multiplex compare * * find the first multiplex mask that fits. * Remark: The MUX-mask is stored in index 0 - but only the * first 64 bits of the frame data[] are relevant (CAN FD) */ for (i = 1; i < op->nframes; i++) { if ((get_u64(op->frames, 0) & get_u64(rxframe, 0)) == (get_u64(op->frames, 0) & get_u64(op->frames + op->cfsiz * i, 0))) { bcm_rx_cmp_to_index(op, i, rxframe, traffic_flags); break; } } } rx_starttimer: bcm_rx_starttimer(op); } /* * helpers for bcm_op handling: find & delete bcm [rx|tx] op elements */ static struct bcm_op *bcm_find_op(struct list_head *ops, struct bcm_msg_head *mh, int ifindex) { struct bcm_op *op; list_for_each_entry(op, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) return op; } return NULL; } static void bcm_free_op_rcu(struct rcu_head *rcu_head) { struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu); if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); if ((op->last_frames) && (op->last_frames != &op->last_sframe)) kfree(op->last_frames); kfree(op); } static void bcm_remove_op(struct bcm_op *op) { hrtimer_cancel(&op->timer); hrtimer_cancel(&op->thrtimer); call_rcu(&op->rcu, bcm_free_op_rcu); } static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op) { if (op->rx_reg_dev == dev) { can_rx_unregister(dev_net(dev), dev, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); /* mark as removed subscription */ op->rx_reg_dev = NULL; } else printk(KERN_ERR "can-bcm: bcm_rx_unreg: registered device " "mismatch %p %p\n", op->rx_reg_dev, dev); } /* * bcm_delete_rx_op - find and remove a rx op (returns number of removed ops) */ static int bcm_delete_rx_op(struct list_head *ops, struct bcm_msg_head *mh, int ifindex) { struct bcm_op *op, *n; list_for_each_entry_safe(op, n, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { /* disable automatic timer on frame reception */ op->flags |= RX_NO_AUTOTIMER; /* * Don't care if we're bound or not (due to netdev * problems) can_rx_unregister() is always a save * thing to do here. */ if (op->ifindex) { /* * Only remove subscriptions that had not * been removed due to NETDEV_UNREGISTER * in bcm_notifier() */ if (op->rx_reg_dev) { struct net_device *dev; dev = dev_get_by_index(sock_net(op->sk), op->ifindex); if (dev) { bcm_rx_unreg(dev, op); dev_put(dev); } } } else can_rx_unregister(sock_net(op->sk), NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); list_del(&op->list); bcm_remove_op(op); return 1; /* done */ } } return 0; /* not found */ } /* * bcm_delete_tx_op - find and remove a tx op (returns number of removed ops) */ static int bcm_delete_tx_op(struct list_head *ops, struct bcm_msg_head *mh, int ifindex) { struct bcm_op *op, *n; list_for_each_entry_safe(op, n, ops, list) { if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) && (op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) { list_del(&op->list); bcm_remove_op(op); return 1; /* done */ } } return 0; /* not found */ } /* * bcm_read_op - read out a bcm_op and send it to the user (for bcm_sendmsg) */ static int bcm_read_op(struct list_head *ops, struct bcm_msg_head *msg_head, int ifindex) { struct bcm_op *op = bcm_find_op(ops, msg_head, ifindex); if (!op) return -EINVAL; /* put current values into msg_head */ msg_head->flags = op->flags; msg_head->count = op->count; msg_head->ival1 = op->ival1; msg_head->ival2 = op->ival2; msg_head->nframes = op->nframes; bcm_send_to_user(op, msg_head, op->frames, 0); return MHSIZ; } /* * bcm_tx_setup - create or update a bcm tx op (for bcm_sendmsg) */ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, int ifindex, struct sock *sk) { struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; struct canfd_frame *cf; unsigned int i; int err; /* we need a real device to send frames */ if (!ifindex) return -ENODEV; /* check nframes boundaries - we need at least one CAN frame */ if (msg_head->nframes < 1 || msg_head->nframes > MAX_NFRAMES) return -EINVAL; /* check timeval limitations */ if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) return -EINVAL; /* check the given can_id */ op = bcm_find_op(&bo->tx_ops, msg_head, ifindex); if (op) { /* update existing BCM operation */ /* * Do we need more space for the CAN frames than currently * allocated? -> This is a _really_ unusual use-case and * therefore (complexity / locking) it is not supported. */ if (msg_head->nframes > op->nframes) return -E2BIG; /* update CAN frames content */ for (i = 0; i < msg_head->nframes; i++) { cf = op->frames + op->cfsiz * i; err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz); if (op->flags & CAN_FD_FRAME) { if (cf->len > 64) err = -EINVAL; } else { if (cf->len > 8) err = -EINVAL; } if (err < 0) return err; if (msg_head->flags & TX_CP_CAN_ID) { /* copy can_id into frame */ cf->can_id = msg_head->can_id; } } op->flags = msg_head->flags; } else { /* insert new BCM operation for the given can_id */ op = kzalloc(OPSIZ, GFP_KERNEL); if (!op) return -ENOMEM; op->can_id = msg_head->can_id; op->cfsiz = CFSIZ(msg_head->flags); op->flags = msg_head->flags; /* create array for CAN frames and copy the data */ if (msg_head->nframes > 1) { op->frames = kmalloc_array(msg_head->nframes, op->cfsiz, GFP_KERNEL); if (!op->frames) { kfree(op); return -ENOMEM; } } else op->frames = &op->sframe; for (i = 0; i < msg_head->nframes; i++) { cf = op->frames + op->cfsiz * i; err = memcpy_from_msg((u8 *)cf, msg, op->cfsiz); if (err < 0) goto free_op; if (op->flags & CAN_FD_FRAME) { if (cf->len > 64) err = -EINVAL; } else { if (cf->len > 8) err = -EINVAL; } if (err < 0) goto free_op; if (msg_head->flags & TX_CP_CAN_ID) { /* copy can_id into frame */ cf->can_id = msg_head->can_id; } } /* tx_ops never compare with previous received messages */ op->last_frames = NULL; /* bcm_can_tx / bcm_tx_timeout_handler needs this */ op->sk = sk; op->ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_tx_timeout_handler; /* currently unused in tx_ops */ hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); /* add this bcm_op to the list of the tx_ops */ list_add(&op->list, &bo->tx_ops); } /* if ((op = bcm_find_op(&bo->tx_ops, msg_head->can_id, ifindex))) */ if (op->nframes != msg_head->nframes) { op->nframes = msg_head->nframes; /* start multiple frame transmission with index 0 */ op->currframe = 0; } /* check flags */ if (op->flags & TX_RESET_MULTI_IDX) { /* start multiple frame transmission with index 0 */ op->currframe = 0; } if (op->flags & SETTIMER) { /* set timer values */ op->count = msg_head->count; op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2); /* disable an active timer due to zero values? */ if (!op->kt_ival1 && !op->kt_ival2) hrtimer_cancel(&op->timer); } if (op->flags & STARTTIMER) { hrtimer_cancel(&op->timer); /* spec: send CAN frame when starting timer */ op->flags |= TX_ANNOUNCE; } if (op->flags & TX_ANNOUNCE) { bcm_can_tx(op); if (op->count) op->count--; } if (op->flags & STARTTIMER) bcm_tx_start_timer(op); return msg_head->nframes * op->cfsiz + MHSIZ; free_op: if (op->frames != &op->sframe) kfree(op->frames); kfree(op); return err; } /* * bcm_rx_setup - create or update a bcm rx op (for bcm_sendmsg) */ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, int ifindex, struct sock *sk) { struct bcm_sock *bo = bcm_sk(sk); struct bcm_op *op; int do_rx_register; int err = 0; if ((msg_head->flags & RX_FILTER_ID) || (!(msg_head->nframes))) { /* be robust against wrong usage ... */ msg_head->flags |= RX_FILTER_ID; /* ignore trailing garbage */ msg_head->nframes = 0; } /* the first element contains the mux-mask => MAX_NFRAMES + 1 */ if (msg_head->nframes > MAX_NFRAMES + 1) return -EINVAL; if ((msg_head->flags & RX_RTR_FRAME) && ((msg_head->nframes != 1) || (!(msg_head->can_id & CAN_RTR_FLAG)))) return -EINVAL; /* check timeval limitations */ if ((msg_head->flags & SETTIMER) && bcm_is_invalid_tv(msg_head)) return -EINVAL; /* check the given can_id */ op = bcm_find_op(&bo->rx_ops, msg_head, ifindex); if (op) { /* update existing BCM operation */ /* * Do we need more space for the CAN frames than currently * allocated? -> This is a _really_ unusual use-case and * therefore (complexity / locking) it is not supported. */ if (msg_head->nframes > op->nframes) return -E2BIG; if (msg_head->nframes) { /* update CAN frames content */ err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz); if (err < 0) return err; /* clear last_frames to indicate 'nothing received' */ memset(op->last_frames, 0, msg_head->nframes * op->cfsiz); } op->nframes = msg_head->nframes; op->flags = msg_head->flags; /* Only an update -> do not call can_rx_register() */ do_rx_register = 0; } else { /* insert new BCM operation for the given can_id */ op = kzalloc(OPSIZ, GFP_KERNEL); if (!op) return -ENOMEM; op->can_id = msg_head->can_id; op->nframes = msg_head->nframes; op->cfsiz = CFSIZ(msg_head->flags); op->flags = msg_head->flags; if (msg_head->nframes > 1) { /* create array for CAN frames and copy the data */ op->frames = kmalloc_array(msg_head->nframes, op->cfsiz, GFP_KERNEL); if (!op->frames) { kfree(op); return -ENOMEM; } /* create and init array for received CAN frames */ op->last_frames = kcalloc(msg_head->nframes, op->cfsiz, GFP_KERNEL); if (!op->last_frames) { kfree(op->frames); kfree(op); return -ENOMEM; } } else { op->frames = &op->sframe; op->last_frames = &op->last_sframe; } if (msg_head->nframes) { err = memcpy_from_msg(op->frames, msg, msg_head->nframes * op->cfsiz); if (err < 0) { if (op->frames != &op->sframe) kfree(op->frames); if (op->last_frames != &op->last_sframe) kfree(op->last_frames); kfree(op); return err; } } /* bcm_can_tx / bcm_tx_timeout_handler needs this */ op->sk = sk; op->ifindex = ifindex; /* ifindex for timeout events w/o previous frame reception */ op->rx_ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_rx_timeout_handler; hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT); op->thrtimer.function = bcm_rx_thr_handler; /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); /* call can_rx_register() */ do_rx_register = 1; } /* if ((op = bcm_find_op(&bo->rx_ops, msg_head->can_id, ifindex))) */ /* check flags */ if (op->flags & RX_RTR_FRAME) { struct canfd_frame *frame0 = op->frames; /* no timers in RTR-mode */ hrtimer_cancel(&op->thrtimer); hrtimer_cancel(&op->timer); /* * funny feature in RX(!)_SETUP only for RTR-mode: * copy can_id into frame BUT without RTR-flag to * prevent a full-load-loopback-test ... ;-] */ if ((op->flags & TX_CP_CAN_ID) || (frame0->can_id == op->can_id)) frame0->can_id = op->can_id & ~CAN_RTR_FLAG; } else { if (op->flags & SETTIMER) { /* set timer value */ op->ival1 = msg_head->ival1; op->ival2 = msg_head->ival2; op->kt_ival1 = bcm_timeval_to_ktime(msg_head->ival1); op->kt_ival2 = bcm_timeval_to_ktime(msg_head->ival2); /* disable an active timer due to zero value? */ if (!op->kt_ival1) hrtimer_cancel(&op->timer); /* * In any case cancel the throttle timer, flush * potentially blocked msgs and reset throttle handling */ op->kt_lastmsg = 0; hrtimer_cancel(&op->thrtimer); bcm_rx_thr_flush(op); } if ((op->flags & STARTTIMER) && op->kt_ival1) hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); } /* now we can register for can_ids, if we added a new bcm_op */ if (do_rx_register) { if (ifindex) { struct net_device *dev; dev = dev_get_by_index(sock_net(sk), ifindex); if (dev) { err = can_rx_register(sock_net(sk), dev, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); op->rx_reg_dev = dev; dev_put(dev); } } else err = can_rx_register(sock_net(sk), NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op, "bcm", sk); if (err) { /* this bcm rx op is broken -> remove it */ list_del(&op->list); bcm_remove_op(op); return err; } } return msg_head->nframes * op->cfsiz + MHSIZ; } /* * bcm_tx_send - send a single CAN frame to the CAN interface (for bcm_sendmsg) */ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk, int cfsiz) { struct sk_buff *skb; struct net_device *dev; int err; /* we need a real device to send frames */ if (!ifindex) return -ENODEV; skb = alloc_skb(cfsiz + sizeof(struct can_skb_priv), GFP_KERNEL); if (!skb) return -ENOMEM; can_skb_reserve(skb); err = memcpy_from_msg(skb_put(skb, cfsiz), msg, cfsiz); if (err < 0) { kfree_skb(skb); return err; } dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) { kfree_skb(skb); return -ENODEV; } can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; skb->dev = dev; can_skb_set_owner(skb, sk); err = can_send(skb, 1); /* send with loopback */ dev_put(dev); if (err) return err; return cfsiz + MHSIZ; } /* * bcm_sendmsg - process BCM commands (opcodes) from the userspace */ static int bcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); int ifindex = bo->ifindex; /* default ifindex for this bcm_op */ struct bcm_msg_head msg_head; int cfsiz; int ret; /* read bytes or error codes as return value */ if (!bo->bound) return -ENOTCONN; /* check for valid message length from userspace */ if (size < MHSIZ) return -EINVAL; /* read message head information */ ret = memcpy_from_msg((u8 *)&msg_head, msg, MHSIZ); if (ret < 0) return ret; cfsiz = CFSIZ(msg_head.flags); if ((size - MHSIZ) % cfsiz) return -EINVAL; /* check for alternative ifindex for this bcm_op */ if (!ifindex && msg->msg_name) { /* no bound device as default => check msg_name */ DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); if (msg->msg_namelen < BCM_MIN_NAMELEN) return -EINVAL; if (addr->can_family != AF_CAN) return -EINVAL; /* ifindex from sendto() */ ifindex = addr->can_ifindex; if (ifindex) { struct net_device *dev; dev = dev_get_by_index(sock_net(sk), ifindex); if (!dev) return -ENODEV; if (dev->type != ARPHRD_CAN) { dev_put(dev); return -ENODEV; } dev_put(dev); } } lock_sock(sk); switch (msg_head.opcode) { case TX_SETUP: ret = bcm_tx_setup(&msg_head, msg, ifindex, sk); break; case RX_SETUP: ret = bcm_rx_setup(&msg_head, msg, ifindex, sk); break; case TX_DELETE: if (bcm_delete_tx_op(&bo->tx_ops, &msg_head, ifindex)) ret = MHSIZ; else ret = -EINVAL; break; case RX_DELETE: if (bcm_delete_rx_op(&bo->rx_ops, &msg_head, ifindex)) ret = MHSIZ; else ret = -EINVAL; break; case TX_READ: /* reuse msg_head for the reply to TX_READ */ msg_head.opcode = TX_STATUS; ret = bcm_read_op(&bo->tx_ops, &msg_head, ifindex); break; case RX_READ: /* reuse msg_head for the reply to RX_READ */ msg_head.opcode = RX_STATUS; ret = bcm_read_op(&bo->rx_ops, &msg_head, ifindex); break; case TX_SEND: /* we need exactly one CAN frame behind the msg head */ if ((msg_head.nframes != 1) || (size != cfsiz + MHSIZ)) ret = -EINVAL; else ret = bcm_tx_send(msg, ifindex, sk, cfsiz); break; default: ret = -EINVAL; break; } release_sock(sk); return ret; } /* * notification handler for netdevice status changes */ static void bcm_notify(struct bcm_sock *bo, unsigned long msg, struct net_device *dev) { struct sock *sk = &bo->sk; struct bcm_op *op; int notify_enodev = 0; if (!net_eq(dev_net(dev), sock_net(sk))) return; switch (msg) { case NETDEV_UNREGISTER: lock_sock(sk); /* remove device specific receive entries */ list_for_each_entry(op, &bo->rx_ops, list) if (op->rx_reg_dev == dev) bcm_rx_unreg(dev, op); /* remove device reference, if this is our bound device */ if (bo->bound && bo->ifindex == dev->ifindex) { bo->bound = 0; bo->ifindex = 0; notify_enodev = 1; } release_sock(sk); if (notify_enodev) { sk->sk_err = ENODEV; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } break; case NETDEV_DOWN: if (bo->bound && bo->ifindex == dev->ifindex) { sk->sk_err = ENETDOWN; if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } } } static int bcm_notifier(struct notifier_block *nb, unsigned long msg, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); if (dev->type != ARPHRD_CAN) return NOTIFY_DONE; if (msg != NETDEV_UNREGISTER && msg != NETDEV_DOWN) return NOTIFY_DONE; if (unlikely(bcm_busy_notifier)) /* Check for reentrant bug. */ return NOTIFY_DONE; spin_lock(&bcm_notifier_lock); list_for_each_entry(bcm_busy_notifier, &bcm_notifier_list, notifier) { spin_unlock(&bcm_notifier_lock); bcm_notify(bcm_busy_notifier, msg, dev); spin_lock(&bcm_notifier_lock); } bcm_busy_notifier = NULL; spin_unlock(&bcm_notifier_lock); return NOTIFY_DONE; } /* * initial settings for all BCM sockets to be set at socket creation time */ static int bcm_init(struct sock *sk) { struct bcm_sock *bo = bcm_sk(sk); bo->bound = 0; bo->ifindex = 0; bo->dropped_usr_msgs = 0; bo->bcm_proc_read = NULL; INIT_LIST_HEAD(&bo->tx_ops); INIT_LIST_HEAD(&bo->rx_ops); /* set notifier */ spin_lock(&bcm_notifier_lock); list_add_tail(&bo->notifier, &bcm_notifier_list); spin_unlock(&bcm_notifier_lock); return 0; } /* * standard socket functions */ static int bcm_release(struct socket *sock) { struct sock *sk = sock->sk; struct net *net; struct bcm_sock *bo; struct bcm_op *op, *next; if (!sk) return 0; net = sock_net(sk); bo = bcm_sk(sk); /* remove bcm_ops, timer, rx_unregister(), etc. */ spin_lock(&bcm_notifier_lock); while (bcm_busy_notifier == bo) { spin_unlock(&bcm_notifier_lock); schedule_timeout_uninterruptible(1); spin_lock(&bcm_notifier_lock); } list_del(&bo->notifier); spin_unlock(&bcm_notifier_lock); lock_sock(sk); #if IS_ENABLED(CONFIG_PROC_FS) /* remove procfs entry */ if (net->can.bcmproc_dir && bo->bcm_proc_read) remove_proc_entry(bo->procname, net->can.bcmproc_dir); #endif /* CONFIG_PROC_FS */ list_for_each_entry_safe(op, next, &bo->tx_ops, list) bcm_remove_op(op); list_for_each_entry_safe(op, next, &bo->rx_ops, list) { /* * Don't care if we're bound or not (due to netdev problems) * can_rx_unregister() is always a save thing to do here. */ if (op->ifindex) { /* * Only remove subscriptions that had not * been removed due to NETDEV_UNREGISTER * in bcm_notifier() */ if (op->rx_reg_dev) { struct net_device *dev; dev = dev_get_by_index(net, op->ifindex); if (dev) { bcm_rx_unreg(dev, op); dev_put(dev); } } } else can_rx_unregister(net, NULL, op->can_id, REGMASK(op->can_id), bcm_rx_handler, op); } synchronize_rcu(); list_for_each_entry_safe(op, next, &bo->rx_ops, list) bcm_remove_op(op); /* remove device reference */ if (bo->bound) { bo->bound = 0; bo->ifindex = 0; } sock_orphan(sk); sock->sk = NULL; release_sock(sk); sock_put(sk); return 0; } static int bcm_connect(struct socket *sock, struct sockaddr *uaddr, int len, int flags) { struct sockaddr_can *addr = (struct sockaddr_can *)uaddr; struct sock *sk = sock->sk; struct bcm_sock *bo = bcm_sk(sk); struct net *net = sock_net(sk); int ret = 0; if (len < BCM_MIN_NAMELEN) return -EINVAL; lock_sock(sk); if (bo->bound) { ret = -EISCONN; goto fail; } /* bind a device to this socket */ if (addr->can_ifindex) { struct net_device *dev; dev = dev_get_by_index(net, addr->can_ifindex); if (!dev) { ret = -ENODEV; goto fail; } if (dev->type != ARPHRD_CAN) { dev_put(dev); ret = -ENODEV; goto fail; } bo->ifindex = dev->ifindex; dev_put(dev); } else { /* no interface reference for ifindex = 0 ('any' CAN device) */ bo->ifindex = 0; } #if IS_ENABLED(CONFIG_PROC_FS) if (net->can.bcmproc_dir) { /* unique socket address as filename */ sprintf(bo->procname, "%lu", sock_i_ino(sk)); bo->bcm_proc_read = proc_create_net_single(bo->procname, 0644, net->can.bcmproc_dir, bcm_proc_show, sk); if (!bo->bcm_proc_read) { ret = -ENOMEM; goto fail; } } #endif /* CONFIG_PROC_FS */ bo->bound = 1; fail: release_sock(sk); return ret; } static int bcm_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; struct sk_buff *skb; int error = 0; int err; skb = skb_recv_datagram(sk, flags, &error); if (!skb) return error; if (skb->len < size) size = skb->len; err = memcpy_to_msg(msg, skb->data, size); if (err < 0) { skb_free_datagram(sk, skb); return err; } sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name) { __sockaddr_check_size(BCM_MIN_NAMELEN); msg->msg_namelen = BCM_MIN_NAMELEN; memcpy(msg->msg_name, skb->cb, msg->msg_namelen); } /* assign the flags that have been recorded in bcm_send_to_user() */ msg->msg_flags |= *(bcm_flags(skb)); skb_free_datagram(sk, skb); return size; } static int bcm_sock_no_ioctlcmd(struct socket *sock, unsigned int cmd, unsigned long arg) { /* no ioctls for socket layer -> hand it down to NIC layer */ return -ENOIOCTLCMD; } static const struct proto_ops bcm_ops = { .family = PF_CAN, .release = bcm_release, .bind = sock_no_bind, .connect = bcm_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = sock_no_getname, .poll = datagram_poll, .ioctl = bcm_sock_no_ioctlcmd, .gettstamp = sock_gettstamp, .listen = sock_no_listen, .shutdown = sock_no_shutdown, .sendmsg = bcm_sendmsg, .recvmsg = bcm_recvmsg, .mmap = sock_no_mmap, }; static struct proto bcm_proto __read_mostly = { .name = "CAN_BCM", .owner = THIS_MODULE, .obj_size = sizeof(struct bcm_sock), .init = bcm_init, }; static const struct can_proto bcm_can_proto = { .type = SOCK_DGRAM, .protocol = CAN_BCM, .ops = &bcm_ops, .prot = &bcm_proto, }; static int canbcm_pernet_init(struct net *net) { #if IS_ENABLED(CONFIG_PROC_FS) /* create /proc/net/can-bcm directory */ net->can.bcmproc_dir = proc_net_mkdir(net, "can-bcm", net->proc_net); #endif /* CONFIG_PROC_FS */ return 0; } static void canbcm_pernet_exit(struct net *net) { #if IS_ENABLED(CONFIG_PROC_FS) /* remove /proc/net/can-bcm directory */ if (net->can.bcmproc_dir) remove_proc_entry("can-bcm", net->proc_net); #endif /* CONFIG_PROC_FS */ } static struct pernet_operations canbcm_pernet_ops __read_mostly = { .init = canbcm_pernet_init, .exit = canbcm_pernet_exit, }; static struct notifier_block canbcm_notifier = { .notifier_call = bcm_notifier }; static int __init bcm_module_init(void) { int err; pr_info("can: broadcast manager protocol\n"); err = register_pernet_subsys(&canbcm_pernet_ops); if (err) return err; err = register_netdevice_notifier(&canbcm_notifier); if (err) goto register_notifier_failed; err = can_proto_register(&bcm_can_proto); if (err < 0) { printk(KERN_ERR "can: registration of bcm protocol failed\n"); goto register_proto_failed; } return 0; register_proto_failed: unregister_netdevice_notifier(&canbcm_notifier); register_notifier_failed: unregister_pernet_subsys(&canbcm_pernet_ops); return err; } static void __exit bcm_module_exit(void) { can_proto_unregister(&bcm_can_proto); unregister_netdevice_notifier(&canbcm_notifier); unregister_pernet_subsys(&canbcm_pernet_ops); } module_init(bcm_module_init); module_exit(bcm_module_exit); |
| 18 1 163 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IPC_NAMESPACE_H__ #define __IPC_NAMESPACE_H__ #include <linux/err.h> #include <linux/idr.h> #include <linux/rwsem.h> #include <linux/notifier.h> #include <linux/nsproxy.h> #include <linux/ns_common.h> #include <linux/refcount.h> #include <linux/rhashtable-types.h> #include <linux/sysctl.h> #include <linux/percpu_counter.h> struct user_namespace; struct ipc_ids { int in_use; unsigned short seq; struct rw_semaphore rwsem; struct idr ipcs_idr; int max_idx; int last_idx; /* For wrap around detection */ #ifdef CONFIG_CHECKPOINT_RESTORE int next_id; #endif struct rhashtable key_ht; }; struct ipc_namespace { struct ipc_ids ids[3]; int sem_ctls[4]; int used_sems; unsigned int msg_ctlmax; unsigned int msg_ctlmnb; unsigned int msg_ctlmni; struct percpu_counter percpu_msg_bytes; struct percpu_counter percpu_msg_hdrs; size_t shm_ctlmax; size_t shm_ctlall; unsigned long shm_tot; int shm_ctlmni; /* * Defines whether IPC_RMID is forced for _all_ shm segments regardless * of shmctl() */ int shm_rmid_forced; struct notifier_block ipcns_nb; /* The kern_mount of the mqueuefs sb. We take a ref on it */ struct vfsmount *mq_mnt; /* # queues in this ns, protected by mq_lock */ unsigned int mq_queues_count; /* next fields are set through sysctl */ unsigned int mq_queues_max; /* initialized to DFLT_QUEUESMAX */ unsigned int mq_msg_max; /* initialized to DFLT_MSGMAX */ unsigned int mq_msgsize_max; /* initialized to DFLT_MSGSIZEMAX */ unsigned int mq_msg_default; unsigned int mq_msgsize_default; struct ctl_table_set mq_set; struct ctl_table_header *mq_sysctls; struct ctl_table_set ipc_set; struct ctl_table_header *ipc_sysctls; /* user_ns which owns the ipc ns */ struct user_namespace *user_ns; struct ucounts *ucounts; struct llist_node mnt_llist; struct ns_common ns; } __randomize_layout; extern struct ipc_namespace init_ipc_ns; extern spinlock_t mq_lock; #ifdef CONFIG_SYSVIPC extern void shm_destroy_orphaned(struct ipc_namespace *ns); #else /* CONFIG_SYSVIPC */ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {} #endif /* CONFIG_SYSVIPC */ #ifdef CONFIG_POSIX_MQUEUE extern int mq_init_ns(struct ipc_namespace *ns); /* * POSIX Message Queue default values: * * MIN_*: Lowest value an admin can set the maximum unprivileged limit to * DFLT_*MAX: Default values for the maximum unprivileged limits * DFLT_{MSG,MSGSIZE}: Default values used when the user doesn't supply * an attribute to the open call and the queue must be created * HARD_*: Highest value the maximums can be set to. These are enforced * on CAP_SYS_RESOURCE apps as well making them inviolate (so make them * suitably high) * * POSIX Requirements: * Per app minimum openable message queues - 8. This does not map well * to the fact that we limit the number of queues on a per namespace * basis instead of a per app basis. So, make the default high enough * that no given app should have a hard time opening 8 queues. * Minimum maximum for HARD_MSGMAX - 32767. I bumped this to 65536. * Minimum maximum for HARD_MSGSIZEMAX - POSIX is silent on this. However, * we have run into a situation where running applications in the wild * require this to be at least 5MB, and preferably 10MB, so I set the * value to 16MB in hopes that this user is the worst of the bunch and * the new maximum will handle anyone else. I may have to revisit this * in the future. */ #define DFLT_QUEUESMAX 256 #define MIN_MSGMAX 1 #define DFLT_MSG 10U #define DFLT_MSGMAX 10 #define HARD_MSGMAX 65536 #define MIN_MSGSIZEMAX 128 #define DFLT_MSGSIZE 8192U #define DFLT_MSGSIZEMAX 8192 #define HARD_MSGSIZEMAX (16*1024*1024) #else static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; } #endif #if defined(CONFIG_IPC_NS) extern struct ipc_namespace *copy_ipcs(unsigned long flags, struct user_namespace *user_ns, struct ipc_namespace *ns); static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { if (ns) refcount_inc(&ns->ns.count); return ns; } static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) { if (ns) { if (refcount_inc_not_zero(&ns->ns.count)) return ns; } return NULL; } extern void put_ipc_ns(struct ipc_namespace *ns); #else static inline struct ipc_namespace *copy_ipcs(unsigned long flags, struct user_namespace *user_ns, struct ipc_namespace *ns) { if (flags & CLONE_NEWIPC) return ERR_PTR(-EINVAL); return ns; } static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns) { return ns; } static inline struct ipc_namespace *get_ipc_ns_not_zero(struct ipc_namespace *ns) { return ns; } static inline void put_ipc_ns(struct ipc_namespace *ns) { } #endif #ifdef CONFIG_POSIX_MQUEUE_SYSCTL void retire_mq_sysctls(struct ipc_namespace *ns); bool setup_mq_sysctls(struct ipc_namespace *ns); #else /* CONFIG_POSIX_MQUEUE_SYSCTL */ static inline void retire_mq_sysctls(struct ipc_namespace *ns) { } static inline bool setup_mq_sysctls(struct ipc_namespace *ns) { return true; } #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ #ifdef CONFIG_SYSVIPC_SYSCTL bool setup_ipc_sysctls(struct ipc_namespace *ns); void retire_ipc_sysctls(struct ipc_namespace *ns); #else /* CONFIG_SYSVIPC_SYSCTL */ static inline void retire_ipc_sysctls(struct ipc_namespace *ns) { } static inline bool setup_ipc_sysctls(struct ipc_namespace *ns) { return true; } #endif /* CONFIG_SYSVIPC_SYSCTL */ #endif |
| 116 116 116 31 128 127 2 3 11 119 128 22 22 127 128 85 46 9 35 1 117 115 46 44 3 2 1 103 103 102 1 83 109 64 1 116 91 12 64 28 2 28 100 1 96 76 95 14 14 14 14 10 14 7 7 7 7 14 14 14 14 15 12 1 104 102 103 104 18 95 1 9 10 100 96 85 17 95 94 2 96 2 96 84 14 96 104 96 48 51 94 13 1 94 1 14 47 9 9 9 44 1 47 2 32 19 8 6 6 6 6 14 5 10 1 10 15 15 10 5 2 2 2 2 2 2 13 12 19 7 9 13 2 6 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 4 4 4 4 3 1 4 4 18 18 1 16 2 17 6 13 5 5 3 5 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2005 * Portions Copyright (C) Christoph Hellwig, 2001-2002 */ /* * jfs_txnmgr.c: transaction manager * * notes: * transaction starts with txBegin() and ends with txCommit() * or txAbort(). * * tlock is acquired at the time of update; * (obviate scan at commit time for xtree and dtree) * tlock and mp points to each other; * (no hashlist for mp -> tlock). * * special cases: * tlock on in-memory inode: * in-place tlock in the in-memory inode itself; * converted to page lock by iWrite() at commit time. * * tlock during write()/mmap() under anonymous transaction (tid = 0): * transferred (?) to transaction at commit time. * * use the page itself to update allocation maps * (obviate intermediate replication of allocation/deallocation data) * hold on to mp+lock thru update of maps */ #include <linux/fs.h> #include <linux/vmalloc.h> #include <linux/completion.h> #include <linux/freezer.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/kthread.h> #include <linux/seq_file.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_filsys.h" #include "jfs_metapage.h" #include "jfs_dinode.h" #include "jfs_imap.h" #include "jfs_dmap.h" #include "jfs_superblock.h" #include "jfs_debug.h" /* * transaction management structures */ static struct { int freetid; /* index of a free tid structure */ int freelock; /* index first free lock word */ wait_queue_head_t freewait; /* eventlist of free tblock */ wait_queue_head_t freelockwait; /* eventlist of free tlock */ wait_queue_head_t lowlockwait; /* eventlist of ample tlocks */ int tlocksInUse; /* Number of tlocks in use */ spinlock_t LazyLock; /* synchronize sync_queue & unlock_queue */ /* struct tblock *sync_queue; * Transactions waiting for data sync */ struct list_head unlock_queue; /* Txns waiting to be released */ struct list_head anon_list; /* inodes having anonymous txns */ struct list_head anon_list2; /* inodes having anonymous txns that couldn't be sync'ed */ } TxAnchor; int jfs_tlocks_low; /* Indicates low number of available tlocks */ #ifdef CONFIG_JFS_STATISTICS static struct { uint txBegin; uint txBegin_barrier; uint txBegin_lockslow; uint txBegin_freetid; uint txBeginAnon; uint txBeginAnon_barrier; uint txBeginAnon_lockslow; uint txLockAlloc; uint txLockAlloc_freelock; } TxStat; #endif static int nTxBlock = -1; /* number of transaction blocks */ module_param(nTxBlock, int, 0); MODULE_PARM_DESC(nTxBlock, "Number of transaction blocks (max:65536)"); static int nTxLock = -1; /* number of transaction locks */ module_param(nTxLock, int, 0); MODULE_PARM_DESC(nTxLock, "Number of transaction locks (max:65536)"); struct tblock *TxBlock; /* transaction block table */ static int TxLockLWM; /* Low water mark for number of txLocks used */ static int TxLockHWM; /* High water mark for number of txLocks used */ static int TxLockVHWM; /* Very High water mark */ struct tlock *TxLock; /* transaction lock table */ /* * transaction management lock */ static DEFINE_SPINLOCK(jfsTxnLock); #define TXN_LOCK() spin_lock(&jfsTxnLock) #define TXN_UNLOCK() spin_unlock(&jfsTxnLock) #define LAZY_LOCK_INIT() spin_lock_init(&TxAnchor.LazyLock) #define LAZY_LOCK(flags) spin_lock_irqsave(&TxAnchor.LazyLock, flags) #define LAZY_UNLOCK(flags) spin_unlock_irqrestore(&TxAnchor.LazyLock, flags) static DECLARE_WAIT_QUEUE_HEAD(jfs_commit_thread_wait); static int jfs_commit_thread_waking; /* * Retry logic exist outside these macros to protect from spurrious wakeups. */ static inline void TXN_SLEEP_DROP_LOCK(wait_queue_head_t * event) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(event, &wait); set_current_state(TASK_UNINTERRUPTIBLE); TXN_UNLOCK(); io_schedule(); remove_wait_queue(event, &wait); } #define TXN_SLEEP(event)\ {\ TXN_SLEEP_DROP_LOCK(event);\ TXN_LOCK();\ } #define TXN_WAKEUP(event) wake_up_all(event) /* * statistics */ static struct { tid_t maxtid; /* 4: biggest tid ever used */ lid_t maxlid; /* 4: biggest lid ever used */ int ntid; /* 4: # of transactions performed */ int nlid; /* 4: # of tlocks acquired */ int waitlock; /* 4: # of tlock wait */ } stattx; /* * forward references */ static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, struct tlock *tlck, struct commit *cd); static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, struct tlock *tlck); static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck); static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck); static void txAllocPMap(struct inode *ip, struct maplock * maplock, struct tblock * tblk); static void txForce(struct tblock * tblk); static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd); static void txUpdateMap(struct tblock * tblk); static void txRelease(struct tblock * tblk); static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck); static void LogSyncRelease(struct metapage * mp); /* * transaction block/lock management * --------------------------------- */ /* * Get a transaction lock from the free list. If the number in use is * greater than the high water mark, wake up the sync daemon. This should * free some anonymous transaction locks. (TXN_LOCK must be held.) */ static lid_t txLockAlloc(void) { lid_t lid; INCREMENT(TxStat.txLockAlloc); if (!TxAnchor.freelock) { INCREMENT(TxStat.txLockAlloc_freelock); } while (!(lid = TxAnchor.freelock)) TXN_SLEEP(&TxAnchor.freelockwait); TxAnchor.freelock = TxLock[lid].next; HIGHWATERMARK(stattx.maxlid, lid); if ((++TxAnchor.tlocksInUse > TxLockHWM) && (jfs_tlocks_low == 0)) { jfs_info("txLockAlloc tlocks low"); jfs_tlocks_low = 1; wake_up_process(jfsSyncThread); } return lid; } static void txLockFree(lid_t lid) { TxLock[lid].tid = 0; TxLock[lid].next = TxAnchor.freelock; TxAnchor.freelock = lid; TxAnchor.tlocksInUse--; if (jfs_tlocks_low && (TxAnchor.tlocksInUse < TxLockLWM)) { jfs_info("txLockFree jfs_tlocks_low no more"); jfs_tlocks_low = 0; TXN_WAKEUP(&TxAnchor.lowlockwait); } TXN_WAKEUP(&TxAnchor.freelockwait); } /* * NAME: txInit() * * FUNCTION: initialize transaction management structures * * RETURN: * * serialization: single thread at jfs_init() */ int txInit(void) { int k, size; struct sysinfo si; /* Set defaults for nTxLock and nTxBlock if unset */ if (nTxLock == -1) { if (nTxBlock == -1) { /* Base default on memory size */ si_meminfo(&si); if (si.totalram > (256 * 1024)) /* 1 GB */ nTxLock = 64 * 1024; else nTxLock = si.totalram >> 2; } else if (nTxBlock > (8 * 1024)) nTxLock = 64 * 1024; else nTxLock = nTxBlock << 3; } if (nTxBlock == -1) nTxBlock = nTxLock >> 3; /* Verify tunable parameters */ if (nTxBlock < 16) nTxBlock = 16; /* No one should set it this low */ if (nTxBlock > 65536) nTxBlock = 65536; if (nTxLock < 256) nTxLock = 256; /* No one should set it this low */ if (nTxLock > 65536) nTxLock = 65536; printk(KERN_INFO "JFS: nTxBlock = %d, nTxLock = %d\n", nTxBlock, nTxLock); /* * initialize transaction block (tblock) table * * transaction id (tid) = tblock index * tid = 0 is reserved. */ TxLockLWM = (nTxLock * 4) / 10; TxLockHWM = (nTxLock * 7) / 10; TxLockVHWM = (nTxLock * 8) / 10; size = sizeof(struct tblock) * nTxBlock; TxBlock = vmalloc(size); if (TxBlock == NULL) return -ENOMEM; for (k = 1; k < nTxBlock - 1; k++) { TxBlock[k].next = k + 1; init_waitqueue_head(&TxBlock[k].gcwait); init_waitqueue_head(&TxBlock[k].waitor); } TxBlock[k].next = 0; init_waitqueue_head(&TxBlock[k].gcwait); init_waitqueue_head(&TxBlock[k].waitor); TxAnchor.freetid = 1; init_waitqueue_head(&TxAnchor.freewait); stattx.maxtid = 1; /* statistics */ /* * initialize transaction lock (tlock) table * * transaction lock id = tlock index * tlock id = 0 is reserved. */ size = sizeof(struct tlock) * nTxLock; TxLock = vmalloc(size); if (TxLock == NULL) { vfree(TxBlock); return -ENOMEM; } /* initialize tlock table */ for (k = 1; k < nTxLock - 1; k++) TxLock[k].next = k + 1; TxLock[k].next = 0; init_waitqueue_head(&TxAnchor.freelockwait); init_waitqueue_head(&TxAnchor.lowlockwait); TxAnchor.freelock = 1; TxAnchor.tlocksInUse = 0; INIT_LIST_HEAD(&TxAnchor.anon_list); INIT_LIST_HEAD(&TxAnchor.anon_list2); LAZY_LOCK_INIT(); INIT_LIST_HEAD(&TxAnchor.unlock_queue); stattx.maxlid = 1; /* statistics */ return 0; } /* * NAME: txExit() * * FUNCTION: clean up when module is unloaded */ void txExit(void) { vfree(TxLock); TxLock = NULL; vfree(TxBlock); TxBlock = NULL; } /* * NAME: txBegin() * * FUNCTION: start a transaction. * * PARAMETER: sb - superblock * flag - force for nested tx; * * RETURN: tid - transaction id * * note: flag force allows to start tx for nested tx * to prevent deadlock on logsync barrier; */ tid_t txBegin(struct super_block *sb, int flag) { tid_t t; struct tblock *tblk; struct jfs_log *log; jfs_info("txBegin: flag = 0x%x", flag); log = JFS_SBI(sb)->log; if (!log) { jfs_error(sb, "read-only filesystem\n"); return 0; } TXN_LOCK(); INCREMENT(TxStat.txBegin); retry: if (!(flag & COMMIT_FORCE)) { /* * synchronize with logsync barrier */ if (test_bit(log_SYNCBARRIER, &log->flag) || test_bit(log_QUIESCE, &log->flag)) { INCREMENT(TxStat.txBegin_barrier); TXN_SLEEP(&log->syncwait); goto retry; } } if (flag == 0) { /* * Don't begin transaction if we're getting starved for tlocks * unless COMMIT_FORCE or COMMIT_INODE (which may ultimately * free tlocks) */ if (TxAnchor.tlocksInUse > TxLockVHWM) { INCREMENT(TxStat.txBegin_lockslow); TXN_SLEEP(&TxAnchor.lowlockwait); goto retry; } } /* * allocate transaction id/block */ if ((t = TxAnchor.freetid) == 0) { jfs_info("txBegin: waiting for free tid"); INCREMENT(TxStat.txBegin_freetid); TXN_SLEEP(&TxAnchor.freewait); goto retry; } tblk = tid_to_tblock(t); if ((tblk->next == 0) && !(flag & COMMIT_FORCE)) { /* Don't let a non-forced transaction take the last tblk */ jfs_info("txBegin: waiting for free tid"); INCREMENT(TxStat.txBegin_freetid); TXN_SLEEP(&TxAnchor.freewait); goto retry; } TxAnchor.freetid = tblk->next; /* * initialize transaction */ /* * We can't zero the whole thing or we screw up another thread being * awakened after sleeping on tblk->waitor * * memset(tblk, 0, sizeof(struct tblock)); */ tblk->next = tblk->last = tblk->xflag = tblk->flag = tblk->lsn = 0; tblk->sb = sb; ++log->logtid; tblk->logtid = log->logtid; ++log->active; HIGHWATERMARK(stattx.maxtid, t); /* statistics */ INCREMENT(stattx.ntid); /* statistics */ TXN_UNLOCK(); jfs_info("txBegin: returning tid = %d", t); return t; } /* * NAME: txBeginAnon() * * FUNCTION: start an anonymous transaction. * Blocks if logsync or available tlocks are low to prevent * anonymous tlocks from depleting supply. * * PARAMETER: sb - superblock * * RETURN: none */ void txBeginAnon(struct super_block *sb) { struct jfs_log *log; log = JFS_SBI(sb)->log; TXN_LOCK(); INCREMENT(TxStat.txBeginAnon); retry: /* * synchronize with logsync barrier */ if (test_bit(log_SYNCBARRIER, &log->flag) || test_bit(log_QUIESCE, &log->flag)) { INCREMENT(TxStat.txBeginAnon_barrier); TXN_SLEEP(&log->syncwait); goto retry; } /* * Don't begin transaction if we're getting starved for tlocks */ if (TxAnchor.tlocksInUse > TxLockVHWM) { INCREMENT(TxStat.txBeginAnon_lockslow); TXN_SLEEP(&TxAnchor.lowlockwait); goto retry; } TXN_UNLOCK(); } /* * txEnd() * * function: free specified transaction block. * * logsync barrier processing: * * serialization: */ void txEnd(tid_t tid) { struct tblock *tblk = tid_to_tblock(tid); struct jfs_log *log; jfs_info("txEnd: tid = %d", tid); TXN_LOCK(); /* * wakeup transactions waiting on the page locked * by the current transaction */ TXN_WAKEUP(&tblk->waitor); log = JFS_SBI(tblk->sb)->log; /* * Lazy commit thread can't free this guy until we mark it UNLOCKED, * otherwise, we would be left with a transaction that may have been * reused. * * Lazy commit thread will turn off tblkGC_LAZY before calling this * routine. */ if (tblk->flag & tblkGC_LAZY) { jfs_info("txEnd called w/lazy tid: %d, tblk = 0x%p", tid, tblk); TXN_UNLOCK(); spin_lock_irq(&log->gclock); // LOGGC_LOCK tblk->flag |= tblkGC_UNLOCKED; spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK return; } jfs_info("txEnd: tid: %d, tblk = 0x%p", tid, tblk); assert(tblk->next == 0); /* * insert tblock back on freelist */ tblk->next = TxAnchor.freetid; TxAnchor.freetid = tid; /* * mark the tblock not active */ if (--log->active == 0) { clear_bit(log_FLUSH, &log->flag); /* * synchronize with logsync barrier */ if (test_bit(log_SYNCBARRIER, &log->flag)) { TXN_UNLOCK(); /* write dirty metadata & forward log syncpt */ jfs_syncpt(log, 1); jfs_info("log barrier off: 0x%x", log->lsn); /* enable new transactions start */ clear_bit(log_SYNCBARRIER, &log->flag); /* wakeup all waitors for logsync barrier */ TXN_WAKEUP(&log->syncwait); goto wakeup; } } TXN_UNLOCK(); wakeup: /* * wakeup all waitors for a free tblock */ TXN_WAKEUP(&TxAnchor.freewait); } /* * txLock() * * function: acquire a transaction lock on the specified <mp> * * parameter: * * return: transaction lock id * * serialization: */ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp, int type) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); int dir_xtree = 0; lid_t lid; tid_t xtid; struct tlock *tlck; struct xtlock *xtlck; struct linelock *linelock; xtpage_t *p; struct tblock *tblk; TXN_LOCK(); if (S_ISDIR(ip->i_mode) && (type & tlckXTREE) && !(mp->xflag & COMMIT_PAGE)) { /* * Directory inode is special. It can have both an xtree tlock * and a dtree tlock associated with it. */ dir_xtree = 1; lid = jfs_ip->xtlid; } else lid = mp->lid; /* is page not locked by a transaction ? */ if (lid == 0) goto allocateLock; jfs_info("txLock: tid:%d ip:0x%p mp:0x%p lid:%d", tid, ip, mp, lid); /* is page locked by the requester transaction ? */ tlck = lid_to_tlock(lid); if ((xtid = tlck->tid) == tid) { TXN_UNLOCK(); goto grantLock; } /* * is page locked by anonymous transaction/lock ? * * (page update without transaction (i.e., file write) is * locked under anonymous transaction tid = 0: * anonymous tlocks maintained on anonymous tlock list of * the inode of the page and available to all anonymous * transactions until txCommit() time at which point * they are transferred to the transaction tlock list of * the committing transaction of the inode) */ if (xtid == 0) { tlck->tid = tid; TXN_UNLOCK(); tblk = tid_to_tblock(tid); /* * The order of the tlocks in the transaction is important * (during truncate, child xtree pages must be freed before * parent's tlocks change the working map). * Take tlock off anonymous list and add to tail of * transaction list * * Note: We really need to get rid of the tid & lid and * use list_head's. This code is getting UGLY! */ if (jfs_ip->atlhead == lid) { if (jfs_ip->atltail == lid) { /* only anonymous txn. * Remove from anon_list */ TXN_LOCK(); list_del_init(&jfs_ip->anon_inode_list); TXN_UNLOCK(); } jfs_ip->atlhead = tlck->next; } else { lid_t last; for (last = jfs_ip->atlhead; lid_to_tlock(last)->next != lid; last = lid_to_tlock(last)->next) { assert(last); } lid_to_tlock(last)->next = tlck->next; if (jfs_ip->atltail == lid) jfs_ip->atltail = last; } /* insert the tlock at tail of transaction tlock list */ if (tblk->next) lid_to_tlock(tblk->last)->next = lid; else tblk->next = lid; tlck->next = 0; tblk->last = lid; goto grantLock; } goto waitLock; /* * allocate a tlock */ allocateLock: lid = txLockAlloc(); tlck = lid_to_tlock(lid); /* * initialize tlock */ tlck->tid = tid; TXN_UNLOCK(); /* mark tlock for meta-data page */ if (mp->xflag & COMMIT_PAGE) { tlck->flag = tlckPAGELOCK; /* mark the page dirty and nohomeok */ metapage_nohomeok(mp); jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p", mp, mp->nohomeok, tid, tlck); /* if anonymous transaction, and buffer is on the group * commit synclist, mark inode to show this. This will * prevent the buffer from being marked nohomeok for too * long a time. */ if ((tid == 0) && mp->lsn) set_cflag(COMMIT_Synclist, ip); } /* mark tlock for in-memory inode */ else tlck->flag = tlckINODELOCK; if (S_ISDIR(ip->i_mode)) tlck->flag |= tlckDIRECTORY; tlck->type = 0; /* bind the tlock and the page */ tlck->ip = ip; tlck->mp = mp; if (dir_xtree) jfs_ip->xtlid = lid; else mp->lid = lid; /* * enqueue transaction lock to transaction/inode */ /* insert the tlock at tail of transaction tlock list */ if (tid) { tblk = tid_to_tblock(tid); if (tblk->next) lid_to_tlock(tblk->last)->next = lid; else tblk->next = lid; tlck->next = 0; tblk->last = lid; } /* anonymous transaction: * insert the tlock at head of inode anonymous tlock list */ else { tlck->next = jfs_ip->atlhead; jfs_ip->atlhead = lid; if (tlck->next == 0) { /* This inode's first anonymous transaction */ jfs_ip->atltail = lid; TXN_LOCK(); list_add_tail(&jfs_ip->anon_inode_list, &TxAnchor.anon_list); TXN_UNLOCK(); } } /* initialize type dependent area for linelock */ linelock = (struct linelock *) & tlck->lock; linelock->next = 0; linelock->flag = tlckLINELOCK; linelock->maxcnt = TLOCKSHORT; linelock->index = 0; switch (type & tlckTYPE) { case tlckDTREE: linelock->l2linesize = L2DTSLOTSIZE; break; case tlckXTREE: linelock->l2linesize = L2XTSLOTSIZE; xtlck = (struct xtlock *) linelock; xtlck->header.offset = 0; xtlck->header.length = 2; if (type & tlckNEW) { xtlck->lwm.offset = XTENTRYSTART; } else { if (mp->xflag & COMMIT_PAGE) p = (xtpage_t *) mp->data; else p = (xtpage_t *) &jfs_ip->i_xtroot; xtlck->lwm.offset = le16_to_cpu(p->header.nextindex); } xtlck->lwm.length = 0; /* ! */ xtlck->twm.offset = 0; xtlck->hwm.offset = 0; xtlck->index = 2; break; case tlckINODE: linelock->l2linesize = L2INODESLOTSIZE; break; case tlckDATA: linelock->l2linesize = L2DATASLOTSIZE; break; default: jfs_err("UFO tlock:0x%p", tlck); } /* * update tlock vector */ grantLock: tlck->type |= type; return tlck; /* * page is being locked by another transaction: */ waitLock: /* Only locks on ipimap or ipaimap should reach here */ /* assert(jfs_ip->fileset == AGGREGATE_I); */ if (jfs_ip->fileset != AGGREGATE_I) { printk(KERN_ERR "txLock: trying to lock locked page!"); print_hex_dump(KERN_ERR, "ip: ", DUMP_PREFIX_ADDRESS, 16, 4, ip, sizeof(*ip), 0); print_hex_dump(KERN_ERR, "mp: ", DUMP_PREFIX_ADDRESS, 16, 4, mp, sizeof(*mp), 0); print_hex_dump(KERN_ERR, "Locker's tblock: ", DUMP_PREFIX_ADDRESS, 16, 4, tid_to_tblock(tid), sizeof(struct tblock), 0); print_hex_dump(KERN_ERR, "Tlock: ", DUMP_PREFIX_ADDRESS, 16, 4, tlck, sizeof(*tlck), 0); BUG(); } INCREMENT(stattx.waitlock); /* statistics */ TXN_UNLOCK(); release_metapage(mp); TXN_LOCK(); xtid = tlck->tid; /* reacquire after dropping TXN_LOCK */ jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d", tid, xtid, lid); /* Recheck everything since dropping TXN_LOCK */ if (xtid && (tlck->mp == mp) && (mp->lid == lid)) TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor); else TXN_UNLOCK(); jfs_info("txLock: awakened tid = %d, lid = %d", tid, lid); return NULL; } /* * NAME: txRelease() * * FUNCTION: Release buffers associated with transaction locks, but don't * mark homeok yet. The allows other transactions to modify * buffers, but won't let them go to disk until commit record * actually gets written. * * PARAMETER: * tblk - * * RETURN: Errors from subroutines. */ static void txRelease(struct tblock * tblk) { struct metapage *mp; lid_t lid; struct tlock *tlck; TXN_LOCK(); for (lid = tblk->next; lid; lid = tlck->next) { tlck = lid_to_tlock(lid); if ((mp = tlck->mp) != NULL && (tlck->type & tlckBTROOT) == 0) { assert(mp->xflag & COMMIT_PAGE); mp->lid = 0; } } /* * wakeup transactions waiting on a page locked * by the current transaction */ TXN_WAKEUP(&tblk->waitor); TXN_UNLOCK(); } /* * NAME: txUnlock() * * FUNCTION: Initiates pageout of pages modified by tid in journalled * objects and frees their lockwords. */ static void txUnlock(struct tblock * tblk) { struct tlock *tlck; struct linelock *linelock; lid_t lid, next, llid, k; struct metapage *mp; struct jfs_log *log; int difft, diffp; unsigned long flags; jfs_info("txUnlock: tblk = 0x%p", tblk); log = JFS_SBI(tblk->sb)->log; /* * mark page under tlock homeok (its log has been written): */ for (lid = tblk->next; lid; lid = next) { tlck = lid_to_tlock(lid); next = tlck->next; jfs_info("unlocking lid = %d, tlck = 0x%p", lid, tlck); /* unbind page from tlock */ if ((mp = tlck->mp) != NULL && (tlck->type & tlckBTROOT) == 0) { assert(mp->xflag & COMMIT_PAGE); /* hold buffer */ hold_metapage(mp); assert(mp->nohomeok > 0); _metapage_homeok(mp); /* inherit younger/larger clsn */ LOGSYNC_LOCK(log, flags); if (mp->clsn) { logdiff(difft, tblk->clsn, log); logdiff(diffp, mp->clsn, log); if (difft > diffp) mp->clsn = tblk->clsn; } else mp->clsn = tblk->clsn; LOGSYNC_UNLOCK(log, flags); assert(!(tlck->flag & tlckFREEPAGE)); put_metapage(mp); } /* insert tlock, and linelock(s) of the tlock if any, * at head of freelist */ TXN_LOCK(); llid = ((struct linelock *) & tlck->lock)->next; while (llid) { linelock = (struct linelock *) lid_to_tlock(llid); k = linelock->next; txLockFree(llid); llid = k; } txLockFree(lid); TXN_UNLOCK(); } tblk->next = tblk->last = 0; /* * remove tblock from logsynclist * (allocation map pages inherited lsn of tblk and * has been inserted in logsync list at txUpdateMap()) */ if (tblk->lsn) { LOGSYNC_LOCK(log, flags); log->count--; list_del(&tblk->synclist); LOGSYNC_UNLOCK(log, flags); } } /* * txMaplock() * * function: allocate a transaction lock for freed page/entry; * for freed page, maplock is used as xtlock/dtlock type; */ struct tlock *txMaplock(tid_t tid, struct inode *ip, int type) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); lid_t lid; struct tblock *tblk; struct tlock *tlck; struct maplock *maplock; TXN_LOCK(); /* * allocate a tlock */ lid = txLockAlloc(); tlck = lid_to_tlock(lid); /* * initialize tlock */ tlck->tid = tid; /* bind the tlock and the object */ tlck->flag = tlckINODELOCK; if (S_ISDIR(ip->i_mode)) tlck->flag |= tlckDIRECTORY; tlck->ip = ip; tlck->mp = NULL; tlck->type = type; /* * enqueue transaction lock to transaction/inode */ /* insert the tlock at tail of transaction tlock list */ if (tid) { tblk = tid_to_tblock(tid); if (tblk->next) lid_to_tlock(tblk->last)->next = lid; else tblk->next = lid; tlck->next = 0; tblk->last = lid; } /* anonymous transaction: * insert the tlock at head of inode anonymous tlock list */ else { tlck->next = jfs_ip->atlhead; jfs_ip->atlhead = lid; if (tlck->next == 0) { /* This inode's first anonymous transaction */ jfs_ip->atltail = lid; list_add_tail(&jfs_ip->anon_inode_list, &TxAnchor.anon_list); } } TXN_UNLOCK(); /* initialize type dependent area for maplock */ maplock = (struct maplock *) & tlck->lock; maplock->next = 0; maplock->maxcnt = 0; maplock->index = 0; return tlck; } /* * txLinelock() * * function: allocate a transaction lock for log vector list */ struct linelock *txLinelock(struct linelock * tlock) { lid_t lid; struct tlock *tlck; struct linelock *linelock; TXN_LOCK(); /* allocate a TxLock structure */ lid = txLockAlloc(); tlck = lid_to_tlock(lid); TXN_UNLOCK(); /* initialize linelock */ linelock = (struct linelock *) tlck; linelock->next = 0; linelock->flag = tlckLINELOCK; linelock->maxcnt = TLOCKLONG; linelock->index = 0; if (tlck->flag & tlckDIRECTORY) linelock->flag |= tlckDIRECTORY; /* append linelock after tlock */ linelock->next = tlock->next; tlock->next = lid; return linelock; } /* * transaction commit management * ----------------------------- */ /* * NAME: txCommit() * * FUNCTION: commit the changes to the objects specified in * clist. For journalled segments only the * changes of the caller are committed, ie by tid. * for non-journalled segments the data are flushed to * disk and then the change to the disk inode and indirect * blocks committed (so blocks newly allocated to the * segment will be made a part of the segment atomically). * * all of the segments specified in clist must be in * one file system. no more than 6 segments are needed * to handle all unix svcs. * * if the i_nlink field (i.e. disk inode link count) * is zero, and the type of inode is a regular file or * directory, or symbolic link , the inode is truncated * to zero length. the truncation is committed but the * VM resources are unaffected until it is closed (see * iput and iclose). * * PARAMETER: * * RETURN: * * serialization: * on entry the inode lock on each segment is assumed * to be held. * * i/o error: */ int txCommit(tid_t tid, /* transaction identifier */ int nip, /* number of inodes to commit */ struct inode **iplist, /* list of inode to commit */ int flag) { int rc = 0; struct commit cd; struct jfs_log *log; struct tblock *tblk; struct lrd *lrd; struct inode *ip; struct jfs_inode_info *jfs_ip; int k, n; ino_t top; struct super_block *sb; jfs_info("txCommit, tid = %d, flag = %d", tid, flag); /* is read-only file system ? */ if (isReadOnly(iplist[0])) { rc = -EROFS; goto TheEnd; } sb = cd.sb = iplist[0]->i_sb; cd.tid = tid; if (tid == 0) tid = txBegin(sb, 0); tblk = tid_to_tblock(tid); /* * initialize commit structure */ log = JFS_SBI(sb)->log; cd.log = log; /* initialize log record descriptor in commit */ lrd = &cd.lrd; lrd->logtid = cpu_to_le32(tblk->logtid); lrd->backchain = 0; tblk->xflag |= flag; if ((flag & (COMMIT_FORCE | COMMIT_SYNC)) == 0) tblk->xflag |= COMMIT_LAZY; /* * prepare non-journaled objects for commit * * flush data pages of non-journaled file * to prevent the file getting non-initialized disk blocks * in case of crash. * (new blocks - ) */ cd.iplist = iplist; cd.nip = nip; /* * acquire transaction lock on (on-disk) inodes * * update on-disk inode from in-memory inode * acquiring transaction locks for AFTER records * on the on-disk inode of file object * * sort the inodes array by inode number in descending order * to prevent deadlock when acquiring transaction lock * of on-disk inodes on multiple on-disk inode pages by * multiple concurrent transactions */ for (k = 0; k < cd.nip; k++) { top = (cd.iplist[k])->i_ino; for (n = k + 1; n < cd.nip; n++) { ip = cd.iplist[n]; if (ip->i_ino > top) { top = ip->i_ino; cd.iplist[n] = cd.iplist[k]; cd.iplist[k] = ip; } } ip = cd.iplist[k]; jfs_ip = JFS_IP(ip); /* * BUGBUG - This code has temporarily been removed. The * intent is to ensure that any file data is written before * the metadata is committed to the journal. This prevents * uninitialized data from appearing in a file after the * journal has been replayed. (The uninitialized data * could be sensitive data removed by another user.) * * The problem now is that we are holding the IWRITELOCK * on the inode, and calling filemap_fdatawrite on an * unmapped page will cause a deadlock in jfs_get_block. * * The long term solution is to pare down the use of * IWRITELOCK. We are currently holding it too long. * We could also be smarter about which data pages need * to be written before the transaction is committed and * when we don't need to worry about it at all. * * if ((!S_ISDIR(ip->i_mode)) * && (tblk->flag & COMMIT_DELETE) == 0) * filemap_write_and_wait(ip->i_mapping); */ /* * Mark inode as not dirty. It will still be on the dirty * inode list, but we'll know not to commit it again unless * it gets marked dirty again */ clear_cflag(COMMIT_Dirty, ip); /* inherit anonymous tlock(s) of inode */ if (jfs_ip->atlhead) { lid_to_tlock(jfs_ip->atltail)->next = tblk->next; tblk->next = jfs_ip->atlhead; if (!tblk->last) tblk->last = jfs_ip->atltail; jfs_ip->atlhead = jfs_ip->atltail = 0; TXN_LOCK(); list_del_init(&jfs_ip->anon_inode_list); TXN_UNLOCK(); } /* * acquire transaction lock on on-disk inode page * (become first tlock of the tblk's tlock list) */ if (((rc = diWrite(tid, ip)))) goto out; } /* * write log records from transaction locks * * txUpdateMap() resets XAD_NEW in XAD. */ txLog(log, tblk, &cd); /* * Ensure that inode isn't reused before * lazy commit thread finishes processing */ if (tblk->xflag & COMMIT_DELETE) { ihold(tblk->u.ip); /* * Avoid a rare deadlock * * If the inode is locked, we may be blocked in * jfs_commit_inode. If so, we don't want the * lazy_commit thread doing the last iput() on the inode * since that may block on the locked inode. Instead, * commit the transaction synchronously, so the last iput * will be done by the calling thread (or later) */ /* * I believe this code is no longer needed. Splitting I_LOCK * into two bits, I_NEW and I_SYNC should prevent this * deadlock as well. But since I don't have a JFS testload * to verify this, only a trivial s/I_LOCK/I_SYNC/ was done. * Joern */ if (tblk->u.ip->i_state & I_SYNC) tblk->xflag &= ~COMMIT_LAZY; } ASSERT((!(tblk->xflag & COMMIT_DELETE)) || ((tblk->u.ip->i_nlink == 0) && !test_cflag(COMMIT_Nolink, tblk->u.ip))); /* * write COMMIT log record */ lrd->type = cpu_to_le16(LOG_COMMIT); lrd->length = 0; lmLog(log, tblk, lrd, NULL); lmGroupCommit(log, tblk); /* * - transaction is now committed - */ /* * force pages in careful update * (imap addressing structure update) */ if (flag & COMMIT_FORCE) txForce(tblk); /* * update allocation map. * * update inode allocation map and inode: * free pager lock on memory object of inode if any. * update block allocation map. * * txUpdateMap() resets XAD_NEW in XAD. */ if (tblk->xflag & COMMIT_FORCE) txUpdateMap(tblk); /* * free transaction locks and pageout/free pages */ txRelease(tblk); if ((tblk->flag & tblkGC_LAZY) == 0) txUnlock(tblk); /* * reset in-memory object state */ for (k = 0; k < cd.nip; k++) { ip = cd.iplist[k]; jfs_ip = JFS_IP(ip); /* * reset in-memory inode state */ jfs_ip->bxflag = 0; jfs_ip->blid = 0; } out: if (rc != 0) txAbort(tid, 1); TheEnd: jfs_info("txCommit: tid = %d, returning %d", tid, rc); return rc; } /* * NAME: txLog() * * FUNCTION: Writes AFTER log records for all lines modified * by tid for segments specified by inodes in comdata. * Code assumes only WRITELOCKS are recorded in lockwords. * * PARAMETERS: * * RETURN : */ static void txLog(struct jfs_log *log, struct tblock *tblk, struct commit *cd) { struct inode *ip; lid_t lid; struct tlock *tlck; struct lrd *lrd = &cd->lrd; /* * write log record(s) for each tlock of transaction, */ for (lid = tblk->next; lid; lid = tlck->next) { tlck = lid_to_tlock(lid); tlck->flag |= tlckLOG; /* initialize lrd common */ ip = tlck->ip; lrd->aggregate = cpu_to_le32(JFS_SBI(ip->i_sb)->aggregate); lrd->log.redopage.fileset = cpu_to_le32(JFS_IP(ip)->fileset); lrd->log.redopage.inode = cpu_to_le32(ip->i_ino); /* write log record of page from the tlock */ switch (tlck->type & tlckTYPE) { case tlckXTREE: xtLog(log, tblk, lrd, tlck); break; case tlckDTREE: dtLog(log, tblk, lrd, tlck); break; case tlckINODE: diLog(log, tblk, lrd, tlck, cd); break; case tlckMAP: mapLog(log, tblk, lrd, tlck); break; case tlckDATA: dataLog(log, tblk, lrd, tlck); break; default: jfs_err("UFO tlock:0x%p", tlck); } } return; } /* * diLog() * * function: log inode tlock and format maplock to update bmap; */ static void diLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, struct tlock *tlck, struct commit *cd) { struct metapage *mp; pxd_t *pxd; struct pxd_lock *pxdlock; mp = tlck->mp; /* initialize as REDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_INODE); lrd->log.redopage.l2linesize = cpu_to_le16(L2INODESLOTSIZE); pxd = &lrd->log.redopage.pxd; /* * inode after image */ if (tlck->type & tlckENTRY) { /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; } else if (tlck->type & tlckFREE) { /* * free inode extent * * (pages of the freed inode extent have been invalidated and * a maplock for free of the extent has been formatted at * txLock() time); * * the tlock had been acquired on the inode allocation map page * (iag) that specifies the freed extent, even though the map * page is not itself logged, to prevent pageout of the map * page before the log; */ /* log LOG_NOREDOINOEXT of the freed inode extent for * logredo() to start NoRedoPage filters, and to update * imap and bmap for free of the extent; */ lrd->type = cpu_to_le16(LOG_NOREDOINOEXT); /* * For the LOG_NOREDOINOEXT record, we need * to pass the IAG number and inode extent * index (within that IAG) from which the * extent is being released. These have been * passed to us in the iplist[1] and iplist[2]. */ lrd->log.noredoinoext.iagnum = cpu_to_le32((u32) (size_t) cd->iplist[1]); lrd->log.noredoinoext.inoext_idx = cpu_to_le32((u32) (size_t) cd->iplist[2]); pxdlock = (struct pxd_lock *) & tlck->lock; *pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* update bmap */ tlck->flag |= tlckUPDATEMAP; /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; } else jfs_err("diLog: UFO type tlck:0x%p", tlck); return; } /* * dataLog() * * function: log data tlock */ static void dataLog(struct jfs_log *log, struct tblock *tblk, struct lrd *lrd, struct tlock *tlck) { struct metapage *mp; pxd_t *pxd; mp = tlck->mp; /* initialize as REDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_DATA); lrd->log.redopage.l2linesize = cpu_to_le16(L2DATASLOTSIZE); pxd = &lrd->log.redopage.pxd; /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); if (jfs_dirtable_inline(tlck->ip)) { /* * The table has been truncated, we've must have deleted * the last entry, so don't bother logging this */ mp->lid = 0; grab_metapage(mp); metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; return; } PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * dtLog() * * function: log dtree tlock and format maplock to update bmap; */ static void dtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { struct metapage *mp; struct pxd_lock *pxdlock; pxd_t *pxd; mp = tlck->mp; /* initialize as REDOPAGE/NOREDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_DTREE); lrd->log.redopage.l2linesize = cpu_to_le16(L2DTSLOTSIZE); pxd = &lrd->log.redopage.pxd; if (tlck->type & tlckBTROOT) lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); /* * page extension via relocation: entry insertion; * page extension in-place: entry insertion; * new right page from page split, reinitialized in-line * root from root page split: entry insertion; */ if (tlck->type & (tlckNEW | tlckEXTEND)) { /* log after-image of the new page for logredo(): * mark log (LOG_NEW) for logredo() to initialize * freelist and update bmap for alloc of the new page; */ lrd->type = cpu_to_le16(LOG_REDOPAGE); if (tlck->type & tlckEXTEND) lrd->log.redopage.type |= cpu_to_le16(LOG_EXTEND); else lrd->log.redopage.type |= cpu_to_le16(LOG_NEW); PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* format a maplock for txUpdateMap() to update bPMAP for * alloc of the new page; */ if (tlck->type & tlckBTROOT) return; tlck->flag |= tlckUPDATEMAP; pxdlock = (struct pxd_lock *) & tlck->lock; pxdlock->flag = mlckALLOCPXD; pxdlock->pxd = *pxd; pxdlock->index = 1; /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * entry insertion/deletion, * sibling page link update (old right page before split); */ if (tlck->type & (tlckENTRY | tlckRELINK)) { /* log after-image for logredo(): */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(pxd, mp->index); PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * page deletion: page has been invalidated * page relocation: source extent * * a maplock for free of the page has been formatted * at txLock() time); */ if (tlck->type & (tlckFREE | tlckRELOCATE)) { /* log LOG_NOREDOPAGE of the deleted page for logredo() * to start NoRedoPage filter and to update bmap for free * of the deletd page */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); pxdlock = (struct pxd_lock *) & tlck->lock; *pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time; */ tlck->flag |= tlckUPDATEMAP; } return; } /* * xtLog() * * function: log xtree tlock and format maplock to update bmap; */ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { struct inode *ip; struct metapage *mp; xtpage_t *p; struct xtlock *xtlck; struct maplock *maplock; struct xdlistlock *xadlock; struct pxd_lock *pxdlock; pxd_t *page_pxd; int next, lwm, hwm; ip = tlck->ip; mp = tlck->mp; /* initialize as REDOPAGE/NOREDOPAGE record format */ lrd->log.redopage.type = cpu_to_le16(LOG_XTREE); lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE); page_pxd = &lrd->log.redopage.pxd; if (tlck->type & tlckBTROOT) { lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT); p = (xtpage_t *) &JFS_IP(ip)->i_xtroot; if (S_ISDIR(ip->i_mode)) lrd->log.redopage.type |= cpu_to_le16(LOG_DIR_XTREE); } else p = (xtpage_t *) mp->data; next = le16_to_cpu(p->header.nextindex); xtlck = (struct xtlock *) & tlck->lock; maplock = (struct maplock *) & tlck->lock; xadlock = (struct xdlistlock *) maplock; /* * entry insertion/extension; * sibling page link update (old right page before split); */ if (tlck->type & (tlckNEW | tlckGROW | tlckRELINK)) { /* log after-image for logredo(): * logredo() will update bmap for alloc of new/extended * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from * after-image of XADlist; * logredo() resets (XAD_NEW|XAD_EXTEND) flag when * applying the after-image to the meta-data page. */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* format a maplock for txUpdateMap() to update bPMAP * for alloc of new/extended extents of XAD[lwm:next) * from the page itself; * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. */ lwm = xtlck->lwm.offset; if (lwm == 0) lwm = XTPAGEMAXSLOT; if (lwm == next) goto out; if (lwm > next) { jfs_err("xtLog: lwm > next"); goto out; } tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckALLOCXADLIST; xadlock->count = next - lwm; if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { int i; pxd_t *pxd; /* * Lazy commit may allow xtree to be modified before * txUpdateMap runs. Copy xad into linelock to * preserve correct data. * * We can fit twice as may pxd's as xads in the lock */ xadlock->flag = mlckALLOCPXDLIST; pxd = xadlock->xdlist = &xtlck->pxdlock; for (i = 0; i < xadlock->count; i++) { PXDaddress(pxd, addressXAD(&p->xad[lwm + i])); PXDlength(pxd, lengthXAD(&p->xad[lwm + i])); p->xad[lwm + i].flag &= ~(XAD_NEW | XAD_EXTENDED); pxd++; } } else { /* * xdlist will point to into inode's xtree, ensure * that transaction is not committed lazily. */ xadlock->flag = mlckALLOCXADLIST; xadlock->xdlist = &p->xad[lwm]; tblk->xflag &= ~COMMIT_LAZY; } jfs_info("xtLog: alloc ip:0x%p mp:0x%p tlck:0x%p lwm:%d count:%d", tlck->ip, mp, tlck, lwm, xadlock->count); maplock->index = 1; out: /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; return; } /* * page deletion: file deletion/truncation (ref. xtTruncate()) * * (page will be invalidated after log is written and bmap * is updated from the page); */ if (tlck->type & tlckFREE) { /* LOG_NOREDOPAGE log for NoRedoPage filter: * if page free from file delete, NoRedoFile filter from * inode image of zero link count will subsume NoRedoPage * filters for each page; * if page free from file truncattion, write NoRedoPage * filter; * * upadte of block allocation map for the page itself: * if page free from deletion and truncation, LOG_UPDATEMAP * log for the page itself is generated from processing * its parent page xad entries; */ /* if page free from file truncation, log LOG_NOREDOPAGE * of the deleted page for logredo() to start NoRedoPage * filter for the page; */ if (tblk->xflag & COMMIT_TRUNCATE) { /* write NOREDOPAGE for the page */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb-> s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); if (tlck->type & tlckBTROOT) { /* Empty xtree must be logged */ lrd->type = cpu_to_le16(LOG_REDOPAGE); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); } } /* init LOG_UPDATEMAP of the freed extents * XAD[XTENTRYSTART:hwm) from the deleted page itself * for logredo() to update bmap; */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); xtlck = (struct xtlock *) & tlck->lock; hwm = xtlck->hwm.offset; lrd->log.updatemap.nxd = cpu_to_le16(hwm - XTENTRYSTART + 1); /* reformat linelock for lmLog() */ xtlck->header.offset = XTENTRYSTART; xtlck->header.length = hwm - XTENTRYSTART + 1; xtlck->index = 1; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* format a maplock for txUpdateMap() to update bmap * to free extents of XAD[XTENTRYSTART:hwm) from the * deleted page itself; */ tlck->flag |= tlckUPDATEMAP; xadlock->count = hwm - XTENTRYSTART + 1; if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) { int i; pxd_t *pxd; /* * Lazy commit may allow xtree to be modified before * txUpdateMap runs. Copy xad into linelock to * preserve correct data. * * We can fit twice as may pxd's as xads in the lock */ xadlock->flag = mlckFREEPXDLIST; pxd = xadlock->xdlist = &xtlck->pxdlock; for (i = 0; i < xadlock->count; i++) { PXDaddress(pxd, addressXAD(&p->xad[XTENTRYSTART + i])); PXDlength(pxd, lengthXAD(&p->xad[XTENTRYSTART + i])); pxd++; } } else { /* * xdlist will point to into inode's xtree, ensure * that transaction is not committed lazily. */ xadlock->flag = mlckFREEXADLIST; xadlock->xdlist = &p->xad[XTENTRYSTART]; tblk->xflag &= ~COMMIT_LAZY; } jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d lwm:2", tlck->ip, mp, xadlock->count); maplock->index = 1; /* mark page as invalid */ if (((tblk->xflag & COMMIT_PWMAP) || S_ISDIR(ip->i_mode)) && !(tlck->type & tlckBTROOT)) tlck->flag |= tlckFREEPAGE; /* else (tblk->xflag & COMMIT_PMAP) ? release the page; */ return; } /* * page/entry truncation: file truncation (ref. xtTruncate()) * * |----------+------+------+---------------| * | | | * | | hwm - hwm before truncation * | next - truncation point * lwm - lwm before truncation * header ? */ if (tlck->type & tlckTRUNCATE) { pxd_t pxd; /* truncated extent of xad */ int twm; /* * For truncation the entire linelock may be used, so it would * be difficult to store xad list in linelock itself. * Therefore, we'll just force transaction to be committed * synchronously, so that xtree pages won't be changed before * txUpdateMap runs. */ tblk->xflag &= ~COMMIT_LAZY; lwm = xtlck->lwm.offset; if (lwm == 0) lwm = XTPAGEMAXSLOT; hwm = xtlck->hwm.offset; twm = xtlck->twm.offset; /* * write log records */ /* log after-image for logredo(): * * logredo() will update bmap for alloc of new/extended * extents (XAD_NEW|XAD_EXTEND) of XAD[lwm:next) from * after-image of XADlist; * logredo() resets (XAD_NEW|XAD_EXTEND) flag when * applying the after-image to the meta-data page. */ lrd->type = cpu_to_le16(LOG_REDOPAGE); PXDaddress(page_pxd, mp->index); PXDlength(page_pxd, mp->logical_size >> tblk->sb->s_blocksize_bits); lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); /* * truncate entry XAD[twm == next - 1]: */ if (twm == next - 1) { /* init LOG_UPDATEMAP for logredo() to update bmap for * free of truncated delta extent of the truncated * entry XAD[next - 1]: * (xtlck->pxdlock = truncated delta extent); */ pxdlock = (struct pxd_lock *) & xtlck->pxdlock; /* assert(pxdlock->type & tlckTRUNCATE); */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; pxd = pxdlock->pxd; /* save to format maplock */ lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); } /* * free entries XAD[next:hwm]: */ if (hwm >= next) { /* init LOG_UPDATEMAP of the freed extents * XAD[next:hwm] from the deleted page itself * for logredo() to update bmap; */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEXADLIST); xtlck = (struct xtlock *) & tlck->lock; hwm = xtlck->hwm.offset; lrd->log.updatemap.nxd = cpu_to_le16(hwm - next + 1); /* reformat linelock for lmLog() */ xtlck->header.offset = next; xtlck->header.length = hwm - next + 1; xtlck->index = 1; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck)); } /* * format maplock(s) for txUpdateMap() to update bmap */ maplock->index = 0; /* * allocate entries XAD[lwm:next): */ if (lwm < next) { /* format a maplock for txUpdateMap() to update bPMAP * for alloc of new/extended extents of XAD[lwm:next) * from the page itself; * txUpdateMap() resets (XAD_NEW|XAD_EXTEND) flag. */ tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckALLOCXADLIST; xadlock->count = next - lwm; xadlock->xdlist = &p->xad[lwm]; jfs_info("xtLog: alloc ip:0x%p mp:0x%p count:%d lwm:%d next:%d", tlck->ip, mp, xadlock->count, lwm, next); maplock->index++; xadlock++; } /* * truncate entry XAD[twm == next - 1]: */ if (twm == next - 1) { /* format a maplock for txUpdateMap() to update bmap * to free truncated delta extent of the truncated * entry XAD[next - 1]; * (xtlck->pxdlock = truncated delta extent); */ tlck->flag |= tlckUPDATEMAP; pxdlock = (struct pxd_lock *) xadlock; pxdlock->flag = mlckFREEPXD; pxdlock->count = 1; pxdlock->pxd = pxd; jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d hwm:%d", ip, mp, pxdlock->count, hwm); maplock->index++; xadlock++; } /* * free entries XAD[next:hwm]: */ if (hwm >= next) { /* format a maplock for txUpdateMap() to update bmap * to free extents of XAD[next:hwm] from thedeleted * page itself; */ tlck->flag |= tlckUPDATEMAP; xadlock->flag = mlckFREEXADLIST; xadlock->count = hwm - next + 1; xadlock->xdlist = &p->xad[next]; jfs_info("xtLog: free ip:0x%p mp:0x%p count:%d next:%d hwm:%d", tlck->ip, mp, xadlock->count, next, hwm); maplock->index++; } /* mark page as homeward bound */ tlck->flag |= tlckWRITEPAGE; } return; } /* * mapLog() * * function: log from maplock of freed data extents; */ static void mapLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, struct tlock * tlck) { struct pxd_lock *pxdlock; int i, nlock; pxd_t *pxd; /* * page relocation: free the source page extent * * a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time saving the src * relocated page address; */ if (tlck->type & tlckRELOCATE) { /* log LOG_NOREDOPAGE of the old relocated page * for logredo() to start NoRedoPage filter; */ lrd->type = cpu_to_le16(LOG_NOREDOPAGE); pxdlock = (struct pxd_lock *) & tlck->lock; pxd = &lrd->log.redopage.pxd; *pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* (N.B. currently, logredo() does NOT update bmap * for free of the page itself for (LOG_XTREE|LOG_NOREDOPAGE); * if page free from relocation, LOG_UPDATEMAP log is * specifically generated now for logredo() * to update bmap for free of src relocated page; * (new flag LOG_RELOCATE may be introduced which will * inform logredo() to start NORedoPage filter and also * update block allocation map at the same time, thus * avoiding an extra log write); */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); /* a maplock for txUpdateMap() for free of the page * has been formatted at txLock() time; */ tlck->flag |= tlckUPDATEMAP; return; } /* * Otherwise it's not a relocate request * */ else { /* log LOG_UPDATEMAP for logredo() to update bmap for * free of truncated/relocated delta extent of the data; * e.g.: external EA extent, relocated/truncated extent * from xtTailgate(); */ lrd->type = cpu_to_le16(LOG_UPDATEMAP); pxdlock = (struct pxd_lock *) & tlck->lock; nlock = pxdlock->index; for (i = 0; i < nlock; i++, pxdlock++) { if (pxdlock->flag & mlckALLOCPXD) lrd->log.updatemap.type = cpu_to_le16(LOG_ALLOCPXD); else lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD); lrd->log.updatemap.nxd = cpu_to_le16(1); lrd->log.updatemap.pxd = pxdlock->pxd; lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, NULL)); jfs_info("mapLog: xaddr:0x%lx xlen:0x%x", (ulong) addressPXD(&pxdlock->pxd), lengthPXD(&pxdlock->pxd)); } /* update bmap */ tlck->flag |= tlckUPDATEMAP; } } /* * txEA() * * function: acquire maplock for EA/ACL extents or * set COMMIT_INLINE flag; */ void txEA(tid_t tid, struct inode *ip, dxd_t * oldea, dxd_t * newea) { struct tlock *tlck = NULL; struct pxd_lock *maplock = NULL, *pxdlock = NULL; /* * format maplock for alloc of new EA extent */ if (newea) { /* Since the newea could be a completely zeroed entry we need to * check for the two flags which indicate we should actually * commit new EA data */ if (newea->flag & DXD_EXTENT) { tlck = txMaplock(tid, ip, tlckMAP); maplock = (struct pxd_lock *) & tlck->lock; pxdlock = (struct pxd_lock *) maplock; pxdlock->flag = mlckALLOCPXD; PXDaddress(&pxdlock->pxd, addressDXD(newea)); PXDlength(&pxdlock->pxd, lengthDXD(newea)); pxdlock++; maplock->index = 1; } else if (newea->flag & DXD_INLINE) { tlck = NULL; set_cflag(COMMIT_Inlineea, ip); } } /* * format maplock for free of old EA extent */ if (!test_cflag(COMMIT_Nolink, ip) && oldea->flag & DXD_EXTENT) { if (tlck == NULL) { tlck = txMaplock(tid, ip, tlckMAP); maplock = (struct pxd_lock *) & tlck->lock; pxdlock = (struct pxd_lock *) maplock; maplock->index = 0; } pxdlock->flag = mlckFREEPXD; PXDaddress(&pxdlock->pxd, addressDXD(oldea)); PXDlength(&pxdlock->pxd, lengthDXD(oldea)); maplock->index++; } } /* * txForce() * * function: synchronously write pages locked by transaction * after txLog() but before txUpdateMap(); */ static void txForce(struct tblock * tblk) { struct tlock *tlck; lid_t lid, next; struct metapage *mp; /* * reverse the order of transaction tlocks in * careful update order of address index pages * (right to left, bottom up) */ tlck = lid_to_tlock(tblk->next); lid = tlck->next; tlck->next = 0; while (lid) { tlck = lid_to_tlock(lid); next = tlck->next; tlck->next = tblk->next; tblk->next = lid; lid = next; } /* * synchronously write the page, and * hold the page for txUpdateMap(); */ for (lid = tblk->next; lid; lid = next) { tlck = lid_to_tlock(lid); next = tlck->next; if ((mp = tlck->mp) != NULL && (tlck->type & tlckBTROOT) == 0) { assert(mp->xflag & COMMIT_PAGE); if (tlck->flag & tlckWRITEPAGE) { tlck->flag &= ~tlckWRITEPAGE; /* do not release page to freelist */ force_metapage(mp); #if 0 /* * The "right" thing to do here is to * synchronously write the metadata. * With the current implementation this * is hard since write_metapage requires * us to kunmap & remap the page. If we * have tlocks pointing into the metadata * pages, we don't want to do this. I think * we can get by with synchronously writing * the pages when they are released. */ assert(mp->nohomeok); set_bit(META_dirty, &mp->flag); set_bit(META_sync, &mp->flag); #endif } } } } /* * txUpdateMap() * * function: update persistent allocation map (and working map * if appropriate); * * parameter: */ static void txUpdateMap(struct tblock * tblk) { struct inode *ip; struct inode *ipimap; lid_t lid; struct tlock *tlck; struct maplock *maplock; struct pxd_lock pxdlock; int maptype; int k, nlock; struct metapage *mp = NULL; ipimap = JFS_SBI(tblk->sb)->ipimap; maptype = (tblk->xflag & COMMIT_PMAP) ? COMMIT_PMAP : COMMIT_PWMAP; /* * update block allocation map * * update allocation state in pmap (and wmap) and * update lsn of the pmap page; */ /* * scan each tlock/page of transaction for block allocation/free: * * for each tlock/page of transaction, update map. * ? are there tlock for pmap and pwmap at the same time ? */ for (lid = tblk->next; lid; lid = tlck->next) { tlck = lid_to_tlock(lid); if ((tlck->flag & tlckUPDATEMAP) == 0) continue; if (tlck->flag & tlckFREEPAGE) { /* * Another thread may attempt to reuse freed space * immediately, so we want to get rid of the metapage * before anyone else has a chance to get it. * Lock metapage, update maps, then invalidate * the metapage. */ mp = tlck->mp; ASSERT(mp->xflag & COMMIT_PAGE); grab_metapage(mp); } /* * extent list: * . in-line PXD list: * . out-of-line XAD list: */ maplock = (struct maplock *) & tlck->lock; nlock = maplock->index; for (k = 0; k < nlock; k++, maplock++) { /* * allocate blocks in persistent map: * * blocks have been allocated from wmap at alloc time; */ if (maplock->flag & mlckALLOC) { txAllocPMap(ipimap, maplock, tblk); } /* * free blocks in persistent and working map: * blocks will be freed in pmap and then in wmap; * * ? tblock specifies the PMAP/PWMAP based upon * transaction * * free blocks in persistent map: * blocks will be freed from wmap at last reference * release of the object for regular files; * * Alway free blocks from both persistent & working * maps for directories */ else { /* (maplock->flag & mlckFREE) */ if (tlck->flag & tlckDIRECTORY) txFreeMap(ipimap, maplock, tblk, COMMIT_PWMAP); else txFreeMap(ipimap, maplock, tblk, maptype); } } if (tlck->flag & tlckFREEPAGE) { if (!(tblk->flag & tblkGC_LAZY)) { /* This is equivalent to txRelease */ ASSERT(mp->lid == lid); tlck->mp->lid = 0; } assert(mp->nohomeok == 1); metapage_homeok(mp); discard_metapage(mp); tlck->mp = NULL; } } /* * update inode allocation map * * update allocation state in pmap and * update lsn of the pmap page; * update in-memory inode flag/state * * unlock mapper/write lock */ if (tblk->xflag & COMMIT_CREATE) { diUpdatePMap(ipimap, tblk->ino, false, tblk); /* update persistent block allocation map * for the allocation of inode extent; */ pxdlock.flag = mlckALLOCPXD; pxdlock.pxd = tblk->u.ixpxd; pxdlock.index = 1; txAllocPMap(ipimap, (struct maplock *) & pxdlock, tblk); } else if (tblk->xflag & COMMIT_DELETE) { ip = tblk->u.ip; diUpdatePMap(ipimap, ip->i_ino, true, tblk); iput(ip); } } /* * txAllocPMap() * * function: allocate from persistent map; * * parameter: * ipbmap - * malock - * xad list: * pxd: * * maptype - * allocate from persistent map; * free from persistent map; * (e.g., tmp file - free from working map at releae * of last reference); * free from persistent and working map; * * lsn - log sequence number; */ static void txAllocPMap(struct inode *ip, struct maplock * maplock, struct tblock * tblk) { struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; struct xdlistlock *xadlistlock; xad_t *xad; s64 xaddr; int xlen; struct pxd_lock *pxdlock; struct xdlistlock *pxdlistlock; pxd_t *pxd; int n; /* * allocate from persistent map; */ if (maplock->flag & mlckALLOCXADLIST) { xadlistlock = (struct xdlistlock *) maplock; xad = xadlistlock->xdlist; for (n = 0; n < xadlistlock->count; n++, xad++) { if (xad->flag & (XAD_NEW | XAD_EXTENDED)) { xaddr = addressXAD(xad); xlen = lengthXAD(xad); dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); xad->flag &= ~(XAD_NEW | XAD_EXTENDED); jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } } else if (maplock->flag & mlckALLOCPXD) { pxdlock = (struct pxd_lock *) maplock; xaddr = addressPXD(&pxdlock->pxd); xlen = lengthPXD(&pxdlock->pxd); dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } else { /* (maplock->flag & mlckALLOCPXDLIST) */ pxdlistlock = (struct xdlistlock *) maplock; pxd = pxdlistlock->xdlist; for (n = 0; n < pxdlistlock->count; n++, pxd++) { xaddr = addressPXD(pxd); xlen = lengthPXD(pxd); dbUpdatePMap(ipbmap, false, xaddr, (s64) xlen, tblk); jfs_info("allocPMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } } /* * txFreeMap() * * function: free from persistent and/or working map; * * todo: optimization */ void txFreeMap(struct inode *ip, struct maplock * maplock, struct tblock * tblk, int maptype) { struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap; struct xdlistlock *xadlistlock; xad_t *xad; s64 xaddr; int xlen; struct pxd_lock *pxdlock; struct xdlistlock *pxdlistlock; pxd_t *pxd; int n; jfs_info("txFreeMap: tblk:0x%p maplock:0x%p maptype:0x%x", tblk, maplock, maptype); /* * free from persistent map; */ if (maptype == COMMIT_PMAP || maptype == COMMIT_PWMAP) { if (maplock->flag & mlckFREEXADLIST) { xadlistlock = (struct xdlistlock *) maplock; xad = xadlistlock->xdlist; for (n = 0; n < xadlistlock->count; n++, xad++) { if (!(xad->flag & XAD_NEW)) { xaddr = addressXAD(xad); xlen = lengthXAD(xad); dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, tblk); jfs_info("freePMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } } else if (maplock->flag & mlckFREEPXD) { pxdlock = (struct pxd_lock *) maplock; xaddr = addressPXD(&pxdlock->pxd); xlen = lengthPXD(&pxdlock->pxd); dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, tblk); jfs_info("freePMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } else { /* (maplock->flag & mlckALLOCPXDLIST) */ pxdlistlock = (struct xdlistlock *) maplock; pxd = pxdlistlock->xdlist; for (n = 0; n < pxdlistlock->count; n++, pxd++) { xaddr = addressPXD(pxd); xlen = lengthPXD(pxd); dbUpdatePMap(ipbmap, true, xaddr, (s64) xlen, tblk); jfs_info("freePMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } } /* * free from working map; */ if (maptype == COMMIT_PWMAP || maptype == COMMIT_WMAP) { if (maplock->flag & mlckFREEXADLIST) { xadlistlock = (struct xdlistlock *) maplock; xad = xadlistlock->xdlist; for (n = 0; n < xadlistlock->count; n++, xad++) { xaddr = addressXAD(xad); xlen = lengthXAD(xad); dbFree(ip, xaddr, (s64) xlen); xad->flag = 0; jfs_info("freeWMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } else if (maplock->flag & mlckFREEPXD) { pxdlock = (struct pxd_lock *) maplock; xaddr = addressPXD(&pxdlock->pxd); xlen = lengthPXD(&pxdlock->pxd); dbFree(ip, xaddr, (s64) xlen); jfs_info("freeWMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } else { /* (maplock->flag & mlckFREEPXDLIST) */ pxdlistlock = (struct xdlistlock *) maplock; pxd = pxdlistlock->xdlist; for (n = 0; n < pxdlistlock->count; n++, pxd++) { xaddr = addressPXD(pxd); xlen = lengthPXD(pxd); dbFree(ip, xaddr, (s64) xlen); jfs_info("freeWMap: xaddr:0x%lx xlen:%d", (ulong) xaddr, xlen); } } } } /* * txFreelock() * * function: remove tlock from inode anonymous locklist */ void txFreelock(struct inode *ip) { struct jfs_inode_info *jfs_ip = JFS_IP(ip); struct tlock *xtlck, *tlck; lid_t xlid = 0, lid; if (!jfs_ip->atlhead) return; TXN_LOCK(); xtlck = (struct tlock *) &jfs_ip->atlhead; while ((lid = xtlck->next) != 0) { tlck = lid_to_tlock(lid); if (tlck->flag & tlckFREELOCK) { xtlck->next = tlck->next; txLockFree(lid); } else { xtlck = tlck; xlid = lid; } } if (jfs_ip->atlhead) jfs_ip->atltail = xlid; else { jfs_ip->atltail = 0; /* * If inode was on anon_list, remove it */ list_del_init(&jfs_ip->anon_inode_list); } TXN_UNLOCK(); } /* * txAbort() * * function: abort tx before commit; * * frees line-locks and segment locks for all * segments in comdata structure. * Optionally sets state of file-system to FM_DIRTY in super-block. * log age of page-frames in memory for which caller has * are reset to 0 (to avoid logwarap). */ void txAbort(tid_t tid, int dirty) { lid_t lid, next; struct metapage *mp; struct tblock *tblk = tid_to_tblock(tid); struct tlock *tlck; /* * free tlocks of the transaction */ for (lid = tblk->next; lid; lid = next) { tlck = lid_to_tlock(lid); next = tlck->next; mp = tlck->mp; JFS_IP(tlck->ip)->xtlid = 0; if (mp) { mp->lid = 0; /* * reset lsn of page to avoid logwarap: * * (page may have been previously committed by another * transaction(s) but has not been paged, i.e., * it may be on logsync list even though it has not * been logged for the current tx.) */ if (mp->xflag & COMMIT_PAGE && mp->lsn) LogSyncRelease(mp); } /* insert tlock at head of freelist */ TXN_LOCK(); txLockFree(lid); TXN_UNLOCK(); } /* caller will free the transaction block */ tblk->next = tblk->last = 0; /* * mark filesystem dirty */ if (dirty) jfs_error(tblk->sb, "\n"); return; } /* * txLazyCommit(void) * * All transactions except those changing ipimap (COMMIT_FORCE) are * processed by this routine. This insures that the inode and block * allocation maps are updated in order. For synchronous transactions, * let the user thread finish processing after txUpdateMap() is called. */ static void txLazyCommit(struct tblock * tblk) { struct jfs_log *log; while (((tblk->flag & tblkGC_READY) == 0) && ((tblk->flag & tblkGC_UNLOCKED) == 0)) { /* We must have gotten ahead of the user thread */ jfs_info("jfs_lazycommit: tblk 0x%p not unlocked", tblk); yield(); } jfs_info("txLazyCommit: processing tblk 0x%p", tblk); txUpdateMap(tblk); log = (struct jfs_log *) JFS_SBI(tblk->sb)->log; spin_lock_irq(&log->gclock); // LOGGC_LOCK tblk->flag |= tblkGC_COMMITTED; if (tblk->flag & tblkGC_READY) log->gcrtc--; wake_up_all(&tblk->gcwait); // LOGGC_WAKEUP /* * Can't release log->gclock until we've tested tblk->flag */ if (tblk->flag & tblkGC_LAZY) { spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK txUnlock(tblk); tblk->flag &= ~tblkGC_LAZY; txEnd(tblk - TxBlock); /* Convert back to tid */ } else spin_unlock_irq(&log->gclock); // LOGGC_UNLOCK jfs_info("txLazyCommit: done: tblk = 0x%p", tblk); } /* * jfs_lazycommit(void) * * To be run as a kernel daemon. If lbmIODone is called in an interrupt * context, or where blocking is not wanted, this routine will process * committed transactions from the unlock queue. */ int jfs_lazycommit(void *arg) { int WorkDone; struct tblock *tblk; unsigned long flags; struct jfs_sb_info *sbi; set_freezable(); do { LAZY_LOCK(flags); jfs_commit_thread_waking = 0; /* OK to wake another thread */ while (!list_empty(&TxAnchor.unlock_queue)) { WorkDone = 0; list_for_each_entry(tblk, &TxAnchor.unlock_queue, cqueue) { sbi = JFS_SBI(tblk->sb); /* * For each volume, the transactions must be * handled in order. If another commit thread * is handling a tblk for this superblock, * skip it */ if (sbi->commit_state & IN_LAZYCOMMIT) continue; sbi->commit_state |= IN_LAZYCOMMIT; WorkDone = 1; /* * Remove transaction from queue */ list_del(&tblk->cqueue); LAZY_UNLOCK(flags); txLazyCommit(tblk); LAZY_LOCK(flags); sbi->commit_state &= ~IN_LAZYCOMMIT; /* * Don't continue in the for loop. (We can't * anyway, it's unsafe!) We want to go back to * the beginning of the list. */ break; } /* If there was nothing to do, don't continue */ if (!WorkDone) break; } /* In case a wakeup came while all threads were active */ jfs_commit_thread_waking = 0; if (freezing(current)) { LAZY_UNLOCK(flags); try_to_freeze(); } else { DECLARE_WAITQUEUE(wq, current); add_wait_queue(&jfs_commit_thread_wait, &wq); set_current_state(TASK_INTERRUPTIBLE); LAZY_UNLOCK(flags); schedule(); remove_wait_queue(&jfs_commit_thread_wait, &wq); } } while (!kthread_should_stop()); if (!list_empty(&TxAnchor.unlock_queue)) jfs_err("jfs_lazycommit being killed w/pending transactions!"); else jfs_info("jfs_lazycommit being killed"); return 0; } void txLazyUnlock(struct tblock * tblk) { unsigned long flags; LAZY_LOCK(flags); list_add_tail(&tblk->cqueue, &TxAnchor.unlock_queue); /* * Don't wake up a commit thread if there is already one servicing * this superblock, or if the last one we woke up hasn't started yet. */ if (!(JFS_SBI(tblk->sb)->commit_state & IN_LAZYCOMMIT) && !jfs_commit_thread_waking) { jfs_commit_thread_waking = 1; wake_up(&jfs_commit_thread_wait); } LAZY_UNLOCK(flags); } static void LogSyncRelease(struct metapage * mp) { struct jfs_log *log = mp->log; assert(mp->nohomeok); assert(log); metapage_homeok(mp); } /* * txQuiesce * * Block all new transactions and push anonymous transactions to * completion * * This does almost the same thing as jfs_sync below. We don't * worry about deadlocking when jfs_tlocks_low is set, since we would * expect jfs_sync to get us out of that jam. */ void txQuiesce(struct super_block *sb) { struct inode *ip; struct jfs_inode_info *jfs_ip; struct jfs_log *log = JFS_SBI(sb)->log; tid_t tid; set_bit(log_QUIESCE, &log->flag); TXN_LOCK(); restart: while (!list_empty(&TxAnchor.anon_list)) { jfs_ip = list_entry(TxAnchor.anon_list.next, struct jfs_inode_info, anon_inode_list); ip = &jfs_ip->vfs_inode; /* * inode will be removed from anonymous list * when it is committed */ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE | COMMIT_FORCE); mutex_lock(&jfs_ip->commit_mutex); txCommit(tid, 1, &ip, 0); txEnd(tid); mutex_unlock(&jfs_ip->commit_mutex); /* * Just to be safe. I don't know how * long we can run without blocking */ cond_resched(); TXN_LOCK(); } /* * If jfs_sync is running in parallel, there could be some inodes * on anon_list2. Let's check. */ if (!list_empty(&TxAnchor.anon_list2)) { list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); goto restart; } TXN_UNLOCK(); /* * We may need to kick off the group commit */ jfs_flush_journal(log, 0); } /* * txResume() * * Allows transactions to start again following txQuiesce */ void txResume(struct super_block *sb) { struct jfs_log *log = JFS_SBI(sb)->log; clear_bit(log_QUIESCE, &log->flag); TXN_WAKEUP(&log->syncwait); } /* * jfs_sync(void) * * To be run as a kernel daemon. This is awakened when tlocks run low. * We write any inodes that have anonymous tlocks so they will become * available. */ int jfs_sync(void *arg) { struct inode *ip; struct jfs_inode_info *jfs_ip; tid_t tid; set_freezable(); do { /* * write each inode on the anonymous inode list */ TXN_LOCK(); while (jfs_tlocks_low && !list_empty(&TxAnchor.anon_list)) { jfs_ip = list_entry(TxAnchor.anon_list.next, struct jfs_inode_info, anon_inode_list); ip = &jfs_ip->vfs_inode; if (! igrab(ip)) { /* * Inode is being freed */ list_del_init(&jfs_ip->anon_inode_list); } else if (mutex_trylock(&jfs_ip->commit_mutex)) { /* * inode will be removed from anonymous list * when it is committed */ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE); txCommit(tid, 1, &ip, 0); txEnd(tid); mutex_unlock(&jfs_ip->commit_mutex); iput(ip); /* * Just to be safe. I don't know how * long we can run without blocking */ cond_resched(); TXN_LOCK(); } else { /* We can't get the commit mutex. It may * be held by a thread waiting for tlock's * so let's not block here. Save it to * put back on the anon_list. */ /* Move from anon_list to anon_list2 */ list_move(&jfs_ip->anon_inode_list, &TxAnchor.anon_list2); TXN_UNLOCK(); iput(ip); TXN_LOCK(); } } /* Add anon_list2 back to anon_list */ list_splice_init(&TxAnchor.anon_list2, &TxAnchor.anon_list); if (freezing(current)) { TXN_UNLOCK(); try_to_freeze(); } else { set_current_state(TASK_INTERRUPTIBLE); TXN_UNLOCK(); schedule(); } } while (!kthread_should_stop()); jfs_info("jfs_sync being killed"); return 0; } #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_DEBUG) int jfs_txanchor_proc_show(struct seq_file *m, void *v) { char *freewait; char *freelockwait; char *lowlockwait; freewait = waitqueue_active(&TxAnchor.freewait) ? "active" : "empty"; freelockwait = waitqueue_active(&TxAnchor.freelockwait) ? "active" : "empty"; lowlockwait = waitqueue_active(&TxAnchor.lowlockwait) ? "active" : "empty"; seq_printf(m, "JFS TxAnchor\n" "============\n" "freetid = %d\n" "freewait = %s\n" "freelock = %d\n" "freelockwait = %s\n" "lowlockwait = %s\n" "tlocksInUse = %d\n" "jfs_tlocks_low = %d\n" "unlock_queue is %sempty\n", TxAnchor.freetid, freewait, TxAnchor.freelock, freelockwait, lowlockwait, TxAnchor.tlocksInUse, jfs_tlocks_low, list_empty(&TxAnchor.unlock_queue) ? "" : "not "); return 0; } #endif #if defined(CONFIG_PROC_FS) && defined(CONFIG_JFS_STATISTICS) int jfs_txstats_proc_show(struct seq_file *m, void *v) { seq_printf(m, "JFS TxStats\n" "===========\n" "calls to txBegin = %d\n" "txBegin blocked by sync barrier = %d\n" "txBegin blocked by tlocks low = %d\n" "txBegin blocked by no free tid = %d\n" "calls to txBeginAnon = %d\n" "txBeginAnon blocked by sync barrier = %d\n" "txBeginAnon blocked by tlocks low = %d\n" "calls to txLockAlloc = %d\n" "tLockAlloc blocked by no free lock = %d\n", TxStat.txBegin, TxStat.txBegin_barrier, TxStat.txBegin_lockslow, TxStat.txBegin_freetid, TxStat.txBeginAnon, TxStat.txBeginAnon_barrier, TxStat.txBeginAnon_lockslow, TxStat.txLockAlloc, TxStat.txLockAlloc_freelock); return 0; } #endif |
| 2 2 2 2 2 2 2 2 2 2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) International Business Machines Corp., 2006 * * Author: Artem Bityutskiy (Битюцкий Артём) */ /* * The UBI Eraseblock Association (EBA) sub-system. * * This sub-system is responsible for I/O to/from logical eraseblock. * * Although in this implementation the EBA table is fully kept and managed in * RAM, which assumes poor scalability, it might be (partially) maintained on * flash in future implementations. * * The EBA sub-system implements per-logical eraseblock locking. Before * accessing a logical eraseblock it is locked for reading or writing. The * per-logical eraseblock locking is implemented by means of the lock tree. The * lock tree is an RB-tree which refers all the currently locked logical * eraseblocks. The lock tree elements are &struct ubi_ltree_entry objects. * They are indexed by (@vol_id, @lnum) pairs. * * EBA also maintains the global sequence counter which is incremented each * time a logical eraseblock is mapped to a physical eraseblock and it is * stored in the volume identifier header. This means that each VID header has * a unique sequence number. The sequence number is only increased an we assume * 64 bits is enough to never overflow. */ #include <linux/slab.h> #include <linux/crc32.h> #include <linux/err.h> #include "ubi.h" /** * struct ubi_eba_entry - structure encoding a single LEB -> PEB association * @pnum: the physical eraseblock number attached to the LEB * * This structure is encoding a LEB -> PEB association. Note that the LEB * number is not stored here, because it is the index used to access the * entries table. */ struct ubi_eba_entry { int pnum; }; /** * struct ubi_eba_table - LEB -> PEB association information * @entries: the LEB to PEB mapping (one entry per LEB). * * This structure is private to the EBA logic and should be kept here. * It is encoding the LEB to PEB association table, and is subject to * changes. */ struct ubi_eba_table { struct ubi_eba_entry *entries; }; /** * ubi_next_sqnum - get next sequence number. * @ubi: UBI device description object * * This function returns next sequence number to use, which is just the current * global sequence counter value. It also increases the global sequence * counter. */ unsigned long long ubi_next_sqnum(struct ubi_device *ubi) { unsigned long long sqnum; spin_lock(&ubi->ltree_lock); sqnum = ubi->global_sqnum++; spin_unlock(&ubi->ltree_lock); return sqnum; } /** * ubi_get_compat - get compatibility flags of a volume. * @ubi: UBI device description object * @vol_id: volume ID * * This function returns compatibility flags for an internal volume. User * volumes have no compatibility flags, so %0 is returned. */ static int ubi_get_compat(const struct ubi_device *ubi, int vol_id) { if (vol_id == UBI_LAYOUT_VOLUME_ID) return UBI_LAYOUT_VOLUME_COMPAT; return 0; } /** * ubi_eba_get_ldesc - get information about a LEB * @vol: volume description object * @lnum: logical eraseblock number * @ldesc: the LEB descriptor to fill * * Used to query information about a specific LEB. * It is currently only returning the physical position of the LEB, but will be * extended to provide more information. */ void ubi_eba_get_ldesc(struct ubi_volume *vol, int lnum, struct ubi_eba_leb_desc *ldesc) { ldesc->lnum = lnum; ldesc->pnum = vol->eba_tbl->entries[lnum].pnum; } /** * ubi_eba_create_table - allocate a new EBA table and initialize it with all * LEBs unmapped * @vol: volume containing the EBA table to copy * @nentries: number of entries in the table * * Allocate a new EBA table and initialize it with all LEBs unmapped. * Returns a valid pointer if it succeed, an ERR_PTR() otherwise. */ struct ubi_eba_table *ubi_eba_create_table(struct ubi_volume *vol, int nentries) { struct ubi_eba_table *tbl; int err = -ENOMEM; int i; tbl = kzalloc(sizeof(*tbl), GFP_KERNEL); if (!tbl) return ERR_PTR(-ENOMEM); tbl->entries = kmalloc_array(nentries, sizeof(*tbl->entries), GFP_KERNEL); if (!tbl->entries) goto err; for (i = 0; i < nentries; i++) tbl->entries[i].pnum = UBI_LEB_UNMAPPED; return tbl; err: kfree(tbl); return ERR_PTR(err); } /** * ubi_eba_destroy_table - destroy an EBA table * @tbl: the table to destroy * * Destroy an EBA table. */ void ubi_eba_destroy_table(struct ubi_eba_table *tbl) { if (!tbl) return; kfree(tbl->entries); kfree(tbl); } /** * ubi_eba_copy_table - copy the EBA table attached to vol into another table * @vol: volume containing the EBA table to copy * @dst: destination * @nentries: number of entries to copy * * Copy the EBA table stored in vol into the one pointed by dst. */ void ubi_eba_copy_table(struct ubi_volume *vol, struct ubi_eba_table *dst, int nentries) { struct ubi_eba_table *src; int i; ubi_assert(dst && vol && vol->eba_tbl); src = vol->eba_tbl; for (i = 0; i < nentries; i++) dst->entries[i].pnum = src->entries[i].pnum; } /** * ubi_eba_replace_table - assign a new EBA table to a volume * @vol: volume containing the EBA table to copy * @tbl: new EBA table * * Assign a new EBA table to the volume and release the old one. */ void ubi_eba_replace_table(struct ubi_volume *vol, struct ubi_eba_table *tbl) { ubi_eba_destroy_table(vol->eba_tbl); vol->eba_tbl = tbl; } /** * ltree_lookup - look up the lock tree. * @ubi: UBI device description object * @vol_id: volume ID * @lnum: logical eraseblock number * * This function returns a pointer to the corresponding &struct ubi_ltree_entry * object if the logical eraseblock is locked and %NULL if it is not. * @ubi->ltree_lock has to be locked. */ static struct ubi_ltree_entry *ltree_lookup(struct ubi_device *ubi, int vol_id, int lnum) { struct rb_node *p; p = ubi->ltree.rb_node; while (p) { struct ubi_ltree_entry *le; le = rb_entry(p, struct ubi_ltree_entry, rb); if (vol_id < le->vol_id) p = p->rb_left; else if (vol_id > le->vol_id) p = p->rb_right; else { if (lnum < le->lnum) p = p->rb_left; else if (lnum > le->lnum) p = p->rb_right; else return le; } } return NULL; } /** * ltree_add_entry - add new entry to the lock tree. * @ubi: UBI device description object * @vol_id: volume ID * @lnum: logical eraseblock number * * This function adds new entry for logical eraseblock (@vol_id, @lnum) to the * lock tree. If such entry is already there, its usage counter is increased. * Returns pointer to the lock tree entry or %-ENOMEM if memory allocation * failed. */ static struct ubi_ltree_entry *ltree_add_entry(struct ubi_device *ubi, int vol_id, int lnum) { struct ubi_ltree_entry *le, *le1, *le_free; le = kmalloc(sizeof(struct ubi_ltree_entry), GFP_NOFS); if (!le) return ERR_PTR(-ENOMEM); le->users = 0; init_rwsem(&le->mutex); le->vol_id = vol_id; le->lnum = lnum; spin_lock(&ubi->ltree_lock); le1 = ltree_lookup(ubi, vol_id, lnum); if (le1) { /* * This logical eraseblock is already locked. The newly * allocated lock entry is not needed. */ le_free = le; le = le1; } else { struct rb_node **p, *parent = NULL; /* * No lock entry, add the newly allocated one to the * @ubi->ltree RB-tree. */ le_free = NULL; p = &ubi->ltree.rb_node; while (*p) { parent = *p; le1 = rb_entry(parent, struct ubi_ltree_entry, rb); if (vol_id < le1->vol_id) p = &(*p)->rb_left; else if (vol_id > le1->vol_id) p = &(*p)->rb_right; else { ubi_assert(lnum != le1->lnum); if (lnum < le1->lnum) p = &(*p)->rb_left; else p = &(*p)->rb_right; } } rb_link_node(&le->rb, parent, p); rb_insert_color(&le->rb, &ubi->ltree); } le->users += 1; spin_unlock(&ubi->ltree_lock); kfree(le_free); return le; } /** * leb_read_lock - lock logical eraseblock for reading. * @ubi: UBI device description object * @vol_id: volume ID * @lnum: logical eraseblock number * * This function locks a logical eraseblock for reading. Returns zero in case * of success and a negative error code in case of failure. */ static int leb_read_lock(struct ubi_device *ubi, int vol_id, int lnum) { struct ubi_ltree_entry *le; le = ltree_add_entry(ubi, vol_id, lnum); if (IS_ERR(le)) return PTR_ERR(le); down_read(&le->mutex); return 0; } /** * leb_read_unlock - unlock logical eraseblock. * @ubi: UBI device description object * @vol_id: volume ID * @lnum: logical eraseblock number */ static void leb_read_unlock(struct ubi_device *ubi, int vol_id, int lnum) { struct ubi_ltree_entry *le; spin_lock(&ubi->ltree_lock); le = ltree_lookup(ubi, vol_id, lnum); le->users -= 1; ubi_assert(le->users >= 0); up_read(&le->mutex); if (le->users == 0) { rb_erase(&le->rb, &ubi->ltree); kfree(le); } spin_unlock(&ubi->ltree_lock); } /** * leb_write_lock - lock logical eraseblock for writing. * @ubi: UBI device description object * @vol_id: volume ID * @lnum: logical eraseblock number * * This function locks a logical eraseblock for writing. Returns zero in case * of success and a negative error code in case of failure. */ static int leb_write_lock(struct ubi_device *ubi, int vol_id, int lnum) { struct ubi_ltree_entry *le; le = ltree_add_entry(ubi, vol_id, lnum); if (IS_ERR(le)) return PTR_ERR(le); down_write(&le->mutex); return 0; } /** * leb_write_trylock - try to lock logical eraseblock for writing. * @ubi: UBI device description object * @vol_id: volume ID * @lnum: logical eraseblock number * * This function locks a logical eraseblock for writing if there is no * contention and does nothing if there is contention. Returns %0 in case of * success, %1 in case of contention, and a negative error code in case of * failure. */ static int leb_write_trylock(struct ubi_device *ubi, int vol_id, int lnum) { struct ubi_ltree_entry *le; le = ltree_add_entry(ubi, vol_id, lnum); if (IS_ERR(le)) return PTR_ERR(le); if (down_write_trylock(&le->mutex)) return 0; /* Contention, cancel */ spin_lock(&ubi->ltree_lock); le->users -= 1; ubi_assert(le->users >= 0); if (le->users == 0) { rb_erase(&le->rb, &ubi->ltree); kfree(le); } spin_unlock(&ubi->ltree_lock); return 1; } /** * leb_write_unlock - unlock logical eraseblock. * @ubi: UBI device description object * @vol_id: volume ID * @lnum: logical eraseblock number */ static void leb_write_unlock(struct ubi_device *ubi, int vol_id, int lnum) { struct ubi_ltree_entry *le; spin_lock(&ubi->ltree_lock); le = ltree_lookup(ubi, vol_id, lnum); le->users -= 1; ubi_assert(le->users >= 0); up_write(&le->mutex); if (le->users == 0) { rb_erase(&le->rb, &ubi->ltree); kfree(le); } spin_unlock(&ubi->ltree_lock); } /** * ubi_eba_is_mapped - check if a LEB is mapped. * @vol: volume description object * @lnum: logical eraseblock number * * This function returns true if the LEB is mapped, false otherwise. */ bool ubi_eba_is_mapped(struct ubi_volume *vol, int lnum) { return vol->eba_tbl->entries[lnum].pnum >= 0; } /** * ubi_eba_unmap_leb - un-map logical eraseblock. * @ubi: UBI device description object * @vol: volume description object * @lnum: logical eraseblock number * * This function un-maps logical eraseblock @lnum and schedules corresponding * physical eraseblock for erasure. Returns zero in case of success and a * negative error code in case of failure. */ int ubi_eba_unmap_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum) { int err, pnum, vol_id = vol->vol_id; if (ubi->ro_mode) return -EROFS; err = leb_write_lock(ubi, vol_id, lnum); if (err) return err; pnum = vol->eba_tbl->entries[lnum].pnum; if (pnum < 0) /* This logical eraseblock is already unmapped */ goto out_unlock; dbg_eba("erase LEB %d:%d, PEB %d", vol_id, lnum, pnum); down_read(&ubi->fm_eba_sem); vol->eba_tbl->entries[lnum].pnum = UBI_LEB_UNMAPPED; up_read(&ubi->fm_eba_sem); err = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 0); out_unlock: leb_write_unlock(ubi, vol_id, lnum); return err; } #ifdef CONFIG_MTD_UBI_FASTMAP /** * check_mapping - check and fixup a mapping * @ubi: UBI device description object * @vol: volume description object * @lnum: logical eraseblock number * @pnum: physical eraseblock number * * Checks whether a given mapping is valid. Fastmap cannot track LEB unmap * operations, if such an operation is interrupted the mapping still looks * good, but upon first read an ECC is reported to the upper layer. * Normaly during the full-scan at attach time this is fixed, for Fastmap * we have to deal with it while reading. * If the PEB behind a LEB shows this symthom we change the mapping to * %UBI_LEB_UNMAPPED and schedule the PEB for erasure. * * Returns 0 on success, negative error code in case of failure. */ static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, int *pnum) { int err; struct ubi_vid_io_buf *vidb; struct ubi_vid_hdr *vid_hdr; if (!ubi->fast_attach) return 0; if (!vol->checkmap || test_bit(lnum, vol->checkmap)) return 0; vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) return -ENOMEM; err = ubi_io_read_vid_hdr(ubi, *pnum, vidb, 0); if (err > 0 && err != UBI_IO_BITFLIPS) { int torture = 0; switch (err) { case UBI_IO_FF: case UBI_IO_FF_BITFLIPS: case UBI_IO_BAD_HDR: case UBI_IO_BAD_HDR_EBADMSG: break; default: ubi_assert(0); } if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_FF_BITFLIPS) torture = 1; down_read(&ubi->fm_eba_sem); vol->eba_tbl->entries[lnum].pnum = UBI_LEB_UNMAPPED; up_read(&ubi->fm_eba_sem); ubi_wl_put_peb(ubi, vol->vol_id, lnum, *pnum, torture); *pnum = UBI_LEB_UNMAPPED; } else if (err < 0) { ubi_err(ubi, "unable to read VID header back from PEB %i: %i", *pnum, err); goto out_free; } else { int found_vol_id, found_lnum; ubi_assert(err == 0 || err == UBI_IO_BITFLIPS); vid_hdr = ubi_get_vid_hdr(vidb); found_vol_id = be32_to_cpu(vid_hdr->vol_id); found_lnum = be32_to_cpu(vid_hdr->lnum); if (found_lnum != lnum || found_vol_id != vol->vol_id) { ubi_err(ubi, "EBA mismatch! PEB %i is LEB %i:%i instead of LEB %i:%i", *pnum, found_vol_id, found_lnum, vol->vol_id, lnum); ubi_ro_mode(ubi); err = -EINVAL; goto out_free; } } set_bit(lnum, vol->checkmap); err = 0; out_free: ubi_free_vid_buf(vidb); return err; } #else static int check_mapping(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, int *pnum) { return 0; } #endif /** * ubi_eba_read_leb - read data. * @ubi: UBI device description object * @vol: volume description object * @lnum: logical eraseblock number * @buf: buffer to store the read data * @offset: offset from where to read * @len: how many bytes to read * @check: data CRC check flag * * If the logical eraseblock @lnum is unmapped, @buf is filled with 0xFF * bytes. The @check flag only makes sense for static volumes and forces * eraseblock data CRC checking. * * In case of success this function returns zero. In case of a static volume, * if data CRC mismatches - %-EBADMSG is returned. %-EBADMSG may also be * returned for any volume type if an ECC error was detected by the MTD device * driver. Other negative error cored may be returned in case of other errors. */ int ubi_eba_read_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, void *buf, int offset, int len, int check) { int err, pnum, scrub = 0, vol_id = vol->vol_id; struct ubi_vid_io_buf *vidb; struct ubi_vid_hdr *vid_hdr; uint32_t crc; err = leb_read_lock(ubi, vol_id, lnum); if (err) return err; pnum = vol->eba_tbl->entries[lnum].pnum; if (pnum >= 0) { err = check_mapping(ubi, vol, lnum, &pnum); if (err < 0) goto out_unlock; } if (pnum == UBI_LEB_UNMAPPED) { /* * The logical eraseblock is not mapped, fill the whole buffer * with 0xFF bytes. The exception is static volumes for which * it is an error to read unmapped logical eraseblocks. */ dbg_eba("read %d bytes from offset %d of LEB %d:%d (unmapped)", len, offset, vol_id, lnum); leb_read_unlock(ubi, vol_id, lnum); ubi_assert(vol->vol_type != UBI_STATIC_VOLUME); memset(buf, 0xFF, len); return 0; } dbg_eba("read %d bytes from offset %d of LEB %d:%d, PEB %d", len, offset, vol_id, lnum, pnum); if (vol->vol_type == UBI_DYNAMIC_VOLUME) check = 0; retry: if (check) { vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) { err = -ENOMEM; goto out_unlock; } vid_hdr = ubi_get_vid_hdr(vidb); err = ubi_io_read_vid_hdr(ubi, pnum, vidb, 1); if (err && err != UBI_IO_BITFLIPS) { if (err > 0) { /* * The header is either absent or corrupted. * The former case means there is a bug - * switch to read-only mode just in case. * The latter case means a real corruption - we * may try to recover data. FIXME: but this is * not implemented. */ if (err == UBI_IO_BAD_HDR_EBADMSG || err == UBI_IO_BAD_HDR) { ubi_warn(ubi, "corrupted VID header at PEB %d, LEB %d:%d", pnum, vol_id, lnum); err = -EBADMSG; } else { /* * Ending up here in the non-Fastmap case * is a clear bug as the VID header had to * be present at scan time to have it referenced. * With fastmap the story is more complicated. * Fastmap has the mapping info without the need * of a full scan. So the LEB could have been * unmapped, Fastmap cannot know this and keeps * the LEB referenced. * This is valid and works as the layer above UBI * has to do bookkeeping about used/referenced * LEBs in any case. */ if (ubi->fast_attach) { err = -EBADMSG; } else { err = -EINVAL; ubi_ro_mode(ubi); } } } goto out_free; } else if (err == UBI_IO_BITFLIPS) scrub = 1; ubi_assert(lnum < be32_to_cpu(vid_hdr->used_ebs)); ubi_assert(len == be32_to_cpu(vid_hdr->data_size)); crc = be32_to_cpu(vid_hdr->data_crc); ubi_free_vid_buf(vidb); } err = ubi_io_read_data(ubi, buf, pnum, offset, len); if (err) { if (err == UBI_IO_BITFLIPS) scrub = 1; else if (mtd_is_eccerr(err)) { if (vol->vol_type == UBI_DYNAMIC_VOLUME) goto out_unlock; scrub = 1; if (!check) { ubi_msg(ubi, "force data checking"); check = 1; goto retry; } } else goto out_unlock; } if (check) { uint32_t crc1 = crc32(UBI_CRC32_INIT, buf, len); if (crc1 != crc) { ubi_warn(ubi, "CRC error: calculated %#08x, must be %#08x", crc1, crc); err = -EBADMSG; goto out_unlock; } } if (scrub) err = ubi_wl_scrub_peb(ubi, pnum); leb_read_unlock(ubi, vol_id, lnum); return err; out_free: ubi_free_vid_buf(vidb); out_unlock: leb_read_unlock(ubi, vol_id, lnum); return err; } /** * ubi_eba_read_leb_sg - read data into a scatter gather list. * @ubi: UBI device description object * @vol: volume description object * @lnum: logical eraseblock number * @sgl: UBI scatter gather list to store the read data * @offset: offset from where to read * @len: how many bytes to read * @check: data CRC check flag * * This function works exactly like ubi_eba_read_leb(). But instead of * storing the read data into a buffer it writes to an UBI scatter gather * list. */ int ubi_eba_read_leb_sg(struct ubi_device *ubi, struct ubi_volume *vol, struct ubi_sgl *sgl, int lnum, int offset, int len, int check) { int to_read; int ret; struct scatterlist *sg; for (;;) { ubi_assert(sgl->list_pos < UBI_MAX_SG_COUNT); sg = &sgl->sg[sgl->list_pos]; if (len < sg->length - sgl->page_pos) to_read = len; else to_read = sg->length - sgl->page_pos; ret = ubi_eba_read_leb(ubi, vol, lnum, sg_virt(sg) + sgl->page_pos, offset, to_read, check); if (ret < 0) return ret; offset += to_read; len -= to_read; if (!len) { sgl->page_pos += to_read; if (sgl->page_pos == sg->length) { sgl->list_pos++; sgl->page_pos = 0; } break; } sgl->list_pos++; sgl->page_pos = 0; } return ret; } /** * try_recover_peb - try to recover from write failure. * @vol: volume description object * @pnum: the physical eraseblock to recover * @lnum: logical eraseblock number * @buf: data which was not written because of the write failure * @offset: offset of the failed write * @len: how many bytes should have been written * @vidb: VID buffer * @retry: whether the caller should retry in case of failure * * This function is called in case of a write failure and moves all good data * from the potentially bad physical eraseblock to a good physical eraseblock. * This function also writes the data which was not written due to the failure. * Returns 0 in case of success, and a negative error code in case of failure. * In case of failure, the %retry parameter is set to false if this is a fatal * error (retrying won't help), and true otherwise. */ static int try_recover_peb(struct ubi_volume *vol, int pnum, int lnum, const void *buf, int offset, int len, struct ubi_vid_io_buf *vidb, bool *retry) { struct ubi_device *ubi = vol->ubi; struct ubi_vid_hdr *vid_hdr; int new_pnum, err, vol_id = vol->vol_id, data_size; uint32_t crc; *retry = false; new_pnum = ubi_wl_get_peb(ubi); if (new_pnum < 0) { err = new_pnum; goto out_put; } ubi_msg(ubi, "recover PEB %d, move data to PEB %d", pnum, new_pnum); err = ubi_io_read_vid_hdr(ubi, pnum, vidb, 1); if (err && err != UBI_IO_BITFLIPS) { if (err > 0) err = -EIO; goto out_put; } vid_hdr = ubi_get_vid_hdr(vidb); ubi_assert(vid_hdr->vol_type == UBI_VID_DYNAMIC); mutex_lock(&ubi->buf_mutex); memset(ubi->peb_buf + offset, 0xFF, len); /* Read everything before the area where the write failure happened */ if (offset > 0) { err = ubi_io_read_data(ubi, ubi->peb_buf, pnum, 0, offset); if (err && err != UBI_IO_BITFLIPS) goto out_unlock; } *retry = true; memcpy(ubi->peb_buf + offset, buf, len); data_size = offset + len; crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size); vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); vid_hdr->copy_flag = 1; vid_hdr->data_size = cpu_to_be32(data_size); vid_hdr->data_crc = cpu_to_be32(crc); err = ubi_io_write_vid_hdr(ubi, new_pnum, vidb); if (err) goto out_unlock; err = ubi_io_write_data(ubi, ubi->peb_buf, new_pnum, 0, data_size); out_unlock: mutex_unlock(&ubi->buf_mutex); if (!err) vol->eba_tbl->entries[lnum].pnum = new_pnum; out_put: up_read(&ubi->fm_eba_sem); if (!err) { ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); ubi_msg(ubi, "data was successfully recovered"); } else if (new_pnum >= 0) { /* * Bad luck? This physical eraseblock is bad too? Crud. Let's * try to get another one. */ ubi_wl_put_peb(ubi, vol_id, lnum, new_pnum, 1); ubi_warn(ubi, "failed to write to PEB %d", new_pnum); } return err; } /** * recover_peb - recover from write failure. * @ubi: UBI device description object * @pnum: the physical eraseblock to recover * @vol_id: volume ID * @lnum: logical eraseblock number * @buf: data which was not written because of the write failure * @offset: offset of the failed write * @len: how many bytes should have been written * * This function is called in case of a write failure and moves all good data * from the potentially bad physical eraseblock to a good physical eraseblock. * This function also writes the data which was not written due to the failure. * Returns 0 in case of success, and a negative error code in case of failure. * This function tries %UBI_IO_RETRIES before giving up. */ static int recover_peb(struct ubi_device *ubi, int pnum, int vol_id, int lnum, const void *buf, int offset, int len) { int err, idx = vol_id2idx(ubi, vol_id), tries; struct ubi_volume *vol = ubi->volumes[idx]; struct ubi_vid_io_buf *vidb; vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) return -ENOMEM; for (tries = 0; tries <= UBI_IO_RETRIES; tries++) { bool retry; err = try_recover_peb(vol, pnum, lnum, buf, offset, len, vidb, &retry); if (!err || !retry) break; ubi_msg(ubi, "try again"); } ubi_free_vid_buf(vidb); return err; } /** * try_write_vid_and_data - try to write VID header and data to a new PEB. * @vol: volume description object * @lnum: logical eraseblock number * @vidb: the VID buffer to write * @buf: buffer containing the data * @offset: where to start writing data * @len: how many bytes should be written * * This function tries to write VID header and data belonging to logical * eraseblock @lnum of volume @vol to a new physical eraseblock. Returns zero * in case of success and a negative error code in case of failure. * In case of error, it is possible that something was still written to the * flash media, but may be some garbage. */ static int try_write_vid_and_data(struct ubi_volume *vol, int lnum, struct ubi_vid_io_buf *vidb, const void *buf, int offset, int len) { struct ubi_device *ubi = vol->ubi; int pnum, opnum, err, err2, vol_id = vol->vol_id; pnum = ubi_wl_get_peb(ubi); if (pnum < 0) { err = pnum; goto out_put; } opnum = vol->eba_tbl->entries[lnum].pnum; dbg_eba("write VID hdr and %d bytes at offset %d of LEB %d:%d, PEB %d", len, offset, vol_id, lnum, pnum); err = ubi_io_write_vid_hdr(ubi, pnum, vidb); if (err) { ubi_warn(ubi, "failed to write VID header to LEB %d:%d, PEB %d", vol_id, lnum, pnum); goto out_put; } if (len) { err = ubi_io_write_data(ubi, buf, pnum, offset, len); if (err) { ubi_warn(ubi, "failed to write %d bytes at offset %d of LEB %d:%d, PEB %d", len, offset, vol_id, lnum, pnum); goto out_put; } } vol->eba_tbl->entries[lnum].pnum = pnum; out_put: up_read(&ubi->fm_eba_sem); if (err && pnum >= 0) { err2 = ubi_wl_put_peb(ubi, vol_id, lnum, pnum, 1); if (err2) { ubi_warn(ubi, "failed to return physical eraseblock %d, error %d", pnum, err2); } } else if (!err && opnum >= 0) { err2 = ubi_wl_put_peb(ubi, vol_id, lnum, opnum, 0); if (err2) { ubi_warn(ubi, "failed to return physical eraseblock %d, error %d", opnum, err2); } } return err; } /** * ubi_eba_write_leb - write data to dynamic volume. * @ubi: UBI device description object * @vol: volume description object * @lnum: logical eraseblock number * @buf: the data to write * @offset: offset within the logical eraseblock where to write * @len: how many bytes to write * * This function writes data to logical eraseblock @lnum of a dynamic volume * @vol. Returns zero in case of success and a negative error code in case * of failure. In case of error, it is possible that something was still * written to the flash media, but may be some garbage. * This function retries %UBI_IO_RETRIES times before giving up. */ int ubi_eba_write_leb(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, const void *buf, int offset, int len) { int err, pnum, tries, vol_id = vol->vol_id; struct ubi_vid_io_buf *vidb; struct ubi_vid_hdr *vid_hdr; if (ubi->ro_mode) return -EROFS; err = leb_write_lock(ubi, vol_id, lnum); if (err) return err; pnum = vol->eba_tbl->entries[lnum].pnum; if (pnum >= 0) { err = check_mapping(ubi, vol, lnum, &pnum); if (err < 0) goto out; } if (pnum >= 0) { dbg_eba("write %d bytes at offset %d of LEB %d:%d, PEB %d", len, offset, vol_id, lnum, pnum); err = ubi_io_write_data(ubi, buf, pnum, offset, len); if (err) { ubi_warn(ubi, "failed to write data to PEB %d", pnum); if (err == -EIO && ubi->bad_allowed) err = recover_peb(ubi, pnum, vol_id, lnum, buf, offset, len); } goto out; } /* * The logical eraseblock is not mapped. We have to get a free physical * eraseblock and write the volume identifier header there first. */ vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) { leb_write_unlock(ubi, vol_id, lnum); return -ENOMEM; } vid_hdr = ubi_get_vid_hdr(vidb); vid_hdr->vol_type = UBI_VID_DYNAMIC; vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); vid_hdr->vol_id = cpu_to_be32(vol_id); vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); vid_hdr->data_pad = cpu_to_be32(vol->data_pad); for (tries = 0; tries <= UBI_IO_RETRIES; tries++) { err = try_write_vid_and_data(vol, lnum, vidb, buf, offset, len); if (err != -EIO || !ubi->bad_allowed) break; /* * Fortunately, this is the first write operation to this * physical eraseblock, so just put it and request a new one. * We assume that if this physical eraseblock went bad, the * erase code will handle that. */ vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); ubi_msg(ubi, "try another PEB"); } ubi_free_vid_buf(vidb); out: if (err) ubi_ro_mode(ubi); leb_write_unlock(ubi, vol_id, lnum); return err; } /** * ubi_eba_write_leb_st - write data to static volume. * @ubi: UBI device description object * @vol: volume description object * @lnum: logical eraseblock number * @buf: data to write * @len: how many bytes to write * @used_ebs: how many logical eraseblocks will this volume contain * * This function writes data to logical eraseblock @lnum of static volume * @vol. The @used_ebs argument should contain total number of logical * eraseblock in this static volume. * * When writing to the last logical eraseblock, the @len argument doesn't have * to be aligned to the minimal I/O unit size. Instead, it has to be equivalent * to the real data size, although the @buf buffer has to contain the * alignment. In all other cases, @len has to be aligned. * * It is prohibited to write more than once to logical eraseblocks of static * volumes. This function returns zero in case of success and a negative error * code in case of failure. */ int ubi_eba_write_leb_st(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, const void *buf, int len, int used_ebs) { int err, tries, data_size = len, vol_id = vol->vol_id; struct ubi_vid_io_buf *vidb; struct ubi_vid_hdr *vid_hdr; uint32_t crc; if (ubi->ro_mode) return -EROFS; if (lnum == used_ebs - 1) /* If this is the last LEB @len may be unaligned */ len = ALIGN(data_size, ubi->min_io_size); else ubi_assert(!(len & (ubi->min_io_size - 1))); vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) return -ENOMEM; vid_hdr = ubi_get_vid_hdr(vidb); err = leb_write_lock(ubi, vol_id, lnum); if (err) goto out; vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); vid_hdr->vol_id = cpu_to_be32(vol_id); vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); vid_hdr->data_pad = cpu_to_be32(vol->data_pad); crc = crc32(UBI_CRC32_INIT, buf, data_size); vid_hdr->vol_type = UBI_VID_STATIC; vid_hdr->data_size = cpu_to_be32(data_size); vid_hdr->used_ebs = cpu_to_be32(used_ebs); vid_hdr->data_crc = cpu_to_be32(crc); ubi_assert(vol->eba_tbl->entries[lnum].pnum < 0); for (tries = 0; tries <= UBI_IO_RETRIES; tries++) { err = try_write_vid_and_data(vol, lnum, vidb, buf, 0, len); if (err != -EIO || !ubi->bad_allowed) break; vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); ubi_msg(ubi, "try another PEB"); } if (err) ubi_ro_mode(ubi); leb_write_unlock(ubi, vol_id, lnum); out: ubi_free_vid_buf(vidb); return err; } /* * ubi_eba_atomic_leb_change - change logical eraseblock atomically. * @ubi: UBI device description object * @vol: volume description object * @lnum: logical eraseblock number * @buf: data to write * @len: how many bytes to write * * This function changes the contents of a logical eraseblock atomically. @buf * has to contain new logical eraseblock data, and @len - the length of the * data, which has to be aligned. This function guarantees that in case of an * unclean reboot the old contents is preserved. Returns zero in case of * success and a negative error code in case of failure. * * UBI reserves one LEB for the "atomic LEB change" operation, so only one * LEB change may be done at a time. This is ensured by @ubi->alc_mutex. */ int ubi_eba_atomic_leb_change(struct ubi_device *ubi, struct ubi_volume *vol, int lnum, const void *buf, int len) { int err, tries, vol_id = vol->vol_id; struct ubi_vid_io_buf *vidb; struct ubi_vid_hdr *vid_hdr; uint32_t crc; if (ubi->ro_mode) return -EROFS; if (len == 0) { /* * Special case when data length is zero. In this case the LEB * has to be unmapped and mapped somewhere else. */ err = ubi_eba_unmap_leb(ubi, vol, lnum); if (err) return err; return ubi_eba_write_leb(ubi, vol, lnum, NULL, 0, 0); } vidb = ubi_alloc_vid_buf(ubi, GFP_NOFS); if (!vidb) return -ENOMEM; vid_hdr = ubi_get_vid_hdr(vidb); mutex_lock(&ubi->alc_mutex); err = leb_write_lock(ubi, vol_id, lnum); if (err) goto out_mutex; vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); vid_hdr->vol_id = cpu_to_be32(vol_id); vid_hdr->lnum = cpu_to_be32(lnum); vid_hdr->compat = ubi_get_compat(ubi, vol_id); vid_hdr->data_pad = cpu_to_be32(vol->data_pad); crc = crc32(UBI_CRC32_INIT, buf, len); vid_hdr->vol_type = UBI_VID_DYNAMIC; vid_hdr->data_size = cpu_to_be32(len); vid_hdr->copy_flag = 1; vid_hdr->data_crc = cpu_to_be32(crc); dbg_eba("change LEB %d:%d", vol_id, lnum); for (tries = 0; tries <= UBI_IO_RETRIES; tries++) { err = try_write_vid_and_data(vol, lnum, vidb, buf, 0, len); if (err != -EIO || !ubi->bad_allowed) break; vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); ubi_msg(ubi, "try another PEB"); } /* * This flash device does not admit of bad eraseblocks or * something nasty and unexpected happened. Switch to read-only * mode just in case. */ if (err) ubi_ro_mode(ubi); leb_write_unlock(ubi, vol_id, lnum); out_mutex: mutex_unlock(&ubi->alc_mutex); ubi_free_vid_buf(vidb); return err; } /** * is_error_sane - check whether a read error is sane. * @err: code of the error happened during reading * * This is a helper function for 'ubi_eba_copy_leb()' which is called when we * cannot read data from the target PEB (an error @err happened). If the error * code is sane, then we treat this error as non-fatal. Otherwise the error is * fatal and UBI will be switched to R/O mode later. * * The idea is that we try not to switch to R/O mode if the read error is * something which suggests there was a real read problem. E.g., %-EIO. Or a * memory allocation failed (-%ENOMEM). Otherwise, it is safer to switch to R/O * mode, simply because we do not know what happened at the MTD level, and we * cannot handle this. E.g., the underlying driver may have become crazy, and * it is safer to switch to R/O mode to preserve the data. * * And bear in mind, this is about reading from the target PEB, i.e. the PEB * which we have just written. */ static int is_error_sane(int err) { if (err == -EIO || err == -ENOMEM || err == UBI_IO_BAD_HDR || err == UBI_IO_BAD_HDR_EBADMSG || err == -ETIMEDOUT) return 0; return 1; } /** * ubi_eba_copy_leb - copy logical eraseblock. * @ubi: UBI device description object * @from: physical eraseblock number from where to copy * @to: physical eraseblock number where to copy * @vidb: data structure from where the VID header is derived * * This function copies logical eraseblock from physical eraseblock @from to * physical eraseblock @to. The @vid_hdr buffer may be changed by this * function. Returns: * o %0 in case of success; * o %MOVE_CANCEL_RACE, %MOVE_TARGET_WR_ERR, %MOVE_TARGET_BITFLIPS, etc; * o a negative error code in case of failure. */ int ubi_eba_copy_leb(struct ubi_device *ubi, int from, int to, struct ubi_vid_io_buf *vidb) { int err, vol_id, lnum, data_size, aldata_size, idx; struct ubi_vid_hdr *vid_hdr = ubi_get_vid_hdr(vidb); struct ubi_volume *vol; uint32_t crc; ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem)); vol_id = be32_to_cpu(vid_hdr->vol_id); lnum = be32_to_cpu(vid_hdr->lnum); dbg_wl("copy LEB %d:%d, PEB %d to PEB %d", vol_id, lnum, from, to); if (vid_hdr->vol_type == UBI_VID_STATIC) { data_size = be32_to_cpu(vid_hdr->data_size); aldata_size = ALIGN(data_size, ubi->min_io_size); } else data_size = aldata_size = ubi->leb_size - be32_to_cpu(vid_hdr->data_pad); idx = vol_id2idx(ubi, vol_id); spin_lock(&ubi->volumes_lock); /* * Note, we may race with volume deletion, which means that the volume * this logical eraseblock belongs to might be being deleted. Since the * volume deletion un-maps all the volume's logical eraseblocks, it will * be locked in 'ubi_wl_put_peb()' and wait for the WL worker to finish. */ vol = ubi->volumes[idx]; spin_unlock(&ubi->volumes_lock); if (!vol) { /* No need to do further work, cancel */ dbg_wl("volume %d is being removed, cancel", vol_id); return MOVE_CANCEL_RACE; } /* * We do not want anybody to write to this logical eraseblock while we * are moving it, so lock it. * * Note, we are using non-waiting locking here, because we cannot sleep * on the LEB, since it may cause deadlocks. Indeed, imagine a task is * unmapping the LEB which is mapped to the PEB we are going to move * (@from). This task locks the LEB and goes sleep in the * 'ubi_wl_put_peb()' function on the @ubi->move_mutex. In turn, we are * holding @ubi->move_mutex and go sleep on the LEB lock. So, if the * LEB is already locked, we just do not move it and return * %MOVE_RETRY. Note, we do not return %MOVE_CANCEL_RACE here because * we do not know the reasons of the contention - it may be just a * normal I/O on this LEB, so we want to re-try. */ err = leb_write_trylock(ubi, vol_id, lnum); if (err) { dbg_wl("contention on LEB %d:%d, cancel", vol_id, lnum); return MOVE_RETRY; } /* * The LEB might have been put meanwhile, and the task which put it is * probably waiting on @ubi->move_mutex. No need to continue the work, * cancel it. */ if (vol->eba_tbl->entries[lnum].pnum != from) { dbg_wl("LEB %d:%d is no longer mapped to PEB %d, mapped to PEB %d, cancel", vol_id, lnum, from, vol->eba_tbl->entries[lnum].pnum); err = MOVE_CANCEL_RACE; goto out_unlock_leb; } /* * OK, now the LEB is locked and we can safely start moving it. Since * this function utilizes the @ubi->peb_buf buffer which is shared * with some other functions - we lock the buffer by taking the * @ubi->buf_mutex. */ mutex_lock(&ubi->buf_mutex); dbg_wl("read %d bytes of data", aldata_size); err = ubi_io_read_data(ubi, ubi->peb_buf, from, 0, aldata_size); if (err && err != UBI_IO_BITFLIPS) { ubi_warn(ubi, "error %d while reading data from PEB %d", err, from); err = MOVE_SOURCE_RD_ERR; goto out_unlock_buf; } /* * Now we have got to calculate how much data we have to copy. In * case of a static volume it is fairly easy - the VID header contains * the data size. In case of a dynamic volume it is more difficult - we * have to read the contents, cut 0xFF bytes from the end and copy only * the first part. We must do this to avoid writing 0xFF bytes as it * may have some side-effects. And not only this. It is important not * to include those 0xFFs to CRC because later the they may be filled * by data. */ if (vid_hdr->vol_type == UBI_VID_DYNAMIC) aldata_size = data_size = ubi_calc_data_len(ubi, ubi->peb_buf, data_size); cond_resched(); crc = crc32(UBI_CRC32_INIT, ubi->peb_buf, data_size); cond_resched(); /* * It may turn out to be that the whole @from physical eraseblock * contains only 0xFF bytes. Then we have to only write the VID header * and do not write any data. This also means we should not set * @vid_hdr->copy_flag, @vid_hdr->data_size, and @vid_hdr->data_crc. */ if (data_size > 0) { vid_hdr->copy_flag = 1; vid_hdr->data_size = cpu_to_be32(data_size); vid_hdr->data_crc = cpu_to_be32(crc); } vid_hdr->sqnum = cpu_to_be64(ubi_next_sqnum(ubi)); err = ubi_io_write_vid_hdr(ubi, to, vidb); if (err) { if (err == -EIO) err = MOVE_TARGET_WR_ERR; goto out_unlock_buf; } cond_resched(); /* Read the VID header back and check if it was written correctly */ err = ubi_io_read_vid_hdr(ubi, to, vidb, 1); if (err) { if (err != UBI_IO_BITFLIPS) { ubi_warn(ubi, "error %d while reading VID header back from PEB %d", err, to); if (is_error_sane(err)) err = MOVE_TARGET_RD_ERR; } else err = MOVE_TARGET_BITFLIPS; goto out_unlock_buf; } if (data_size > 0) { err = ubi_io_write_data(ubi, ubi->peb_buf, to, 0, aldata_size); if (err) { if (err == -EIO) err = MOVE_TARGET_WR_ERR; goto out_unlock_buf; } cond_resched(); } ubi_assert(vol->eba_tbl->entries[lnum].pnum == from); /** * The volumes_lock lock is needed here to prevent the expired old eba_tbl * being updated when the eba_tbl is copied in the ubi_resize_volume() process. */ spin_lock(&ubi->volumes_lock); vol->eba_tbl->entries[lnum].pnum = to; spin_unlock(&ubi->volumes_lock); out_unlock_buf: mutex_unlock(&ubi->buf_mutex); out_unlock_leb: leb_write_unlock(ubi, vol_id, lnum); return err; } /** * print_rsvd_warning - warn about not having enough reserved PEBs. * @ubi: UBI device description object * @ai: UBI attach info object * * This is a helper function for 'ubi_eba_init()' which is called when UBI * cannot reserve enough PEBs for bad block handling. This function makes a * decision whether we have to print a warning or not. The algorithm is as * follows: * o if this is a new UBI image, then just print the warning * o if this is an UBI image which has already been used for some time, print * a warning only if we can reserve less than 10% of the expected amount of * the reserved PEB. * * The idea is that when UBI is used, PEBs become bad, and the reserved pool * of PEBs becomes smaller, which is normal and we do not want to scare users * with a warning every time they attach the MTD device. This was an issue * reported by real users. */ static void print_rsvd_warning(struct ubi_device *ubi, struct ubi_attach_info *ai) { /* * The 1 << 18 (256KiB) number is picked randomly, just a reasonably * large number to distinguish between newly flashed and used images. */ if (ai->max_sqnum > (1 << 18)) { int min = ubi->beb_rsvd_level / 10; if (!min) min = 1; if (ubi->beb_rsvd_pebs > min) return; } ubi_warn(ubi, "cannot reserve enough PEBs for bad PEB handling, reserved %d, need %d", ubi->beb_rsvd_pebs, ubi->beb_rsvd_level); if (ubi->corr_peb_count) ubi_warn(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); } /** * self_check_eba - run a self check on the EBA table constructed by fastmap. * @ubi: UBI device description object * @ai_fastmap: UBI attach info object created by fastmap * @ai_scan: UBI attach info object created by scanning * * Returns < 0 in case of an internal error, 0 otherwise. * If a bad EBA table entry was found it will be printed out and * ubi_assert() triggers. */ int self_check_eba(struct ubi_device *ubi, struct ubi_attach_info *ai_fastmap, struct ubi_attach_info *ai_scan) { int i, j, num_volumes, ret = 0; int **scan_eba, **fm_eba; struct ubi_ainf_volume *av; struct ubi_volume *vol; struct ubi_ainf_peb *aeb; struct rb_node *rb; num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; scan_eba = kmalloc_array(num_volumes, sizeof(*scan_eba), GFP_KERNEL); if (!scan_eba) return -ENOMEM; fm_eba = kmalloc_array(num_volumes, sizeof(*fm_eba), GFP_KERNEL); if (!fm_eba) { kfree(scan_eba); return -ENOMEM; } for (i = 0; i < num_volumes; i++) { vol = ubi->volumes[i]; if (!vol) continue; scan_eba[i] = kmalloc_array(vol->reserved_pebs, sizeof(**scan_eba), GFP_KERNEL); if (!scan_eba[i]) { ret = -ENOMEM; goto out_free; } fm_eba[i] = kmalloc_array(vol->reserved_pebs, sizeof(**fm_eba), GFP_KERNEL); if (!fm_eba[i]) { ret = -ENOMEM; kfree(scan_eba[i]); goto out_free; } for (j = 0; j < vol->reserved_pebs; j++) scan_eba[i][j] = fm_eba[i][j] = UBI_LEB_UNMAPPED; av = ubi_find_av(ai_scan, idx2vol_id(ubi, i)); if (!av) continue; ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) scan_eba[i][aeb->lnum] = aeb->pnum; av = ubi_find_av(ai_fastmap, idx2vol_id(ubi, i)); if (!av) continue; ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) fm_eba[i][aeb->lnum] = aeb->pnum; for (j = 0; j < vol->reserved_pebs; j++) { if (scan_eba[i][j] != fm_eba[i][j]) { if (scan_eba[i][j] == UBI_LEB_UNMAPPED || fm_eba[i][j] == UBI_LEB_UNMAPPED) continue; ubi_err(ubi, "LEB:%i:%i is PEB:%i instead of %i!", vol->vol_id, j, fm_eba[i][j], scan_eba[i][j]); ubi_assert(0); } } } out_free: while (--i >= 0) { if (!ubi->volumes[i]) continue; kfree(scan_eba[i]); kfree(fm_eba[i]); } kfree(scan_eba); kfree(fm_eba); return ret; } /** * ubi_eba_init - initialize the EBA sub-system using attaching information. * @ubi: UBI device description object * @ai: attaching information * * This function returns zero in case of success and a negative error code in * case of failure. */ int ubi_eba_init(struct ubi_device *ubi, struct ubi_attach_info *ai) { int i, err, num_volumes; struct ubi_ainf_volume *av; struct ubi_volume *vol; struct ubi_ainf_peb *aeb; struct rb_node *rb; dbg_eba("initialize EBA sub-system"); spin_lock_init(&ubi->ltree_lock); mutex_init(&ubi->alc_mutex); ubi->ltree = RB_ROOT; ubi->global_sqnum = ai->max_sqnum + 1; num_volumes = ubi->vtbl_slots + UBI_INT_VOL_COUNT; for (i = 0; i < num_volumes; i++) { struct ubi_eba_table *tbl; vol = ubi->volumes[i]; if (!vol) continue; cond_resched(); tbl = ubi_eba_create_table(vol, vol->reserved_pebs); if (IS_ERR(tbl)) { err = PTR_ERR(tbl); goto out_free; } ubi_eba_replace_table(vol, tbl); av = ubi_find_av(ai, idx2vol_id(ubi, i)); if (!av) continue; ubi_rb_for_each_entry(rb, aeb, &av->root, u.rb) { if (aeb->lnum >= vol->reserved_pebs) { /* * This may happen in case of an unclean reboot * during re-size. */ ubi_move_aeb_to_list(av, aeb, &ai->erase); } else { struct ubi_eba_entry *entry; entry = &vol->eba_tbl->entries[aeb->lnum]; entry->pnum = aeb->pnum; } } } if (ubi->avail_pebs < EBA_RESERVED_PEBS) { ubi_err(ubi, "no enough physical eraseblocks (%d, need %d)", ubi->avail_pebs, EBA_RESERVED_PEBS); if (ubi->corr_peb_count) ubi_err(ubi, "%d PEBs are corrupted and not used", ubi->corr_peb_count); err = -ENOSPC; goto out_free; } ubi->avail_pebs -= EBA_RESERVED_PEBS; ubi->rsvd_pebs += EBA_RESERVED_PEBS; if (ubi->bad_allowed) { ubi_calculate_reserved(ubi); if (ubi->avail_pebs < ubi->beb_rsvd_level) { /* No enough free physical eraseblocks */ ubi->beb_rsvd_pebs = ubi->avail_pebs; print_rsvd_warning(ubi, ai); } else ubi->beb_rsvd_pebs = ubi->beb_rsvd_level; ubi->avail_pebs -= ubi->beb_rsvd_pebs; ubi->rsvd_pebs += ubi->beb_rsvd_pebs; } dbg_eba("EBA sub-system is initialized"); return 0; out_free: for (i = 0; i < num_volumes; i++) { if (!ubi->volumes[i]) continue; ubi_eba_replace_table(ubi->volumes[i], NULL); } return err; } |
| 38 38 38 38 38 38 13 1 1 34 1 33 36 36 36 35 34 30 35 35 5 27 6 33 34 32 34 32 33 30 3 37 32 3 33 33 32 18 3 1 1 2 2 2 2 2 2 8 3 1 19 19 7 1 11 1 38 32 3 2 11 1 1 2 4 38 38 2 36 29 33 12 13 13 1 13 7 13 13 13 13 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 1 4 1 3 4 4 3 3 3 2 3 2 2 3 3 3 1 1 1 1 38 2 3 1 1 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2000-2006 Silicon Graphics, Inc. * All Rights Reserved. */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_log.h" #include "xfs_log_priv.h" #include "xfs_log_recover.h" #include "xfs_trans_priv.h" #include "xfs_alloc.h" #include "xfs_ialloc.h" #include "xfs_trace.h" #include "xfs_icache.h" #include "xfs_error.h" #include "xfs_buf_item.h" #include "xfs_ag.h" #include "xfs_quota.h" #include "xfs_reflink.h" #define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1) STATIC int xlog_find_zeroed( struct xlog *, xfs_daddr_t *); STATIC int xlog_clear_stale_blocks( struct xlog *, xfs_lsn_t); STATIC int xlog_do_recovery_pass( struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *); /* * Sector aligned buffer routines for buffer create/read/write/access */ /* * Verify the log-relative block number and length in basic blocks are valid for * an operation involving the given XFS log buffer. Returns true if the fields * are valid, false otherwise. */ static inline bool xlog_verify_bno( struct xlog *log, xfs_daddr_t blk_no, int bbcount) { if (blk_no < 0 || blk_no >= log->l_logBBsize) return false; if (bbcount <= 0 || (blk_no + bbcount) > log->l_logBBsize) return false; return true; } /* * Allocate a buffer to hold log data. The buffer needs to be able to map to * a range of nbblks basic blocks at any valid offset within the log. */ static char * xlog_alloc_buffer( struct xlog *log, int nbblks) { /* * Pass log block 0 since we don't have an addr yet, buffer will be * verified on read. */ if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, 0, nbblks))) { xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer", nbblks); return NULL; } /* * We do log I/O in units of log sectors (a power-of-2 multiple of the * basic block size), so we round up the requested size to accommodate * the basic blocks required for complete log sectors. * * In addition, the buffer may be used for a non-sector-aligned block * offset, in which case an I/O of the requested size could extend * beyond the end of the buffer. If the requested size is only 1 basic * block it will never straddle a sector boundary, so this won't be an * issue. Nor will this be a problem if the log I/O is done in basic * blocks (sector size 1). But otherwise we extend the buffer by one * extra log sector to ensure there's space to accommodate this * possibility. */ if (nbblks > 1 && log->l_sectBBsize > 1) nbblks += log->l_sectBBsize; nbblks = round_up(nbblks, log->l_sectBBsize); return kvzalloc(BBTOB(nbblks), GFP_KERNEL | __GFP_RETRY_MAYFAIL); } /* * Return the address of the start of the given block number's data * in a log buffer. The buffer covers a log sector-aligned region. */ static inline unsigned int xlog_align( struct xlog *log, xfs_daddr_t blk_no) { return BBTOB(blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1)); } static int xlog_do_io( struct xlog *log, xfs_daddr_t blk_no, unsigned int nbblks, char *data, enum req_op op) { int error; if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, blk_no, nbblks))) { xfs_warn(log->l_mp, "Invalid log block/length (0x%llx, 0x%x) for buffer", blk_no, nbblks); return -EFSCORRUPTED; } blk_no = round_down(blk_no, log->l_sectBBsize); nbblks = round_up(nbblks, log->l_sectBBsize); ASSERT(nbblks > 0); error = xfs_rw_bdev(log->l_targ->bt_bdev, log->l_logBBstart + blk_no, BBTOB(nbblks), data, op); if (error && !xlog_is_shutdown(log)) { xfs_alert(log->l_mp, "log recovery %s I/O error at daddr 0x%llx len %d error %d", op == REQ_OP_WRITE ? "write" : "read", blk_no, nbblks, error); } return error; } STATIC int xlog_bread_noalign( struct xlog *log, xfs_daddr_t blk_no, int nbblks, char *data) { return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); } STATIC int xlog_bread( struct xlog *log, xfs_daddr_t blk_no, int nbblks, char *data, char **offset) { int error; error = xlog_do_io(log, blk_no, nbblks, data, REQ_OP_READ); if (!error) *offset = data + xlog_align(log, blk_no); return error; } STATIC int xlog_bwrite( struct xlog *log, xfs_daddr_t blk_no, int nbblks, char *data) { return xlog_do_io(log, blk_no, nbblks, data, REQ_OP_WRITE); } #ifdef DEBUG /* * dump debug superblock and log record information */ STATIC void xlog_header_check_dump( xfs_mount_t *mp, xlog_rec_header_t *head) { xfs_debug(mp, "%s: SB : uuid = %pU, fmt = %d", __func__, &mp->m_sb.sb_uuid, XLOG_FMT); xfs_debug(mp, " log : uuid = %pU, fmt = %d", &head->h_fs_uuid, be32_to_cpu(head->h_fmt)); } #else #define xlog_header_check_dump(mp, head) #endif /* * check log record header for recovery */ STATIC int xlog_header_check_recover( xfs_mount_t *mp, xlog_rec_header_t *head) { ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); /* * IRIX doesn't write the h_fmt field and leaves it zeroed * (XLOG_FMT_UNKNOWN). This stops us from trying to recover * a dirty log created in IRIX. */ if (XFS_IS_CORRUPT(mp, head->h_fmt != cpu_to_be32(XLOG_FMT))) { xfs_warn(mp, "dirty log written in incompatible format - can't recover"); xlog_header_check_dump(mp, head); return -EFSCORRUPTED; } if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { xfs_warn(mp, "dirty log entry has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); return -EFSCORRUPTED; } return 0; } /* * read the head block of the log and check the header */ STATIC int xlog_header_check_mount( xfs_mount_t *mp, xlog_rec_header_t *head) { ASSERT(head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)); if (uuid_is_null(&head->h_fs_uuid)) { /* * IRIX doesn't write the h_fs_uuid or h_fmt fields. If * h_fs_uuid is null, we assume this log was last mounted * by IRIX and continue. */ xfs_warn(mp, "null uuid in log - IRIX style log"); } else if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) { xfs_warn(mp, "log has mismatched uuid - can't recover"); xlog_header_check_dump(mp, head); return -EFSCORRUPTED; } return 0; } /* * This routine finds (to an approximation) the first block in the physical * log which contains the given cycle. It uses a binary search algorithm. * Note that the algorithm can not be perfect because the disk will not * necessarily be perfect. */ STATIC int xlog_find_cycle_start( struct xlog *log, char *buffer, xfs_daddr_t first_blk, xfs_daddr_t *last_blk, uint cycle) { char *offset; xfs_daddr_t mid_blk; xfs_daddr_t end_blk; uint mid_cycle; int error; end_blk = *last_blk; mid_blk = BLK_AVG(first_blk, end_blk); while (mid_blk != first_blk && mid_blk != end_blk) { error = xlog_bread(log, mid_blk, 1, buffer, &offset); if (error) return error; mid_cycle = xlog_get_cycle(offset); if (mid_cycle == cycle) end_blk = mid_blk; /* last_half_cycle == mid_cycle */ else first_blk = mid_blk; /* first_half_cycle == mid_cycle */ mid_blk = BLK_AVG(first_blk, end_blk); } ASSERT((mid_blk == first_blk && mid_blk+1 == end_blk) || (mid_blk == end_blk && mid_blk-1 == first_blk)); *last_blk = end_blk; return 0; } /* * Check that a range of blocks does not contain stop_on_cycle_no. * Fill in *new_blk with the block offset where such a block is * found, or with -1 (an invalid block number) if there is no such * block in the range. The scan needs to occur from front to back * and the pointer into the region must be updated since a later * routine will need to perform another test. */ STATIC int xlog_find_verify_cycle( struct xlog *log, xfs_daddr_t start_blk, int nbblks, uint stop_on_cycle_no, xfs_daddr_t *new_blk) { xfs_daddr_t i, j; uint cycle; char *buffer; xfs_daddr_t bufblks; char *buf = NULL; int error = 0; /* * Greedily allocate a buffer big enough to handle the full * range of basic blocks we'll be examining. If that fails, * try a smaller size. We need to be able to read at least * a log sector, or we're out of luck. */ bufblks = roundup_pow_of_two(nbblks); while (bufblks > log->l_logBBsize) bufblks >>= 1; while (!(buffer = xlog_alloc_buffer(log, bufblks))) { bufblks >>= 1; if (bufblks < log->l_sectBBsize) return -ENOMEM; } for (i = start_blk; i < start_blk + nbblks; i += bufblks) { int bcount; bcount = min(bufblks, (start_blk + nbblks - i)); error = xlog_bread(log, i, bcount, buffer, &buf); if (error) goto out; for (j = 0; j < bcount; j++) { cycle = xlog_get_cycle(buf); if (cycle == stop_on_cycle_no) { *new_blk = i+j; goto out; } buf += BBSIZE; } } *new_blk = -1; out: kvfree(buffer); return error; } static inline int xlog_logrec_hblks(struct xlog *log, struct xlog_rec_header *rh) { if (xfs_has_logv2(log->l_mp)) { int h_size = be32_to_cpu(rh->h_size); if ((be32_to_cpu(rh->h_version) & XLOG_VERSION_2) && h_size > XLOG_HEADER_CYCLE_SIZE) return DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE); } return 1; } /* * Potentially backup over partial log record write. * * In the typical case, last_blk is the number of the block directly after * a good log record. Therefore, we subtract one to get the block number * of the last block in the given buffer. extra_bblks contains the number * of blocks we would have read on a previous read. This happens when the * last log record is split over the end of the physical log. * * extra_bblks is the number of blocks potentially verified on a previous * call to this routine. */ STATIC int xlog_find_verify_log_record( struct xlog *log, xfs_daddr_t start_blk, xfs_daddr_t *last_blk, int extra_bblks) { xfs_daddr_t i; char *buffer; char *offset = NULL; xlog_rec_header_t *head = NULL; int error = 0; int smallmem = 0; int num_blks = *last_blk - start_blk; int xhdrs; ASSERT(start_blk != 0 || *last_blk != start_blk); buffer = xlog_alloc_buffer(log, num_blks); if (!buffer) { buffer = xlog_alloc_buffer(log, 1); if (!buffer) return -ENOMEM; smallmem = 1; } else { error = xlog_bread(log, start_blk, num_blks, buffer, &offset); if (error) goto out; offset += ((num_blks - 1) << BBSHIFT); } for (i = (*last_blk) - 1; i >= 0; i--) { if (i < start_blk) { /* valid log record not found */ xfs_warn(log->l_mp, "Log inconsistent (didn't find previous header)"); ASSERT(0); error = -EFSCORRUPTED; goto out; } if (smallmem) { error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out; } head = (xlog_rec_header_t *)offset; if (head->h_magicno == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) break; if (!smallmem) offset -= BBSIZE; } /* * We hit the beginning of the physical log & still no header. Return * to caller. If caller can handle a return of -1, then this routine * will be called again for the end of the physical log. */ if (i == -1) { error = 1; goto out; } /* * We have the final block of the good log (the first block * of the log record _before_ the head. So we check the uuid. */ if ((error = xlog_header_check_mount(log->l_mp, head))) goto out; /* * We may have found a log record header before we expected one. * last_blk will be the 1st block # with a given cycle #. We may end * up reading an entire log record. In this case, we don't want to * reset last_blk. Only when last_blk points in the middle of a log * record do we update last_blk. */ xhdrs = xlog_logrec_hblks(log, head); if (*last_blk - i + extra_bblks != BTOBB(be32_to_cpu(head->h_len)) + xhdrs) *last_blk = i; out: kvfree(buffer); return error; } /* * Head is defined to be the point of the log where the next log write * could go. This means that incomplete LR writes at the end are * eliminated when calculating the head. We aren't guaranteed that previous * LR have complete transactions. We only know that a cycle number of * current cycle number -1 won't be present in the log if we start writing * from our current block number. * * last_blk contains the block number of the first block with a given * cycle number. * * Return: zero if normal, non-zero if error. */ STATIC int xlog_find_head( struct xlog *log, xfs_daddr_t *return_head_blk) { char *buffer; char *offset; xfs_daddr_t new_blk, first_blk, start_blk, last_blk, head_blk; int num_scan_bblks; uint first_half_cycle, last_half_cycle; uint stop_on_cycle; int error, log_bbnum = log->l_logBBsize; /* Is the end of the log device zeroed? */ error = xlog_find_zeroed(log, &first_blk); if (error < 0) { xfs_warn(log->l_mp, "empty log check failed"); return error; } if (error == 1) { *return_head_blk = first_blk; /* Is the whole lot zeroed? */ if (!first_blk) { /* Linux XFS shouldn't generate totally zeroed logs - * mkfs etc write a dummy unmount record to a fresh * log so we can store the uuid in there */ xfs_warn(log->l_mp, "totally zeroed log"); } return 0; } first_blk = 0; /* get cycle # of 1st block */ buffer = xlog_alloc_buffer(log, 1); if (!buffer) return -ENOMEM; error = xlog_bread(log, 0, 1, buffer, &offset); if (error) goto out_free_buffer; first_half_cycle = xlog_get_cycle(offset); last_blk = head_blk = log_bbnum - 1; /* get cycle # of last block */ error = xlog_bread(log, last_blk, 1, buffer, &offset); if (error) goto out_free_buffer; last_half_cycle = xlog_get_cycle(offset); ASSERT(last_half_cycle != 0); /* * If the 1st half cycle number is equal to the last half cycle number, * then the entire log is stamped with the same cycle number. In this * case, head_blk can't be set to zero (which makes sense). The below * math doesn't work out properly with head_blk equal to zero. Instead, * we set it to log_bbnum which is an invalid block number, but this * value makes the math correct. If head_blk doesn't changed through * all the tests below, *head_blk is set to zero at the very end rather * than log_bbnum. In a sense, log_bbnum and zero are the same block * in a circular file. */ if (first_half_cycle == last_half_cycle) { /* * In this case we believe that the entire log should have * cycle number last_half_cycle. We need to scan backwards * from the end verifying that there are no holes still * containing last_half_cycle - 1. If we find such a hole, * then the start of that hole will be the new head. The * simple case looks like * x | x ... | x - 1 | x * Another case that fits this picture would be * x | x + 1 | x ... | x * In this case the head really is somewhere at the end of the * log, as one of the latest writes at the beginning was * incomplete. * One more case is * x | x + 1 | x ... | x - 1 | x * This is really the combination of the above two cases, and * the head has to end up at the start of the x-1 hole at the * end of the log. * * In the 256k log case, we will read from the beginning to the * end of the log and search for cycle numbers equal to x-1. * We don't worry about the x+1 blocks that we encounter, * because we know that they cannot be the head since the log * started with x. */ head_blk = log_bbnum; stop_on_cycle = last_half_cycle - 1; } else { /* * In this case we want to find the first block with cycle * number matching last_half_cycle. We expect the log to be * some variation on * x + 1 ... | x ... | x * The first block with cycle number x (last_half_cycle) will * be where the new head belongs. First we do a binary search * for the first occurrence of last_half_cycle. The binary * search may not be totally accurate, so then we scan back * from there looking for occurrences of last_half_cycle before * us. If that backwards scan wraps around the beginning of * the log, then we look for occurrences of last_half_cycle - 1 * at the end of the log. The cases we're looking for look * like * v binary search stopped here * x + 1 ... | x | x + 1 | x ... | x * ^ but we want to locate this spot * or * <---------> less than scan distance * x + 1 ... | x ... | x - 1 | x * ^ we want to locate this spot */ stop_on_cycle = last_half_cycle; error = xlog_find_cycle_start(log, buffer, first_blk, &head_blk, last_half_cycle); if (error) goto out_free_buffer; } /* * Now validate the answer. Scan back some number of maximum possible * blocks and make sure each one has the expected cycle number. The * maximum is determined by the total possible amount of buffering * in the in-core log. The following number can be made tighter if * we actually look at the block size of the filesystem. */ num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log)); if (head_blk >= num_scan_bblks) { /* * We are guaranteed that the entire check can be performed * in one buffer. */ start_blk = head_blk - num_scan_bblks; if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks, stop_on_cycle, &new_blk))) goto out_free_buffer; if (new_blk != -1) head_blk = new_blk; } else { /* need to read 2 parts of log */ /* * We are going to scan backwards in the log in two parts. * First we scan the physical end of the log. In this part * of the log, we are looking for blocks with cycle number * last_half_cycle - 1. * If we find one, then we know that the log starts there, as * we've found a hole that didn't get written in going around * the end of the physical log. The simple case for this is * x + 1 ... | x ... | x - 1 | x * <---------> less than scan distance * If all of the blocks at the end of the log have cycle number * last_half_cycle, then we check the blocks at the start of * the log looking for occurrences of last_half_cycle. If we * find one, then our current estimate for the location of the * first occurrence of last_half_cycle is wrong and we move * back to the hole we've found. This case looks like * x + 1 ... | x | x + 1 | x ... * ^ binary search stopped here * Another case we need to handle that only occurs in 256k * logs is * x + 1 ... | x ... | x+1 | x ... * ^ binary search stops here * In a 256k log, the scan at the end of the log will see the * x + 1 blocks. We need to skip past those since that is * certainly not the head of the log. By searching for * last_half_cycle-1 we accomplish that. */ ASSERT(head_blk <= INT_MAX && (xfs_daddr_t) num_scan_bblks >= head_blk); start_blk = log_bbnum - (num_scan_bblks - head_blk); if ((error = xlog_find_verify_cycle(log, start_blk, num_scan_bblks - (int)head_blk, (stop_on_cycle - 1), &new_blk))) goto out_free_buffer; if (new_blk != -1) { head_blk = new_blk; goto validate_head; } /* * Scan beginning of log now. The last part of the physical * log is good. This scan needs to verify that it doesn't find * the last_half_cycle. */ start_blk = 0; ASSERT(head_blk <= INT_MAX); if ((error = xlog_find_verify_cycle(log, start_blk, (int)head_blk, stop_on_cycle, &new_blk))) goto out_free_buffer; if (new_blk != -1) head_blk = new_blk; } validate_head: /* * Now we need to make sure head_blk is not pointing to a block in * the middle of a log record. */ num_scan_bblks = XLOG_REC_SHIFT(log); if (head_blk >= num_scan_bblks) { start_blk = head_blk - num_scan_bblks; /* don't read head_blk */ /* start ptr at last block ptr before head_blk */ error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); if (error == 1) error = -EIO; if (error) goto out_free_buffer; } else { start_blk = 0; ASSERT(head_blk <= INT_MAX); error = xlog_find_verify_log_record(log, start_blk, &head_blk, 0); if (error < 0) goto out_free_buffer; if (error == 1) { /* We hit the beginning of the log during our search */ start_blk = log_bbnum - (num_scan_bblks - head_blk); new_blk = log_bbnum; ASSERT(start_blk <= INT_MAX && (xfs_daddr_t) log_bbnum-start_blk >= 0); ASSERT(head_blk <= INT_MAX); error = xlog_find_verify_log_record(log, start_blk, &new_blk, (int)head_blk); if (error == 1) error = -EIO; if (error) goto out_free_buffer; if (new_blk != log_bbnum) head_blk = new_blk; } else if (error) goto out_free_buffer; } kvfree(buffer); if (head_blk == log_bbnum) *return_head_blk = 0; else *return_head_blk = head_blk; /* * When returning here, we have a good block number. Bad block * means that during a previous crash, we didn't have a clean break * from cycle number N to cycle number N-1. In this case, we need * to find the first block with cycle number N-1. */ return 0; out_free_buffer: kvfree(buffer); if (error) xfs_warn(log->l_mp, "failed to find log head"); return error; } /* * Seek backwards in the log for log record headers. * * Given a starting log block, walk backwards until we find the provided number * of records or hit the provided tail block. The return value is the number of * records encountered or a negative error code. The log block and buffer * pointer of the last record seen are returned in rblk and rhead respectively. */ STATIC int xlog_rseek_logrec_hdr( struct xlog *log, xfs_daddr_t head_blk, xfs_daddr_t tail_blk, int count, char *buffer, xfs_daddr_t *rblk, struct xlog_rec_header **rhead, bool *wrapped) { int i; int error; int found = 0; char *offset = NULL; xfs_daddr_t end_blk; *wrapped = false; /* * Walk backwards from the head block until we hit the tail or the first * block in the log. */ end_blk = head_blk > tail_blk ? tail_blk : 0; for (i = (int) head_blk - 1; i >= end_blk; i--) { error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { *rblk = i; *rhead = (struct xlog_rec_header *) offset; if (++found == count) break; } } /* * If we haven't hit the tail block or the log record header count, * start looking again from the end of the physical log. Note that * callers can pass head == tail if the tail is not yet known. */ if (tail_blk >= head_blk && found != count) { for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) { error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { *wrapped = true; *rblk = i; *rhead = (struct xlog_rec_header *) offset; if (++found == count) break; } } } return found; out_error: return error; } /* * Seek forward in the log for log record headers. * * Given head and tail blocks, walk forward from the tail block until we find * the provided number of records or hit the head block. The return value is the * number of records encountered or a negative error code. The log block and * buffer pointer of the last record seen are returned in rblk and rhead * respectively. */ STATIC int xlog_seek_logrec_hdr( struct xlog *log, xfs_daddr_t head_blk, xfs_daddr_t tail_blk, int count, char *buffer, xfs_daddr_t *rblk, struct xlog_rec_header **rhead, bool *wrapped) { int i; int error; int found = 0; char *offset = NULL; xfs_daddr_t end_blk; *wrapped = false; /* * Walk forward from the tail block until we hit the head or the last * block in the log. */ end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1; for (i = (int) tail_blk; i <= end_blk; i++) { error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { *rblk = i; *rhead = (struct xlog_rec_header *) offset; if (++found == count) break; } } /* * If we haven't hit the head block or the log record header count, * start looking again from the start of the physical log. */ if (tail_blk > head_blk && found != count) { for (i = 0; i < (int) head_blk; i++) { error = xlog_bread(log, i, 1, buffer, &offset); if (error) goto out_error; if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) { *wrapped = true; *rblk = i; *rhead = (struct xlog_rec_header *) offset; if (++found == count) break; } } } return found; out_error: return error; } /* * Calculate distance from head to tail (i.e., unused space in the log). */ static inline int xlog_tail_distance( struct xlog *log, xfs_daddr_t head_blk, xfs_daddr_t tail_blk) { if (head_blk < tail_blk) return tail_blk - head_blk; return tail_blk + (log->l_logBBsize - head_blk); } /* * Verify the log tail. This is particularly important when torn or incomplete * writes have been detected near the front of the log and the head has been * walked back accordingly. * * We also have to handle the case where the tail was pinned and the head * blocked behind the tail right before a crash. If the tail had been pushed * immediately prior to the crash and the subsequent checkpoint was only * partially written, it's possible it overwrote the last referenced tail in the * log with garbage. This is not a coherency problem because the tail must have * been pushed before it can be overwritten, but appears as log corruption to * recovery because we have no way to know the tail was updated if the * subsequent checkpoint didn't write successfully. * * Therefore, CRC check the log from tail to head. If a failure occurs and the * offending record is within max iclog bufs from the head, walk the tail * forward and retry until a valid tail is found or corruption is detected out * of the range of a possible overwrite. */ STATIC int xlog_verify_tail( struct xlog *log, xfs_daddr_t head_blk, xfs_daddr_t *tail_blk, int hsize) { struct xlog_rec_header *thead; char *buffer; xfs_daddr_t first_bad; int error = 0; bool wrapped; xfs_daddr_t tmp_tail; xfs_daddr_t orig_tail = *tail_blk; buffer = xlog_alloc_buffer(log, 1); if (!buffer) return -ENOMEM; /* * Make sure the tail points to a record (returns positive count on * success). */ error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, buffer, &tmp_tail, &thead, &wrapped); if (error < 0) goto out; if (*tail_blk != tmp_tail) *tail_blk = tmp_tail; /* * Run a CRC check from the tail to the head. We can't just check * MAX_ICLOGS records past the tail because the tail may point to stale * blocks cleared during the search for the head/tail. These blocks are * overwritten with zero-length records and thus record count is not a * reliable indicator of the iclog state before a crash. */ first_bad = 0; error = xlog_do_recovery_pass(log, head_blk, *tail_blk, XLOG_RECOVER_CRCPASS, &first_bad); while ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) { int tail_distance; /* * Is corruption within range of the head? If so, retry from * the next record. Otherwise return an error. */ tail_distance = xlog_tail_distance(log, head_blk, first_bad); if (tail_distance > BTOBB(XLOG_MAX_ICLOGS * hsize)) break; /* skip to the next record; returns positive count on success */ error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, buffer, &tmp_tail, &thead, &wrapped); if (error < 0) goto out; *tail_blk = tmp_tail; first_bad = 0; error = xlog_do_recovery_pass(log, head_blk, *tail_blk, XLOG_RECOVER_CRCPASS, &first_bad); } if (!error && *tail_blk != orig_tail) xfs_warn(log->l_mp, "Tail block (0x%llx) overwrite detected. Updated to 0x%llx", orig_tail, *tail_blk); out: kvfree(buffer); return error; } /* * Detect and trim torn writes from the head of the log. * * Storage without sector atomicity guarantees can result in torn writes in the * log in the event of a crash. Our only means to detect this scenario is via * CRC verification. While we can't always be certain that CRC verification * failure is due to a torn write vs. an unrelated corruption, we do know that * only a certain number (XLOG_MAX_ICLOGS) of log records can be written out at * one time. Therefore, CRC verify up to XLOG_MAX_ICLOGS records at the head of * the log and treat failures in this range as torn writes as a matter of * policy. In the event of CRC failure, the head is walked back to the last good * record in the log and the tail is updated from that record and verified. */ STATIC int xlog_verify_head( struct xlog *log, xfs_daddr_t *head_blk, /* in/out: unverified head */ xfs_daddr_t *tail_blk, /* out: tail block */ char *buffer, xfs_daddr_t *rhead_blk, /* start blk of last record */ struct xlog_rec_header **rhead, /* ptr to last record */ bool *wrapped) /* last rec. wraps phys. log */ { struct xlog_rec_header *tmp_rhead; char *tmp_buffer; xfs_daddr_t first_bad; xfs_daddr_t tmp_rhead_blk; int found; int error; bool tmp_wrapped; /* * Check the head of the log for torn writes. Search backwards from the * head until we hit the tail or the maximum number of log record I/Os * that could have been in flight at one time. Use a temporary buffer so * we don't trash the rhead/buffer pointers from the caller. */ tmp_buffer = xlog_alloc_buffer(log, 1); if (!tmp_buffer) return -ENOMEM; error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk, XLOG_MAX_ICLOGS, tmp_buffer, &tmp_rhead_blk, &tmp_rhead, &tmp_wrapped); kvfree(tmp_buffer); if (error < 0) return error; /* * Now run a CRC verification pass over the records starting at the * block found above to the current head. If a CRC failure occurs, the * log block of the first bad record is saved in first_bad. */ error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk, XLOG_RECOVER_CRCPASS, &first_bad); if ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) { /* * We've hit a potential torn write. Reset the error and warn * about it. */ error = 0; xfs_warn(log->l_mp, "Torn write (CRC failure) detected at log block 0x%llx. Truncating head block from 0x%llx.", first_bad, *head_blk); /* * Get the header block and buffer pointer for the last good * record before the bad record. * * Note that xlog_find_tail() clears the blocks at the new head * (i.e., the records with invalid CRC) if the cycle number * matches the current cycle. */ found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, buffer, rhead_blk, rhead, wrapped); if (found < 0) return found; if (found == 0) /* XXX: right thing to do here? */ return -EIO; /* * Reset the head block to the starting block of the first bad * log record and set the tail block based on the last good * record. * * Bail out if the updated head/tail match as this indicates * possible corruption outside of the acceptable * (XLOG_MAX_ICLOGS) range. This is a job for xfs_repair... */ *head_blk = first_bad; *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn)); if (*head_blk == *tail_blk) { ASSERT(0); return 0; } } if (error) return error; return xlog_verify_tail(log, *head_blk, tail_blk, be32_to_cpu((*rhead)->h_size)); } /* * We need to make sure we handle log wrapping properly, so we can't use the * calculated logbno directly. Make sure it wraps to the correct bno inside the * log. * * The log is limited to 32 bit sizes, so we use the appropriate modulus * operation here and cast it back to a 64 bit daddr on return. */ static inline xfs_daddr_t xlog_wrap_logbno( struct xlog *log, xfs_daddr_t bno) { int mod; div_s64_rem(bno, log->l_logBBsize, &mod); return mod; } /* * Check whether the head of the log points to an unmount record. In other * words, determine whether the log is clean. If so, update the in-core state * appropriately. */ static int xlog_check_unmount_rec( struct xlog *log, xfs_daddr_t *head_blk, xfs_daddr_t *tail_blk, struct xlog_rec_header *rhead, xfs_daddr_t rhead_blk, char *buffer, bool *clean) { struct xlog_op_header *op_head; xfs_daddr_t umount_data_blk; xfs_daddr_t after_umount_blk; int hblks; int error; char *offset; *clean = false; /* * Look for unmount record. If we find it, then we know there was a * clean unmount. Since 'i' could be the last block in the physical * log, we convert to a log block before comparing to the head_blk. * * Save the current tail lsn to use to pass to xlog_clear_stale_blocks() * below. We won't want to clear the unmount record if there is one, so * we pass the lsn of the unmount record rather than the block after it. */ hblks = xlog_logrec_hblks(log, rhead); after_umount_blk = xlog_wrap_logbno(log, rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len))); if (*head_blk == after_umount_blk && be32_to_cpu(rhead->h_num_logops) == 1) { umount_data_blk = xlog_wrap_logbno(log, rhead_blk + hblks); error = xlog_bread(log, umount_data_blk, 1, buffer, &offset); if (error) return error; op_head = (struct xlog_op_header *)offset; if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { /* * Set tail and last sync so that newly written log * records will point recovery to after the current * unmount record. */ xlog_assign_atomic_lsn(&log->l_tail_lsn, log->l_curr_cycle, after_umount_blk); log->l_ailp->ail_head_lsn = atomic64_read(&log->l_tail_lsn); *tail_blk = after_umount_blk; *clean = true; } } return 0; } static void xlog_set_state( struct xlog *log, xfs_daddr_t head_blk, struct xlog_rec_header *rhead, xfs_daddr_t rhead_blk, bool bump_cycle) { /* * Reset log values according to the state of the log when we * crashed. In the case where head_blk == 0, we bump curr_cycle * one because the next write starts a new cycle rather than * continuing the cycle of the last good log record. At this * point we have guaranteed that all partial log records have been * accounted for. Therefore, we know that the last good log record * written was complete and ended exactly on the end boundary * of the physical log. */ log->l_prev_block = rhead_blk; log->l_curr_block = (int)head_blk; log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); if (bump_cycle) log->l_curr_cycle++; atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); log->l_ailp->ail_head_lsn = be64_to_cpu(rhead->h_lsn); } /* * Find the sync block number or the tail of the log. * * This will be the block number of the last record to have its * associated buffers synced to disk. Every log record header has * a sync lsn embedded in it. LSNs hold block numbers, so it is easy * to get a sync block number. The only concern is to figure out which * log record header to believe. * * The following algorithm uses the log record header with the largest * lsn. The entire log record does not need to be valid. We only care * that the header is valid. * * We could speed up search by using current head_blk buffer, but it is not * available. */ STATIC int xlog_find_tail( struct xlog *log, xfs_daddr_t *head_blk, xfs_daddr_t *tail_blk) { xlog_rec_header_t *rhead; char *offset = NULL; char *buffer; int error; xfs_daddr_t rhead_blk; xfs_lsn_t tail_lsn; bool wrapped = false; bool clean = false; /* * Find previous log record */ if ((error = xlog_find_head(log, head_blk))) return error; ASSERT(*head_blk < INT_MAX); buffer = xlog_alloc_buffer(log, 1); if (!buffer) return -ENOMEM; if (*head_blk == 0) { /* special case */ error = xlog_bread(log, 0, 1, buffer, &offset); if (error) goto done; if (xlog_get_cycle(offset) == 0) { *tail_blk = 0; /* leave all other log inited values alone */ goto done; } } /* * Search backwards through the log looking for the log record header * block. This wraps all the way back around to the head so something is * seriously wrong if we can't find it. */ error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer, &rhead_blk, &rhead, &wrapped); if (error < 0) goto done; if (!error) { xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); error = -EFSCORRUPTED; goto done; } *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); /* * Set the log state based on the current head record. */ xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped); tail_lsn = atomic64_read(&log->l_tail_lsn); /* * Look for an unmount record at the head of the log. This sets the log * state to determine whether recovery is necessary. */ error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, rhead_blk, buffer, &clean); if (error) goto done; /* * Verify the log head if the log is not clean (e.g., we have anything * but an unmount record at the head). This uses CRC verification to * detect and trim torn writes. If discovered, CRC failures are * considered torn writes and the log head is trimmed accordingly. * * Note that we can only run CRC verification when the log is dirty * because there's no guarantee that the log data behind an unmount * record is compatible with the current architecture. */ if (!clean) { xfs_daddr_t orig_head = *head_blk; error = xlog_verify_head(log, head_blk, tail_blk, buffer, &rhead_blk, &rhead, &wrapped); if (error) goto done; /* update in-core state again if the head changed */ if (*head_blk != orig_head) { xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped); tail_lsn = atomic64_read(&log->l_tail_lsn); error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, rhead_blk, buffer, &clean); if (error) goto done; } } /* * Note that the unmount was clean. If the unmount was not clean, we * need to know this to rebuild the superblock counters from the perag * headers if we have a filesystem using non-persistent counters. */ if (clean) set_bit(XFS_OPSTATE_CLEAN, &log->l_mp->m_opstate); /* * Make sure that there are no blocks in front of the head * with the same cycle number as the head. This can happen * because we allow multiple outstanding log writes concurrently, * and the later writes might make it out before earlier ones. * * We use the lsn from before modifying it so that we'll never * overwrite the unmount record after a clean unmount. * * Do this only if we are going to recover the filesystem * * NOTE: This used to say "if (!readonly)" * However on Linux, we can & do recover a read-only filesystem. * We only skip recovery if NORECOVERY is specified on mount, * in which case we would not be here. * * But... if the -device- itself is readonly, just skip this. * We can't recover this device anyway, so it won't matter. */ if (!xfs_readonly_buftarg(log->l_targ)) error = xlog_clear_stale_blocks(log, tail_lsn); done: kvfree(buffer); if (error) xfs_warn(log->l_mp, "failed to locate log tail"); return error; } /* * Is the log zeroed at all? * * The last binary search should be changed to perform an X block read * once X becomes small enough. You can then search linearly through * the X blocks. This will cut down on the number of reads we need to do. * * If the log is partially zeroed, this routine will pass back the blkno * of the first block with cycle number 0. It won't have a complete LR * preceding it. * * Return: * 0 => the log is completely written to * 1 => use *blk_no as the first block of the log * <0 => error has occurred */ STATIC int xlog_find_zeroed( struct xlog *log, xfs_daddr_t *blk_no) { char *buffer; char *offset; uint first_cycle, last_cycle; xfs_daddr_t new_blk, last_blk, start_blk; xfs_daddr_t num_scan_bblks; int error, log_bbnum = log->l_logBBsize; int ret = 1; *blk_no = 0; /* check totally zeroed log */ buffer = xlog_alloc_buffer(log, 1); if (!buffer) return -ENOMEM; error = xlog_bread(log, 0, 1, buffer, &offset); if (error) goto out_free_buffer; first_cycle = xlog_get_cycle(offset); if (first_cycle == 0) { /* completely zeroed log */ *blk_no = 0; goto out_free_buffer; } /* check partially zeroed log */ error = xlog_bread(log, log_bbnum-1, 1, buffer, &offset); if (error) goto out_free_buffer; last_cycle = xlog_get_cycle(offset); if (last_cycle != 0) { /* log completely written to */ ret = 0; goto out_free_buffer; } /* we have a partially zeroed log */ last_blk = log_bbnum-1; error = xlog_find_cycle_start(log, buffer, 0, &last_blk, 0); if (error) goto out_free_buffer; /* * Validate the answer. Because there is no way to guarantee that * the entire log is made up of log records which are the same size, * we scan over the defined maximum blocks. At this point, the maximum * is not chosen to mean anything special. XXXmiken */ num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log); ASSERT(num_scan_bblks <= INT_MAX); if (last_blk < num_scan_bblks) num_scan_bblks = last_blk; start_blk = last_blk - num_scan_bblks; /* * We search for any instances of cycle number 0 that occur before * our current estimate of the head. What we're trying to detect is * 1 ... | 0 | 1 | 0... * ^ binary search ends here */ if ((error = xlog_find_verify_cycle(log, start_blk, (int)num_scan_bblks, 0, &new_blk))) goto out_free_buffer; if (new_blk != -1) last_blk = new_blk; /* * Potentially backup over partial log record write. We don't need * to search the end of the log because we know it is zero. */ error = xlog_find_verify_log_record(log, start_blk, &last_blk, 0); if (error == 1) error = -EIO; if (error) goto out_free_buffer; *blk_no = last_blk; out_free_buffer: kvfree(buffer); if (error) return error; return ret; } /* * These are simple subroutines used by xlog_clear_stale_blocks() below * to initialize a buffer full of empty log record headers and write * them into the log. */ STATIC void xlog_add_record( struct xlog *log, char *buf, int cycle, int block, int tail_cycle, int tail_block) { xlog_rec_header_t *recp = (xlog_rec_header_t *)buf; memset(buf, 0, BBSIZE); recp->h_magicno = cpu_to_be32(XLOG_HEADER_MAGIC_NUM); recp->h_cycle = cpu_to_be32(cycle); recp->h_version = cpu_to_be32( xfs_has_logv2(log->l_mp) ? 2 : 1); recp->h_lsn = cpu_to_be64(xlog_assign_lsn(cycle, block)); recp->h_tail_lsn = cpu_to_be64(xlog_assign_lsn(tail_cycle, tail_block)); recp->h_fmt = cpu_to_be32(XLOG_FMT); memcpy(&recp->h_fs_uuid, &log->l_mp->m_sb.sb_uuid, sizeof(uuid_t)); } STATIC int xlog_write_log_records( struct xlog *log, int cycle, int start_block, int blocks, int tail_cycle, int tail_block) { char *offset; char *buffer; int balign, ealign; int sectbb = log->l_sectBBsize; int end_block = start_block + blocks; int bufblks; int error = 0; int i, j = 0; /* * Greedily allocate a buffer big enough to handle the full * range of basic blocks to be written. If that fails, try * a smaller size. We need to be able to write at least a * log sector, or we're out of luck. */ bufblks = roundup_pow_of_two(blocks); while (bufblks > log->l_logBBsize) bufblks >>= 1; while (!(buffer = xlog_alloc_buffer(log, bufblks))) { bufblks >>= 1; if (bufblks < sectbb) return -ENOMEM; } /* We may need to do a read at the start to fill in part of * the buffer in the starting sector not covered by the first * write below. */ balign = round_down(start_block, sectbb); if (balign != start_block) { error = xlog_bread_noalign(log, start_block, 1, buffer); if (error) goto out_free_buffer; j = start_block - balign; } for (i = start_block; i < end_block; i += bufblks) { int bcount, endcount; bcount = min(bufblks, end_block - start_block); endcount = bcount - j; /* We may need to do a read at the end to fill in part of * the buffer in the final sector not covered by the write. * If this is the same sector as the above read, skip it. */ ealign = round_down(end_block, sectbb); if (j == 0 && (start_block + endcount > ealign)) { error = xlog_bread_noalign(log, ealign, sectbb, buffer + BBTOB(ealign - start_block)); if (error) break; } offset = buffer + xlog_align(log, start_block); for (; j < endcount; j++) { xlog_add_record(log, offset, cycle, i+j, tail_cycle, tail_block); offset += BBSIZE; } error = xlog_bwrite(log, start_block, endcount, buffer); if (error) break; start_block += endcount; j = 0; } out_free_buffer: kvfree(buffer); return error; } /* * This routine is called to blow away any incomplete log writes out * in front of the log head. We do this so that we won't become confused * if we come up, write only a little bit more, and then crash again. * If we leave the partial log records out there, this situation could * cause us to think those partial writes are valid blocks since they * have the current cycle number. We get rid of them by overwriting them * with empty log records with the old cycle number rather than the * current one. * * The tail lsn is passed in rather than taken from * the log so that we will not write over the unmount record after a * clean unmount in a 512 block log. Doing so would leave the log without * any valid log records in it until a new one was written. If we crashed * during that time we would not be able to recover. */ STATIC int xlog_clear_stale_blocks( struct xlog *log, xfs_lsn_t tail_lsn) { int tail_cycle, head_cycle; int tail_block, head_block; int tail_distance, max_distance; int distance; int error; tail_cycle = CYCLE_LSN(tail_lsn); tail_block = BLOCK_LSN(tail_lsn); head_cycle = log->l_curr_cycle; head_block = log->l_curr_block; /* * Figure out the distance between the new head of the log * and the tail. We want to write over any blocks beyond the * head that we may have written just before the crash, but * we don't want to overwrite the tail of the log. */ if (head_cycle == tail_cycle) { /* * The tail is behind the head in the physical log, * so the distance from the head to the tail is the * distance from the head to the end of the log plus * the distance from the beginning of the log to the * tail. */ if (XFS_IS_CORRUPT(log->l_mp, head_block < tail_block || head_block >= log->l_logBBsize)) return -EFSCORRUPTED; tail_distance = tail_block + (log->l_logBBsize - head_block); } else { /* * The head is behind the tail in the physical log, * so the distance from the head to the tail is just * the tail block minus the head block. */ if (XFS_IS_CORRUPT(log->l_mp, head_block >= tail_block || head_cycle != tail_cycle + 1)) return -EFSCORRUPTED; tail_distance = tail_block - head_block; } /* * If the head is right up against the tail, we can't clear * anything. */ if (tail_distance <= 0) { ASSERT(tail_distance == 0); return 0; } max_distance = XLOG_TOTAL_REC_SHIFT(log); /* * Take the smaller of the maximum amount of outstanding I/O * we could have and the distance to the tail to clear out. * We take the smaller so that we don't overwrite the tail and * we don't waste all day writing from the head to the tail * for no reason. */ max_distance = min(max_distance, tail_distance); if ((head_block + max_distance) <= log->l_logBBsize) { /* * We can stomp all the blocks we need to without * wrapping around the end of the log. Just do it * in a single write. Use the cycle number of the * current cycle minus one so that the log will look like: * n ... | n - 1 ... */ error = xlog_write_log_records(log, (head_cycle - 1), head_block, max_distance, tail_cycle, tail_block); if (error) return error; } else { /* * We need to wrap around the end of the physical log in * order to clear all the blocks. Do it in two separate * I/Os. The first write should be from the head to the * end of the physical log, and it should use the current * cycle number minus one just like above. */ distance = log->l_logBBsize - head_block; error = xlog_write_log_records(log, (head_cycle - 1), head_block, distance, tail_cycle, tail_block); if (error) return error; /* * Now write the blocks at the start of the physical log. * This writes the remainder of the blocks we want to clear. * It uses the current cycle number since we're now on the * same cycle as the head so that we get: * n ... n ... | n - 1 ... * ^^^^^ blocks we're writing */ distance = max_distance - (log->l_logBBsize - head_block); error = xlog_write_log_records(log, head_cycle, 0, distance, tail_cycle, tail_block); if (error) return error; } return 0; } /* * Release the recovered intent item in the AIL that matches the given intent * type and intent id. */ void xlog_recover_release_intent( struct xlog *log, unsigned short intent_type, uint64_t intent_id) { struct xfs_defer_pending *dfp, *n; list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) { struct xfs_log_item *lip = dfp->dfp_intent; if (lip->li_type != intent_type) continue; if (!lip->li_ops->iop_match(lip, intent_id)) continue; ASSERT(xlog_item_is_intent(lip)); xfs_defer_cancel_recovery(log->l_mp, dfp); } } int xlog_recover_iget( struct xfs_mount *mp, xfs_ino_t ino, struct xfs_inode **ipp) { int error; error = xfs_iget(mp, NULL, ino, 0, 0, ipp); if (error) return error; error = xfs_qm_dqattach(*ipp); if (error) { xfs_irele(*ipp); return error; } if (VFS_I(*ipp)->i_nlink == 0) xfs_iflags_set(*ipp, XFS_IRECOVERY); return 0; } /* * Get an inode so that we can recover a log operation. * * Log intent items that target inodes effectively contain a file handle. * Check that the generation number matches the intent item like we do for * other file handles. Log intent items defined after this validation weakness * was identified must use this function. */ int xlog_recover_iget_handle( struct xfs_mount *mp, xfs_ino_t ino, uint32_t gen, struct xfs_inode **ipp) { struct xfs_inode *ip; int error; error = xlog_recover_iget(mp, ino, &ip); if (error) return error; if (VFS_I(ip)->i_generation != gen) { xfs_irele(ip); return -EFSCORRUPTED; } *ipp = ip; return 0; } /****************************************************************************** * * Log recover routines * ****************************************************************************** */ static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = { &xlog_buf_item_ops, &xlog_inode_item_ops, &xlog_dquot_item_ops, &xlog_quotaoff_item_ops, &xlog_icreate_item_ops, &xlog_efi_item_ops, &xlog_efd_item_ops, &xlog_rui_item_ops, &xlog_rud_item_ops, &xlog_cui_item_ops, &xlog_cud_item_ops, &xlog_bui_item_ops, &xlog_bud_item_ops, &xlog_attri_item_ops, &xlog_attrd_item_ops, &xlog_xmi_item_ops, &xlog_xmd_item_ops, }; static const struct xlog_recover_item_ops * xlog_find_item_ops( struct xlog_recover_item *item) { unsigned int i; for (i = 0; i < ARRAY_SIZE(xlog_recover_item_ops); i++) if (ITEM_TYPE(item) == xlog_recover_item_ops[i]->item_type) return xlog_recover_item_ops[i]; return NULL; } /* * Sort the log items in the transaction. * * The ordering constraints are defined by the inode allocation and unlink * behaviour. The rules are: * * 1. Every item is only logged once in a given transaction. Hence it * represents the last logged state of the item. Hence ordering is * dependent on the order in which operations need to be performed so * required initial conditions are always met. * * 2. Cancelled buffers are recorded in pass 1 in a separate table and * there's nothing to replay from them so we can simply cull them * from the transaction. However, we can't do that until after we've * replayed all the other items because they may be dependent on the * cancelled buffer and replaying the cancelled buffer can remove it * form the cancelled buffer table. Hence they have tobe done last. * * 3. Inode allocation buffers must be replayed before inode items that * read the buffer and replay changes into it. For filesystems using the * ICREATE transactions, this means XFS_LI_ICREATE objects need to get * treated the same as inode allocation buffers as they create and * initialise the buffers directly. * * 4. Inode unlink buffers must be replayed after inode items are replayed. * This ensures that inodes are completely flushed to the inode buffer * in a "free" state before we remove the unlinked inode list pointer. * * Hence the ordering needs to be inode allocation buffers first, inode items * second, inode unlink buffers third and cancelled buffers last. * * But there's a problem with that - we can't tell an inode allocation buffer * apart from a regular buffer, so we can't separate them. We can, however, * tell an inode unlink buffer from the others, and so we can separate them out * from all the other buffers and move them to last. * * Hence, 4 lists, in order from head to tail: * - buffer_list for all buffers except cancelled/inode unlink buffers * - item_list for all non-buffer items * - inode_buffer_list for inode unlink buffers * - cancel_list for the cancelled buffers * * Note that we add objects to the tail of the lists so that first-to-last * ordering is preserved within the lists. Adding objects to the head of the * list means when we traverse from the head we walk them in last-to-first * order. For cancelled buffers and inode unlink buffers this doesn't matter, * but for all other items there may be specific ordering that we need to * preserve. */ STATIC int xlog_recover_reorder_trans( struct xlog *log, struct xlog_recover *trans, int pass) { struct xlog_recover_item *item, *n; int error = 0; LIST_HEAD(sort_list); LIST_HEAD(cancel_list); LIST_HEAD(buffer_list); LIST_HEAD(inode_buffer_list); LIST_HEAD(item_list); list_splice_init(&trans->r_itemq, &sort_list); list_for_each_entry_safe(item, n, &sort_list, ri_list) { enum xlog_recover_reorder fate = XLOG_REORDER_ITEM_LIST; item->ri_ops = xlog_find_item_ops(item); if (!item->ri_ops) { xfs_warn(log->l_mp, "%s: unrecognized type of log operation (%d)", __func__, ITEM_TYPE(item)); ASSERT(0); /* * return the remaining items back to the transaction * item list so they can be freed in caller. */ if (!list_empty(&sort_list)) list_splice_init(&sort_list, &trans->r_itemq); error = -EFSCORRUPTED; break; } if (item->ri_ops->reorder) fate = item->ri_ops->reorder(item); switch (fate) { case XLOG_REORDER_BUFFER_LIST: list_move_tail(&item->ri_list, &buffer_list); break; case XLOG_REORDER_CANCEL_LIST: trace_xfs_log_recover_item_reorder_head(log, trans, item, pass); list_move(&item->ri_list, &cancel_list); break; case XLOG_REORDER_INODE_BUFFER_LIST: list_move(&item->ri_list, &inode_buffer_list); break; case XLOG_REORDER_ITEM_LIST: trace_xfs_log_recover_item_reorder_tail(log, trans, item, pass); list_move_tail(&item->ri_list, &item_list); break; } } ASSERT(list_empty(&sort_list)); if (!list_empty(&buffer_list)) list_splice(&buffer_list, &trans->r_itemq); if (!list_empty(&item_list)) list_splice_tail(&item_list, &trans->r_itemq); if (!list_empty(&inode_buffer_list)) list_splice_tail(&inode_buffer_list, &trans->r_itemq); if (!list_empty(&cancel_list)) list_splice_tail(&cancel_list, &trans->r_itemq); return error; } void xlog_buf_readahead( struct xlog *log, xfs_daddr_t blkno, uint len, const struct xfs_buf_ops *ops) { if (!xlog_is_buffer_cancelled(log, blkno, len)) xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops); } /* * Create a deferred work structure for resuming and tracking the progress of a * log intent item that was found during recovery. */ void xlog_recover_intent_item( struct xlog *log, struct xfs_log_item *lip, xfs_lsn_t lsn, const struct xfs_defer_op_type *ops) { ASSERT(xlog_item_is_intent(lip)); xfs_defer_start_recovery(lip, &log->r_dfops, ops); /* * Insert the intent into the AIL directly and drop one reference so * that finishing or canceling the work will drop the other. */ xfs_trans_ail_insert(log->l_ailp, lip, lsn); lip->li_ops->iop_unpin(lip, 0); } STATIC int xlog_recover_items_pass2( struct xlog *log, struct xlog_recover *trans, struct list_head *buffer_list, struct list_head *item_list) { struct xlog_recover_item *item; int error = 0; list_for_each_entry(item, item_list, ri_list) { trace_xfs_log_recover_item_recover(log, trans, item, XLOG_RECOVER_PASS2); if (item->ri_ops->commit_pass2) error = item->ri_ops->commit_pass2(log, buffer_list, item, trans->r_lsn); if (error) return error; } return error; } /* * Perform the transaction. * * If the transaction modifies a buffer or inode, do it now. Otherwise, * EFIs and EFDs get queued up by adding entries into the AIL for them. */ STATIC int xlog_recover_commit_trans( struct xlog *log, struct xlog_recover *trans, int pass, struct list_head *buffer_list) { int error = 0; int items_queued = 0; struct xlog_recover_item *item; struct xlog_recover_item *next; LIST_HEAD (ra_list); LIST_HEAD (done_list); #define XLOG_RECOVER_COMMIT_QUEUE_MAX 100 hlist_del_init(&trans->r_list); error = xlog_recover_reorder_trans(log, trans, pass); if (error) return error; list_for_each_entry_safe(item, next, &trans->r_itemq, ri_list) { trace_xfs_log_recover_item_recover(log, trans, item, pass); switch (pass) { case XLOG_RECOVER_PASS1: if (item->ri_ops->commit_pass1) error = item->ri_ops->commit_pass1(log, item); break; case XLOG_RECOVER_PASS2: if (item->ri_ops->ra_pass2) item->ri_ops->ra_pass2(log, item); list_move_tail(&item->ri_list, &ra_list); items_queued++; if (items_queued >= XLOG_RECOVER_COMMIT_QUEUE_MAX) { error = xlog_recover_items_pass2(log, trans, buffer_list, &ra_list); list_splice_tail_init(&ra_list, &done_list); items_queued = 0; } break; default: ASSERT(0); } if (error) goto out; } out: if (!list_empty(&ra_list)) { if (!error) error = xlog_recover_items_pass2(log, trans, buffer_list, &ra_list); list_splice_tail_init(&ra_list, &done_list); } if (!list_empty(&done_list)) list_splice_init(&done_list, &trans->r_itemq); return error; } STATIC void xlog_recover_add_item( struct list_head *head) { struct xlog_recover_item *item; item = kzalloc(sizeof(struct xlog_recover_item), GFP_KERNEL | __GFP_NOFAIL); INIT_LIST_HEAD(&item->ri_list); list_add_tail(&item->ri_list, head); } STATIC int xlog_recover_add_to_cont_trans( struct xlog *log, struct xlog_recover *trans, char *dp, int len) { struct xlog_recover_item *item; char *ptr, *old_ptr; int old_len; /* * If the transaction is empty, the header was split across this and the * previous record. Copy the rest of the header. */ if (list_empty(&trans->r_itemq)) { ASSERT(len <= sizeof(struct xfs_trans_header)); if (len > sizeof(struct xfs_trans_header)) { xfs_warn(log->l_mp, "%s: bad header length", __func__); return -EFSCORRUPTED; } xlog_recover_add_item(&trans->r_itemq); ptr = (char *)&trans->r_theader + sizeof(struct xfs_trans_header) - len; memcpy(ptr, dp, len); return 0; } /* take the tail entry */ item = list_entry(trans->r_itemq.prev, struct xlog_recover_item, ri_list); old_ptr = item->ri_buf[item->ri_cnt-1].i_addr; old_len = item->ri_buf[item->ri_cnt-1].i_len; ptr = kvrealloc(old_ptr, old_len, len + old_len, GFP_KERNEL); if (!ptr) return -ENOMEM; memcpy(&ptr[old_len], dp, len); item->ri_buf[item->ri_cnt-1].i_len += len; item->ri_buf[item->ri_cnt-1].i_addr = ptr; trace_xfs_log_recover_item_add_cont(log, trans, item, 0); return 0; } /* * The next region to add is the start of a new region. It could be * a whole region or it could be the first part of a new region. Because * of this, the assumption here is that the type and size fields of all * format structures fit into the first 32 bits of the structure. * * This works because all regions must be 32 bit aligned. Therefore, we * either have both fields or we have neither field. In the case we have * neither field, the data part of the region is zero length. We only have * a log_op_header and can throw away the header since a new one will appear * later. If we have at least 4 bytes, then we can determine how many regions * will appear in the current log item. */ STATIC int xlog_recover_add_to_trans( struct xlog *log, struct xlog_recover *trans, char *dp, int len) { struct xfs_inode_log_format *in_f; /* any will do */ struct xlog_recover_item *item; char *ptr; if (!len) return 0; if (list_empty(&trans->r_itemq)) { /* we need to catch log corruptions here */ if (*(uint *)dp != XFS_TRANS_HEADER_MAGIC) { xfs_warn(log->l_mp, "%s: bad header magic number", __func__); ASSERT(0); return -EFSCORRUPTED; } if (len > sizeof(struct xfs_trans_header)) { xfs_warn(log->l_mp, "%s: bad header length", __func__); ASSERT(0); return -EFSCORRUPTED; } /* * The transaction header can be arbitrarily split across op * records. If we don't have the whole thing here, copy what we * do have and handle the rest in the next record. */ if (len == sizeof(struct xfs_trans_header)) xlog_recover_add_item(&trans->r_itemq); memcpy(&trans->r_theader, dp, len); return 0; } ptr = xlog_kvmalloc(len); memcpy(ptr, dp, len); in_f = (struct xfs_inode_log_format *)ptr; /* take the tail entry */ item = list_entry(trans->r_itemq.prev, struct xlog_recover_item, ri_list); if (item->ri_total != 0 && item->ri_total == item->ri_cnt) { /* tail item is in use, get a new one */ xlog_recover_add_item(&trans->r_itemq); item = list_entry(trans->r_itemq.prev, struct xlog_recover_item, ri_list); } if (item->ri_total == 0) { /* first region to be added */ if (in_f->ilf_size == 0 || in_f->ilf_size > XLOG_MAX_REGIONS_IN_ITEM) { xfs_warn(log->l_mp, "bad number of regions (%d) in inode log format", in_f->ilf_size); ASSERT(0); kvfree(ptr); return -EFSCORRUPTED; } item->ri_total = in_f->ilf_size; item->ri_buf = kzalloc(item->ri_total * sizeof(xfs_log_iovec_t), GFP_KERNEL | __GFP_NOFAIL); } if (item->ri_total <= item->ri_cnt) { xfs_warn(log->l_mp, "log item region count (%d) overflowed size (%d)", item->ri_cnt, item->ri_total); ASSERT(0); kvfree(ptr); return -EFSCORRUPTED; } /* Description region is ri_buf[0] */ item->ri_buf[item->ri_cnt].i_addr = ptr; item->ri_buf[item->ri_cnt].i_len = len; item->ri_cnt++; trace_xfs_log_recover_item_add(log, trans, item, 0); return 0; } /* * Free up any resources allocated by the transaction * * Remember that EFIs, EFDs, and IUNLINKs are handled later. */ STATIC void xlog_recover_free_trans( struct xlog_recover *trans) { struct xlog_recover_item *item, *n; int i; hlist_del_init(&trans->r_list); list_for_each_entry_safe(item, n, &trans->r_itemq, ri_list) { /* Free the regions in the item. */ list_del(&item->ri_list); for (i = 0; i < item->ri_cnt; i++) kvfree(item->ri_buf[i].i_addr); /* Free the item itself */ kfree(item->ri_buf); kfree(item); } /* Free the transaction recover structure */ kfree(trans); } /* * On error or completion, trans is freed. */ STATIC int xlog_recovery_process_trans( struct xlog *log, struct xlog_recover *trans, char *dp, unsigned int len, unsigned int flags, int pass, struct list_head *buffer_list) { int error = 0; bool freeit = false; /* mask off ophdr transaction container flags */ flags &= ~XLOG_END_TRANS; if (flags & XLOG_WAS_CONT_TRANS) flags &= ~XLOG_CONTINUE_TRANS; /* * Callees must not free the trans structure. We'll decide if we need to * free it or not based on the operation being done and it's result. */ switch (flags) { /* expected flag values */ case 0: case XLOG_CONTINUE_TRANS: error = xlog_recover_add_to_trans(log, trans, dp, len); break; case XLOG_WAS_CONT_TRANS: error = xlog_recover_add_to_cont_trans(log, trans, dp, len); break; case XLOG_COMMIT_TRANS: error = xlog_recover_commit_trans(log, trans, pass, buffer_list); /* success or fail, we are now done with this transaction. */ freeit = true; break; /* unexpected flag values */ case XLOG_UNMOUNT_TRANS: /* just skip trans */ xfs_warn(log->l_mp, "%s: Unmount LR", __func__); freeit = true; break; case XLOG_START_TRANS: default: xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags); ASSERT(0); error = -EFSCORRUPTED; break; } if (error || freeit) xlog_recover_free_trans(trans); return error; } /* * Lookup the transaction recovery structure associated with the ID in the * current ophdr. If the transaction doesn't exist and the start flag is set in * the ophdr, then allocate a new transaction for future ID matches to find. * Either way, return what we found during the lookup - an existing transaction * or nothing. */ STATIC struct xlog_recover * xlog_recover_ophdr_to_trans( struct hlist_head rhash[], struct xlog_rec_header *rhead, struct xlog_op_header *ohead) { struct xlog_recover *trans; xlog_tid_t tid; struct hlist_head *rhp; tid = be32_to_cpu(ohead->oh_tid); rhp = &rhash[XLOG_RHASH(tid)]; hlist_for_each_entry(trans, rhp, r_list) { if (trans->r_log_tid == tid) return trans; } /* * skip over non-start transaction headers - we could be * processing slack space before the next transaction starts */ if (!(ohead->oh_flags & XLOG_START_TRANS)) return NULL; ASSERT(be32_to_cpu(ohead->oh_len) == 0); /* * This is a new transaction so allocate a new recovery container to * hold the recovery ops that will follow. */ trans = kzalloc(sizeof(struct xlog_recover), GFP_KERNEL | __GFP_NOFAIL); trans->r_log_tid = tid; trans->r_lsn = be64_to_cpu(rhead->h_lsn); INIT_LIST_HEAD(&trans->r_itemq); INIT_HLIST_NODE(&trans->r_list); hlist_add_head(&trans->r_list, rhp); /* * Nothing more to do for this ophdr. Items to be added to this new * transaction will be in subsequent ophdr containers. */ return NULL; } STATIC int xlog_recover_process_ophdr( struct xlog *log, struct hlist_head rhash[], struct xlog_rec_header *rhead, struct xlog_op_header *ohead, char *dp, char *end, int pass, struct list_head *buffer_list) { struct xlog_recover *trans; unsigned int len; int error; /* Do we understand who wrote this op? */ if (ohead->oh_clientid != XFS_TRANSACTION && ohead->oh_clientid != XFS_LOG) { xfs_warn(log->l_mp, "%s: bad clientid 0x%x", __func__, ohead->oh_clientid); ASSERT(0); return -EFSCORRUPTED; } /* * Check the ophdr contains all the data it is supposed to contain. */ len = be32_to_cpu(ohead->oh_len); if (dp + len > end) { xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len); WARN_ON(1); return -EFSCORRUPTED; } trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead); if (!trans) { /* nothing to do, so skip over this ophdr */ return 0; } /* * The recovered buffer queue is drained only once we know that all * recovery items for the current LSN have been processed. This is * required because: * * - Buffer write submission updates the metadata LSN of the buffer. * - Log recovery skips items with a metadata LSN >= the current LSN of * the recovery item. * - Separate recovery items against the same metadata buffer can share * a current LSN. I.e., consider that the LSN of a recovery item is * defined as the starting LSN of the first record in which its * transaction appears, that a record can hold multiple transactions, * and/or that a transaction can span multiple records. * * In other words, we are allowed to submit a buffer from log recovery * once per current LSN. Otherwise, we may incorrectly skip recovery * items and cause corruption. * * We don't know up front whether buffers are updated multiple times per * LSN. Therefore, track the current LSN of each commit log record as it * is processed and drain the queue when it changes. Use commit records * because they are ordered correctly by the logging code. */ if (log->l_recovery_lsn != trans->r_lsn && ohead->oh_flags & XLOG_COMMIT_TRANS) { error = xfs_buf_delwri_submit(buffer_list); if (error) return error; log->l_recovery_lsn = trans->r_lsn; } return xlog_recovery_process_trans(log, trans, dp, len, ohead->oh_flags, pass, buffer_list); } /* * There are two valid states of the r_state field. 0 indicates that the * transaction structure is in a normal state. We have either seen the * start of the transaction or the last operation we added was not a partial * operation. If the last operation we added to the transaction was a * partial operation, we need to mark r_state with XLOG_WAS_CONT_TRANS. * * NOTE: skip LRs with 0 data length. */ STATIC int xlog_recover_process_data( struct xlog *log, struct hlist_head rhash[], struct xlog_rec_header *rhead, char *dp, int pass, struct list_head *buffer_list) { struct xlog_op_header *ohead; char *end; int num_logops; int error; end = dp + be32_to_cpu(rhead->h_len); num_logops = be32_to_cpu(rhead->h_num_logops); /* check the log format matches our own - else we can't recover */ if (xlog_header_check_recover(log->l_mp, rhead)) return -EIO; trace_xfs_log_recover_record(log, rhead, pass); while ((dp < end) && num_logops) { ohead = (struct xlog_op_header *)dp; dp += sizeof(*ohead); if (dp > end) { xfs_warn(log->l_mp, "%s: op header overrun", __func__); return -EFSCORRUPTED; } /* errors will abort recovery */ error = xlog_recover_process_ophdr(log, rhash, rhead, ohead, dp, end, pass, buffer_list); if (error) return error; dp += be32_to_cpu(ohead->oh_len); num_logops--; } return 0; } /* Take all the collected deferred ops and finish them in order. */ static int xlog_finish_defer_ops( struct xfs_mount *mp, struct list_head *capture_list) { struct xfs_defer_capture *dfc, *next; struct xfs_trans *tp; int error = 0; list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { struct xfs_trans_res resv; struct xfs_defer_resources dres; /* * Create a new transaction reservation from the captured * information. Set logcount to 1 to force the new transaction * to regrant every roll so that we can make forward progress * in recovery no matter how full the log might be. */ resv.tr_logres = dfc->dfc_logres; resv.tr_logcount = 1; resv.tr_logflags = XFS_TRANS_PERM_LOG_RES; error = xfs_trans_alloc(mp, &resv, dfc->dfc_blkres, dfc->dfc_rtxres, XFS_TRANS_RESERVE, &tp); if (error) { xlog_force_shutdown(mp->m_log, SHUTDOWN_LOG_IO_ERROR); return error; } /* * Transfer to this new transaction all the dfops we captured * from recovering a single intent item. */ list_del_init(&dfc->dfc_list); xfs_defer_ops_continue(dfc, tp, &dres); error = xfs_trans_commit(tp); xfs_defer_resources_rele(&dres); if (error) return error; } ASSERT(list_empty(capture_list)); return 0; } /* Release all the captured defer ops and capture structures in this list. */ static void xlog_abort_defer_ops( struct xfs_mount *mp, struct list_head *capture_list) { struct xfs_defer_capture *dfc; struct xfs_defer_capture *next; list_for_each_entry_safe(dfc, next, capture_list, dfc_list) { list_del_init(&dfc->dfc_list); xfs_defer_ops_capture_abort(mp, dfc); } } /* * When this is called, all of the log intent items which did not have * corresponding log done items should be in the AIL. What we do now is update * the data structures associated with each one. * * Since we process the log intent items in normal transactions, they will be * removed at some point after the commit. This prevents us from just walking * down the list processing each one. We'll use a flag in the intent item to * skip those that we've already processed and use the AIL iteration mechanism's * generation count to try to speed this up at least a bit. * * When we start, we know that the intents are the only things in the AIL. As we * process them, however, other items are added to the AIL. Hence we know we * have started recovery on all the pending intents when we find an non-intent * item in the AIL. */ STATIC int xlog_recover_process_intents( struct xlog *log) { LIST_HEAD(capture_list); struct xfs_defer_pending *dfp, *n; int error = 0; #if defined(DEBUG) || defined(XFS_WARN) xfs_lsn_t last_lsn; last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); #endif list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) { ASSERT(xlog_item_is_intent(dfp->dfp_intent)); /* * We should never see a redo item with a LSN higher than * the last transaction we found in the log at the start * of recovery. */ ASSERT(XFS_LSN_CMP(last_lsn, dfp->dfp_intent->li_lsn) >= 0); /* * NOTE: If your intent processing routine can create more * deferred ops, you /must/ attach them to the capture list in * the recover routine or else those subsequent intents will be * replayed in the wrong order! * * The recovery function can free the log item, so we must not * access dfp->dfp_intent after it returns. It must dispose of * @dfp if it returns 0. */ error = xfs_defer_finish_recovery(log->l_mp, dfp, &capture_list); if (error) break; } if (error) goto err; error = xlog_finish_defer_ops(log->l_mp, &capture_list); if (error) goto err; return 0; err: xlog_abort_defer_ops(log->l_mp, &capture_list); return error; } /* * A cancel occurs when the mount has failed and we're bailing out. Release all * pending log intent items that we haven't started recovery on so they don't * pin the AIL. */ STATIC void xlog_recover_cancel_intents( struct xlog *log) { struct xfs_defer_pending *dfp, *n; list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) { ASSERT(xlog_item_is_intent(dfp->dfp_intent)); xfs_defer_cancel_recovery(log->l_mp, dfp); } } /* * Transfer ownership of the recovered pending work to the recovery transaction * and try to finish the work. If there is more work to be done, the dfp will * remain attached to the transaction. If not, the dfp is freed. */ int xlog_recover_finish_intent( struct xfs_trans *tp, struct xfs_defer_pending *dfp) { int error; list_move(&dfp->dfp_list, &tp->t_dfops); error = xfs_defer_finish_one(tp, dfp); if (error == -EAGAIN) return 0; return error; } /* * This routine performs a transaction to null out a bad inode pointer * in an agi unlinked inode hash bucket. */ STATIC void xlog_recover_clear_agi_bucket( struct xfs_perag *pag, int bucket) { struct xfs_mount *mp = pag->pag_mount; struct xfs_trans *tp; struct xfs_agi *agi; struct xfs_buf *agibp; int offset; int error; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp); if (error) goto out_error; error = xfs_read_agi(pag, tp, 0, &agibp); if (error) goto out_abort; agi = agibp->b_addr; agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); offset = offsetof(xfs_agi_t, agi_unlinked) + (sizeof(xfs_agino_t) * bucket); xfs_trans_log_buf(tp, agibp, offset, (offset + sizeof(xfs_agino_t) - 1)); error = xfs_trans_commit(tp); if (error) goto out_error; return; out_abort: xfs_trans_cancel(tp); out_error: xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, pag->pag_agno); return; } static int xlog_recover_iunlink_bucket( struct xfs_perag *pag, struct xfs_agi *agi, int bucket) { struct xfs_mount *mp = pag->pag_mount; struct xfs_inode *prev_ip = NULL; struct xfs_inode *ip; xfs_agino_t prev_agino, agino; int error = 0; agino = be32_to_cpu(agi->agi_unlinked[bucket]); while (agino != NULLAGINO) { error = xfs_iget(mp, NULL, XFS_AGINO_TO_INO(mp, pag->pag_agno, agino), 0, 0, &ip); if (error) break; ASSERT(VFS_I(ip)->i_nlink == 0); ASSERT(VFS_I(ip)->i_mode != 0); xfs_iflags_clear(ip, XFS_IRECOVERY); agino = ip->i_next_unlinked; if (prev_ip) { ip->i_prev_unlinked = prev_agino; xfs_irele(prev_ip); /* * Ensure the inode is removed from the unlinked list * before we continue so that it won't race with * building the in-memory list here. This could be * serialised with the agibp lock, but that just * serialises via lockstepping and it's much simpler * just to flush the inodegc queue and wait for it to * complete. */ error = xfs_inodegc_flush(mp); if (error) break; } prev_agino = agino; prev_ip = ip; } if (prev_ip) { int error2; ip->i_prev_unlinked = prev_agino; xfs_irele(prev_ip); error2 = xfs_inodegc_flush(mp); if (error2 && !error) return error2; } return error; } /* * Recover AGI unlinked lists * * This is called during recovery to process any inodes which we unlinked but * not freed when the system crashed. These inodes will be on the lists in the * AGI blocks. What we do here is scan all the AGIs and fully truncate and free * any inodes found on the lists. Each inode is removed from the lists when it * has been fully truncated and is freed. The freeing of the inode and its * removal from the list must be atomic. * * If everything we touch in the agi processing loop is already in memory, this * loop can hold the cpu for a long time. It runs without lock contention, * memory allocation contention, the need wait for IO, etc, and so will run * until we either run out of inodes to process, run low on memory or we run out * of log space. * * This behaviour is bad for latency on single CPU and non-preemptible kernels, * and can prevent other filesystem work (such as CIL pushes) from running. This * can lead to deadlocks if the recovery process runs out of log reservation * space. Hence we need to yield the CPU when there is other kernel work * scheduled on this CPU to ensure other scheduled work can run without undue * latency. */ static void xlog_recover_iunlink_ag( struct xfs_perag *pag) { struct xfs_agi *agi; struct xfs_buf *agibp; int bucket; int error; error = xfs_read_agi(pag, NULL, 0, &agibp); if (error) { /* * AGI is b0rked. Don't process it. * * We should probably mark the filesystem as corrupt after we've * recovered all the ag's we can.... */ return; } /* * Unlock the buffer so that it can be acquired in the normal course of * the transaction to truncate and free each inode. Because we are not * racing with anyone else here for the AGI buffer, we don't even need * to hold it locked to read the initial unlinked bucket entries out of * the buffer. We keep buffer reference though, so that it stays pinned * in memory while we need the buffer. */ agi = agibp->b_addr; xfs_buf_unlock(agibp); for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) { error = xlog_recover_iunlink_bucket(pag, agi, bucket); if (error) { /* * Bucket is unrecoverable, so only a repair scan can * free the remaining unlinked inodes. Just empty the * bucket and remaining inodes on it unreferenced and * unfreeable. */ xlog_recover_clear_agi_bucket(pag, bucket); } } xfs_buf_rele(agibp); } static void xlog_recover_process_iunlinks( struct xlog *log) { struct xfs_perag *pag; xfs_agnumber_t agno; for_each_perag(log->l_mp, agno, pag) xlog_recover_iunlink_ag(pag); } STATIC void xlog_unpack_data( struct xlog_rec_header *rhead, char *dp, struct xlog *log) { int i, j, k; for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) && i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) { *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i]; dp += BBSIZE; } if (xfs_has_logv2(log->l_mp)) { xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead; for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) { j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k]; dp += BBSIZE; } } } /* * CRC check, unpack and process a log record. */ STATIC int xlog_recover_process( struct xlog *log, struct hlist_head rhash[], struct xlog_rec_header *rhead, char *dp, int pass, struct list_head *buffer_list) { __le32 old_crc = rhead->h_crc; __le32 crc; crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len)); /* * Nothing else to do if this is a CRC verification pass. Just return * if this a record with a non-zero crc. Unfortunately, mkfs always * sets old_crc to 0 so we must consider this valid even on v5 supers. * Otherwise, return EFSBADCRC on failure so the callers up the stack * know precisely what failed. */ if (pass == XLOG_RECOVER_CRCPASS) { if (old_crc && crc != old_crc) return -EFSBADCRC; return 0; } /* * We're in the normal recovery path. Issue a warning if and only if the * CRC in the header is non-zero. This is an advisory warning and the * zero CRC check prevents warnings from being emitted when upgrading * the kernel from one that does not add CRCs by default. */ if (crc != old_crc) { if (old_crc || xfs_has_crc(log->l_mp)) { xfs_alert(log->l_mp, "log record CRC mismatch: found 0x%x, expected 0x%x.", le32_to_cpu(old_crc), le32_to_cpu(crc)); xfs_hex_dump(dp, 32); } /* * If the filesystem is CRC enabled, this mismatch becomes a * fatal log corruption failure. */ if (xfs_has_crc(log->l_mp)) { XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp); return -EFSCORRUPTED; } } xlog_unpack_data(rhead, dp, log); return xlog_recover_process_data(log, rhash, rhead, dp, pass, buffer_list); } STATIC int xlog_valid_rec_header( struct xlog *log, struct xlog_rec_header *rhead, xfs_daddr_t blkno, int bufsize) { int hlen; if (XFS_IS_CORRUPT(log->l_mp, rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) return -EFSCORRUPTED; if (XFS_IS_CORRUPT(log->l_mp, (!rhead->h_version || (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) { xfs_warn(log->l_mp, "%s: unrecognised log version (%d).", __func__, be32_to_cpu(rhead->h_version)); return -EFSCORRUPTED; } /* * LR body must have data (or it wouldn't have been written) * and h_len must not be greater than LR buffer size. */ hlen = be32_to_cpu(rhead->h_len); if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > bufsize)) return -EFSCORRUPTED; if (XFS_IS_CORRUPT(log->l_mp, blkno > log->l_logBBsize || blkno > INT_MAX)) return -EFSCORRUPTED; return 0; } /* * Read the log from tail to head and process the log records found. * Handle the two cases where the tail and head are in the same cycle * and where the active portion of the log wraps around the end of * the physical log separately. The pass parameter is passed through * to the routines called to process the data and is not looked at * here. */ STATIC int xlog_do_recovery_pass( struct xlog *log, xfs_daddr_t head_blk, xfs_daddr_t tail_blk, int pass, xfs_daddr_t *first_bad) /* out: first bad log rec */ { xlog_rec_header_t *rhead; xfs_daddr_t blk_no, rblk_no; xfs_daddr_t rhead_blk; char *offset; char *hbp, *dbp; int error = 0, h_size, h_len; int error2 = 0; int bblks, split_bblks; int hblks = 1, split_hblks, wrapped_hblks; int i; struct hlist_head rhash[XLOG_RHASH_SIZE]; LIST_HEAD (buffer_list); ASSERT(head_blk != tail_blk); blk_no = rhead_blk = tail_blk; for (i = 0; i < XLOG_RHASH_SIZE; i++) INIT_HLIST_HEAD(&rhash[i]); hbp = xlog_alloc_buffer(log, hblks); if (!hbp) return -ENOMEM; /* * Read the header of the tail block and get the iclog buffer size from * h_size. Use this to tell how many sectors make up the log header. */ if (xfs_has_logv2(log->l_mp)) { /* * When using variable length iclogs, read first sector of * iclog header and extract the header size from it. Get a * new hbp that is the correct size. */ error = xlog_bread(log, tail_blk, 1, hbp, &offset); if (error) goto bread_err1; rhead = (xlog_rec_header_t *)offset; /* * xfsprogs has a bug where record length is based on lsunit but * h_size (iclog size) is hardcoded to 32k. Now that we * unconditionally CRC verify the unmount record, this means the * log buffer can be too small for the record and cause an * overrun. * * Detect this condition here. Use lsunit for the buffer size as * long as this looks like the mkfs case. Otherwise, return an * error to avoid a buffer overrun. */ h_size = be32_to_cpu(rhead->h_size); h_len = be32_to_cpu(rhead->h_len); if (h_len > h_size && h_len <= log->l_mp->m_logbsize && rhead->h_num_logops == cpu_to_be32(1)) { xfs_warn(log->l_mp, "invalid iclog size (%d bytes), using lsunit (%d bytes)", h_size, log->l_mp->m_logbsize); h_size = log->l_mp->m_logbsize; } error = xlog_valid_rec_header(log, rhead, tail_blk, h_size); if (error) goto bread_err1; /* * This open codes xlog_logrec_hblks so that we can reuse the * fixed up h_size value calculated above. Without that we'd * still allocate the buffer based on the incorrect on-disk * size. */ if (h_size > XLOG_HEADER_CYCLE_SIZE && (rhead->h_version & cpu_to_be32(XLOG_VERSION_2))) { hblks = DIV_ROUND_UP(h_size, XLOG_HEADER_CYCLE_SIZE); if (hblks > 1) { kvfree(hbp); hbp = xlog_alloc_buffer(log, hblks); if (!hbp) return -ENOMEM; } } } else { ASSERT(log->l_sectBBsize == 1); h_size = XLOG_BIG_RECORD_BSIZE; } dbp = xlog_alloc_buffer(log, BTOBB(h_size)); if (!dbp) { kvfree(hbp); return -ENOMEM; } memset(rhash, 0, sizeof(rhash)); if (tail_blk > head_blk) { /* * Perform recovery around the end of the physical log. * When the head is not on the same cycle number as the tail, * we can't do a sequential recovery. */ while (blk_no < log->l_logBBsize) { /* * Check for header wrapping around physical end-of-log */ offset = hbp; split_hblks = 0; wrapped_hblks = 0; if (blk_no + hblks <= log->l_logBBsize) { /* Read header in one read */ error = xlog_bread(log, blk_no, hblks, hbp, &offset); if (error) goto bread_err2; } else { /* This LR is split across physical log end */ if (blk_no != log->l_logBBsize) { /* some data before physical log end */ ASSERT(blk_no <= INT_MAX); split_hblks = log->l_logBBsize - (int)blk_no; ASSERT(split_hblks > 0); error = xlog_bread(log, blk_no, split_hblks, hbp, &offset); if (error) goto bread_err2; } /* * Note: this black magic still works with * large sector sizes (non-512) only because: * - we increased the buffer size originally * by 1 sector giving us enough extra space * for the second read; * - the log start is guaranteed to be sector * aligned; * - we read the log end (LR header start) * _first_, then the log start (LR header end) * - order is important. */ wrapped_hblks = hblks - split_hblks; error = xlog_bread_noalign(log, 0, wrapped_hblks, offset + BBTOB(split_hblks)); if (error) goto bread_err2; } rhead = (xlog_rec_header_t *)offset; error = xlog_valid_rec_header(log, rhead, split_hblks ? blk_no : 0, h_size); if (error) goto bread_err2; bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); blk_no += hblks; /* * Read the log record data in multiple reads if it * wraps around the end of the log. Note that if the * header already wrapped, blk_no could point past the * end of the log. The record data is contiguous in * that case. */ if (blk_no + bblks <= log->l_logBBsize || blk_no >= log->l_logBBsize) { rblk_no = xlog_wrap_logbno(log, blk_no); error = xlog_bread(log, rblk_no, bblks, dbp, &offset); if (error) goto bread_err2; } else { /* This log record is split across the * physical end of log */ offset = dbp; split_bblks = 0; if (blk_no != log->l_logBBsize) { /* some data is before the physical * end of log */ ASSERT(!wrapped_hblks); ASSERT(blk_no <= INT_MAX); split_bblks = log->l_logBBsize - (int)blk_no; ASSERT(split_bblks > 0); error = xlog_bread(log, blk_no, split_bblks, dbp, &offset); if (error) goto bread_err2; } /* * Note: this black magic still works with * large sector sizes (non-512) only because: * - we increased the buffer size originally * by 1 sector giving us enough extra space * for the second read; * - the log start is guaranteed to be sector * aligned; * - we read the log end (LR header start) * _first_, then the log start (LR header end) * - order is important. */ error = xlog_bread_noalign(log, 0, bblks - split_bblks, offset + BBTOB(split_bblks)); if (error) goto bread_err2; } error = xlog_recover_process(log, rhash, rhead, offset, pass, &buffer_list); if (error) goto bread_err2; blk_no += bblks; rhead_blk = blk_no; } ASSERT(blk_no >= log->l_logBBsize); blk_no -= log->l_logBBsize; rhead_blk = blk_no; } /* read first part of physical log */ while (blk_no < head_blk) { error = xlog_bread(log, blk_no, hblks, hbp, &offset); if (error) goto bread_err2; rhead = (xlog_rec_header_t *)offset; error = xlog_valid_rec_header(log, rhead, blk_no, h_size); if (error) goto bread_err2; /* blocks in data section */ bblks = (int)BTOBB(be32_to_cpu(rhead->h_len)); error = xlog_bread(log, blk_no+hblks, bblks, dbp, &offset); if (error) goto bread_err2; error = xlog_recover_process(log, rhash, rhead, offset, pass, &buffer_list); if (error) goto bread_err2; blk_no += bblks + hblks; rhead_blk = blk_no; } bread_err2: kvfree(dbp); bread_err1: kvfree(hbp); /* * Submit buffers that have been dirtied by the last record recovered. */ if (!list_empty(&buffer_list)) { if (error) { /* * If there has been an item recovery error then we * cannot allow partial checkpoint writeback to * occur. We might have multiple checkpoints with the * same start LSN in this buffer list, and partial * writeback of a checkpoint in this situation can * prevent future recovery of all the changes in the * checkpoints at this start LSN. * * Note: Shutting down the filesystem will result in the * delwri submission marking all the buffers stale, * completing them and cleaning up _XBF_LOGRECOVERY * state without doing any IO. */ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); } error2 = xfs_buf_delwri_submit(&buffer_list); } if (error && first_bad) *first_bad = rhead_blk; /* * Transactions are freed at commit time but transactions without commit * records on disk are never committed. Free any that may be left in the * hash table. */ for (i = 0; i < XLOG_RHASH_SIZE; i++) { struct hlist_node *tmp; struct xlog_recover *trans; hlist_for_each_entry_safe(trans, tmp, &rhash[i], r_list) xlog_recover_free_trans(trans); } return error ? error : error2; } /* * Do the recovery of the log. We actually do this in two phases. * The two passes are necessary in order to implement the function * of cancelling a record written into the log. The first pass * determines those things which have been cancelled, and the * second pass replays log items normally except for those which * have been cancelled. The handling of the replay and cancellations * takes place in the log item type specific routines. * * The table of items which have cancel records in the log is allocated * and freed at this level, since only here do we know when all of * the log recovery has been completed. */ STATIC int xlog_do_log_recovery( struct xlog *log, xfs_daddr_t head_blk, xfs_daddr_t tail_blk) { int error; ASSERT(head_blk != tail_blk); /* * First do a pass to find all of the cancelled buf log items. * Store them in the buf_cancel_table for use in the second pass. */ error = xlog_alloc_buf_cancel_table(log); if (error) return error; error = xlog_do_recovery_pass(log, head_blk, tail_blk, XLOG_RECOVER_PASS1, NULL); if (error != 0) goto out_cancel; /* * Then do a second pass to actually recover the items in the log. * When it is complete free the table of buf cancel items. */ error = xlog_do_recovery_pass(log, head_blk, tail_blk, XLOG_RECOVER_PASS2, NULL); if (!error) xlog_check_buf_cancel_table(log); out_cancel: xlog_free_buf_cancel_table(log); return error; } /* * Do the actual recovery */ STATIC int xlog_do_recover( struct xlog *log, xfs_daddr_t head_blk, xfs_daddr_t tail_blk) { struct xfs_mount *mp = log->l_mp; struct xfs_buf *bp = mp->m_sb_bp; struct xfs_sb *sbp = &mp->m_sb; int error; trace_xfs_log_recover(log, head_blk, tail_blk); /* * First replay the images in the log. */ error = xlog_do_log_recovery(log, head_blk, tail_blk); if (error) return error; if (xlog_is_shutdown(log)) return -EIO; /* * We now update the tail_lsn since much of the recovery has completed * and there may be space available to use. If there were no extent or * iunlinks, we can free up the entire log. This was set in * xlog_find_tail to be the lsn of the last known good LR on disk. If * there are extent frees or iunlinks they will have some entries in the * AIL; so we look at the AIL to determine how to set the tail_lsn. */ xfs_ail_assign_tail_lsn(log->l_ailp); /* * Now that we've finished replaying all buffer and inode updates, * re-read the superblock and reverify it. */ xfs_buf_lock(bp); xfs_buf_hold(bp); error = _xfs_buf_read(bp, XBF_READ); if (error) { if (!xlog_is_shutdown(log)) { xfs_buf_ioerror_alert(bp, __this_address); ASSERT(0); } xfs_buf_relse(bp); return error; } /* Convert superblock from on-disk format */ xfs_sb_from_disk(sbp, bp->b_addr); xfs_buf_relse(bp); /* re-initialise in-core superblock and geometry structures */ mp->m_features |= xfs_sb_version_to_features(sbp); xfs_reinit_percpu_counters(mp); error = xfs_initialize_perag(mp, sbp->sb_agcount, sbp->sb_dblocks, &mp->m_maxagi); if (error) { xfs_warn(mp, "Failed post-recovery per-ag init: %d", error); return error; } mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); /* Normal transactions can now occur */ clear_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate); return 0; } /* * Perform recovery and re-initialize some log variables in xlog_find_tail. * * Return error or zero. */ int xlog_recover( struct xlog *log) { xfs_daddr_t head_blk, tail_blk; int error; /* find the tail of the log */ error = xlog_find_tail(log, &head_blk, &tail_blk); if (error) return error; /* * The superblock was read before the log was available and thus the LSN * could not be verified. Check the superblock LSN against the current * LSN now that it's known. */ if (xfs_has_crc(log->l_mp) && !xfs_log_check_lsn(log->l_mp, log->l_mp->m_sb.sb_lsn)) return -EINVAL; if (tail_blk != head_blk) { /* There used to be a comment here: * * disallow recovery on read-only mounts. note -- mount * checks for ENOSPC and turns it into an intelligent * error message. * ...but this is no longer true. Now, unless you specify * NORECOVERY (in which case this function would never be * called), we just go ahead and recover. We do this all * under the vfs layer, so we can get away with it unless * the device itself is read-only, in which case we fail. */ if ((error = xfs_dev_is_read_only(log->l_mp, "recovery"))) { return error; } /* * Version 5 superblock log feature mask validation. We know the * log is dirty so check if there are any unknown log features * in what we need to recover. If there are unknown features * (e.g. unsupported transactions, then simply reject the * attempt at recovery before touching anything. */ if (xfs_sb_is_v5(&log->l_mp->m_sb) && xfs_sb_has_incompat_log_feature(&log->l_mp->m_sb, XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)) { xfs_warn(log->l_mp, "Superblock has unknown incompatible log features (0x%x) enabled.", (log->l_mp->m_sb.sb_features_log_incompat & XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN)); xfs_warn(log->l_mp, "The log can not be fully and/or safely recovered by this kernel."); xfs_warn(log->l_mp, "Please recover the log on a kernel that supports the unknown features."); return -EINVAL; } /* * Delay log recovery if the debug hook is set. This is debug * instrumentation to coordinate simulation of I/O failures with * log recovery. */ if (xfs_globals.log_recovery_delay) { xfs_notice(log->l_mp, "Delaying log recovery for %d seconds.", xfs_globals.log_recovery_delay); msleep(xfs_globals.log_recovery_delay * 1000); } xfs_notice(log->l_mp, "Starting recovery (logdev: %s)", log->l_mp->m_logname ? log->l_mp->m_logname : "internal"); error = xlog_do_recover(log, head_blk, tail_blk); set_bit(XLOG_RECOVERY_NEEDED, &log->l_opstate); } return error; } /* * In the first part of recovery we replay inodes and buffers and build up the * list of intents which need to be processed. Here we process the intents and * clean up the on disk unlinked inode lists. This is separated from the first * part of recovery so that the root and real-time bitmap inodes can be read in * from disk in between the two stages. This is necessary so that we can free * space in the real-time portion of the file system. * * We run this whole process under GFP_NOFS allocation context. We do a * combination of non-transactional and transactional work, yet we really don't * want to recurse into the filesystem from direct reclaim during any of this * processing. This allows all the recovery code run here not to care about the * memory allocation context it is running in. */ int xlog_recover_finish( struct xlog *log) { unsigned int nofs_flags = memalloc_nofs_save(); int error; error = xlog_recover_process_intents(log); if (error) { /* * Cancel all the unprocessed intent items now so that we don't * leave them pinned in the AIL. This can cause the AIL to * livelock on the pinned item if anyone tries to push the AIL * (inode reclaim does this) before we get around to * xfs_log_mount_cancel. */ xlog_recover_cancel_intents(log); xfs_alert(log->l_mp, "Failed to recover intents"); xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); goto out_error; } /* * Sync the log to get all the intents out of the AIL. This isn't * absolutely necessary, but it helps in case the unlink transactions * would have problems pushing the intents out of the way. */ xfs_log_force(log->l_mp, XFS_LOG_SYNC); xlog_recover_process_iunlinks(log); /* * Recover any CoW staging blocks that are still referenced by the * ondisk refcount metadata. During mount there cannot be any live * staging extents as we have not permitted any user modifications. * Therefore, it is safe to free them all right now, even on a * read-only mount. */ error = xfs_reflink_recover_cow(log->l_mp); if (error) { xfs_alert(log->l_mp, "Failed to recover leftover CoW staging extents, err %d.", error); /* * If we get an error here, make sure the log is shut down * but return zero so that any log items committed since the * end of intents processing can be pushed through the CIL * and AIL. */ xlog_force_shutdown(log, SHUTDOWN_LOG_IO_ERROR); error = 0; goto out_error; } out_error: memalloc_nofs_restore(nofs_flags); return error; } void xlog_recover_cancel( struct xlog *log) { if (xlog_recovery_needed(log)) xlog_recover_cancel_intents(log); } |
| 2 1 1 2 2 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 | // SPDX-License-Identifier: GPL-2.0-only /* * Generic netlink handshake service * * Author: Chuck Lever <chuck.lever@oracle.com> * * Copyright (c) 2023, Oracle and/or its affiliates. */ #include <linux/types.h> #include <linux/socket.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/mm.h> #include <net/sock.h> #include <net/genetlink.h> #include <net/netns/generic.h> #include <kunit/visibility.h> #include <uapi/linux/handshake.h> #include "handshake.h" #include "genl.h" #include <trace/events/handshake.h> /** * handshake_genl_notify - Notify handlers that a request is waiting * @net: target network namespace * @proto: handshake protocol * @flags: memory allocation control flags * * Returns zero on success or a negative errno if notification failed. */ int handshake_genl_notify(struct net *net, const struct handshake_proto *proto, gfp_t flags) { struct sk_buff *msg; void *hdr; /* Disable notifications during unit testing */ if (!test_bit(HANDSHAKE_F_PROTO_NOTIFY, &proto->hp_flags)) return 0; if (!genl_has_listeners(&handshake_nl_family, net, proto->hp_handler_class)) return -ESRCH; msg = genlmsg_new(GENLMSG_DEFAULT_SIZE, flags); if (!msg) return -ENOMEM; hdr = genlmsg_put(msg, 0, 0, &handshake_nl_family, 0, HANDSHAKE_CMD_READY); if (!hdr) goto out_free; if (nla_put_u32(msg, HANDSHAKE_A_ACCEPT_HANDLER_CLASS, proto->hp_handler_class) < 0) { genlmsg_cancel(msg, hdr); goto out_free; } genlmsg_end(msg, hdr); return genlmsg_multicast_netns(&handshake_nl_family, net, msg, 0, proto->hp_handler_class, flags); out_free: nlmsg_free(msg); return -EMSGSIZE; } /** * handshake_genl_put - Create a generic netlink message header * @msg: buffer in which to create the header * @info: generic netlink message context * * Returns a ready-to-use header, or NULL. */ struct nlmsghdr *handshake_genl_put(struct sk_buff *msg, struct genl_info *info) { return genlmsg_put(msg, info->snd_portid, info->snd_seq, &handshake_nl_family, 0, info->genlhdr->cmd); } EXPORT_SYMBOL(handshake_genl_put); int handshake_nl_accept_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct handshake_net *hn = handshake_pernet(net); struct handshake_req *req = NULL; struct socket *sock; int class, fd, err; err = -EOPNOTSUPP; if (!hn) goto out_status; err = -EINVAL; if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_ACCEPT_HANDLER_CLASS)) goto out_status; class = nla_get_u32(info->attrs[HANDSHAKE_A_ACCEPT_HANDLER_CLASS]); err = -EAGAIN; req = handshake_req_next(hn, class); if (!req) goto out_status; sock = req->hr_sk->sk_socket; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { err = fd; goto out_complete; } err = req->hr_proto->hp_accept(req, info, fd); if (err) { put_unused_fd(fd); goto out_complete; } fd_install(fd, get_file(sock->file)); trace_handshake_cmd_accept(net, req, req->hr_sk, fd); return 0; out_complete: handshake_complete(req, -EIO, NULL); out_status: trace_handshake_cmd_accept_err(net, req, NULL, err); return err; } int handshake_nl_done_doit(struct sk_buff *skb, struct genl_info *info) { struct net *net = sock_net(skb->sk); struct handshake_req *req; struct socket *sock; int fd, status, err; if (GENL_REQ_ATTR_CHECK(info, HANDSHAKE_A_DONE_SOCKFD)) return -EINVAL; fd = nla_get_s32(info->attrs[HANDSHAKE_A_DONE_SOCKFD]); sock = sockfd_lookup(fd, &err); if (!sock) return err; req = handshake_req_hash_lookup(sock->sk); if (!req) { err = -EBUSY; trace_handshake_cmd_done_err(net, req, sock->sk, err); fput(sock->file); return err; } trace_handshake_cmd_done(net, req, sock->sk, fd); status = -EIO; if (info->attrs[HANDSHAKE_A_DONE_STATUS]) status = nla_get_u32(info->attrs[HANDSHAKE_A_DONE_STATUS]); handshake_complete(req, status, info); fput(sock->file); return 0; } static unsigned int handshake_net_id; static int __net_init handshake_net_init(struct net *net) { struct handshake_net *hn = net_generic(net, handshake_net_id); unsigned long tmp; struct sysinfo si; /* * Arbitrary limit to prevent handshakes that do not make * progress from clogging up the system. The cap scales up * with the amount of physical memory on the system. */ si_meminfo(&si); tmp = si.totalram / (25 * si.mem_unit); hn->hn_pending_max = clamp(tmp, 3UL, 50UL); spin_lock_init(&hn->hn_lock); hn->hn_pending = 0; hn->hn_flags = 0; INIT_LIST_HEAD(&hn->hn_requests); return 0; } static void __net_exit handshake_net_exit(struct net *net) { struct handshake_net *hn = net_generic(net, handshake_net_id); struct handshake_req *req; LIST_HEAD(requests); /* * Drain the net's pending list. Requests that have been * accepted and are in progress will be destroyed when * the socket is closed. */ spin_lock(&hn->hn_lock); set_bit(HANDSHAKE_F_NET_DRAINING, &hn->hn_flags); list_splice_init(&requests, &hn->hn_requests); spin_unlock(&hn->hn_lock); while (!list_empty(&requests)) { req = list_first_entry(&requests, struct handshake_req, hr_list); list_del(&req->hr_list); /* * Requests on this list have not yet been * accepted, so they do not have an fd to put. */ handshake_complete(req, -ETIMEDOUT, NULL); } } static struct pernet_operations handshake_genl_net_ops = { .init = handshake_net_init, .exit = handshake_net_exit, .id = &handshake_net_id, .size = sizeof(struct handshake_net), }; /** * handshake_pernet - Get the handshake private per-net structure * @net: network namespace * * Returns a pointer to the net's private per-net structure for the * handshake module, or NULL if handshake_init() failed. */ struct handshake_net *handshake_pernet(struct net *net) { return handshake_net_id ? net_generic(net, handshake_net_id) : NULL; } EXPORT_SYMBOL_IF_KUNIT(handshake_pernet); static int __init handshake_init(void) { int ret; ret = handshake_req_hash_init(); if (ret) { pr_warn("handshake: hash initialization failed (%d)\n", ret); return ret; } ret = genl_register_family(&handshake_nl_family); if (ret) { pr_warn("handshake: netlink registration failed (%d)\n", ret); handshake_req_hash_destroy(); return ret; } /* * ORDER: register_pernet_subsys must be done last. * * If initialization does not make it past pernet_subsys * registration, then handshake_net_id will remain 0. That * shunts the handshake consumer API to return ENOTSUPP * to prevent it from dereferencing something that hasn't * been allocated. */ ret = register_pernet_subsys(&handshake_genl_net_ops); if (ret) { pr_warn("handshake: pernet registration failed (%d)\n", ret); genl_unregister_family(&handshake_nl_family); handshake_req_hash_destroy(); } return ret; } static void __exit handshake_exit(void) { unregister_pernet_subsys(&handshake_genl_net_ops); handshake_net_id = 0; handshake_req_hash_destroy(); genl_unregister_family(&handshake_nl_family); } module_init(handshake_init); module_exit(handshake_exit); |
| 17 14 8 1 37 36 36 18 36 4 36 11 2 3 20 34 29 4 26 33 33 27 25 26 1 38 38 30 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/isofs/namei.c * * (C) 1992 Eric Youngdale Modified for ISO 9660 filesystem. * * (C) 1991 Linus Torvalds - minix filesystem */ #include <linux/gfp.h> #include "isofs.h" /* * ok, we cannot use strncmp, as the name is not in our data space. * Thus we'll have to use isofs_match. No big problem. Match also makes * some sanity tests. */ static int isofs_cmp(struct dentry *dentry, const char *compare, int dlen) { struct qstr qstr; qstr.name = compare; qstr.len = dlen; if (likely(!dentry->d_op)) return dentry->d_name.len != dlen || memcmp(dentry->d_name.name, compare, dlen); return dentry->d_op->d_compare(NULL, dentry->d_name.len, dentry->d_name.name, &qstr); } /* * isofs_find_entry() * * finds an entry in the specified directory with the wanted name. It * returns the inode number of the found entry, or 0 on error. */ static unsigned long isofs_find_entry(struct inode *dir, struct dentry *dentry, unsigned long *block_rv, unsigned long *offset_rv, char *tmpname, struct iso_directory_record *tmpde) { unsigned long bufsize = ISOFS_BUFFER_SIZE(dir); unsigned char bufbits = ISOFS_BUFFER_BITS(dir); unsigned long block, f_pos, offset, block_saved, offset_saved; struct buffer_head *bh = NULL; struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb); if (!ISOFS_I(dir)->i_first_extent) return 0; f_pos = 0; offset = 0; block = 0; while (f_pos < dir->i_size) { struct iso_directory_record *de; int de_len, match, i, dlen; char *dpnt; if (!bh) { bh = isofs_bread(dir, block); if (!bh) return 0; } de = (struct iso_directory_record *) (bh->b_data + offset); de_len = *(unsigned char *) de; if (!de_len) { brelse(bh); bh = NULL; f_pos = (f_pos + ISOFS_BLOCK_SIZE) & ~(ISOFS_BLOCK_SIZE - 1); block = f_pos >> bufbits; offset = 0; continue; } block_saved = bh->b_blocknr; offset_saved = offset; offset += de_len; f_pos += de_len; /* Make sure we have a full directory entry */ if (offset >= bufsize) { int slop = bufsize - offset + de_len; memcpy(tmpde, de, slop); offset &= bufsize - 1; block++; brelse(bh); bh = NULL; if (offset) { bh = isofs_bread(dir, block); if (!bh) return 0; memcpy((void *) tmpde + slop, bh->b_data, offset); } de = tmpde; } dlen = de->name_len[0]; dpnt = de->name; /* Basic sanity check, whether name doesn't exceed dir entry */ if (de_len < dlen + sizeof(struct iso_directory_record)) { printk(KERN_NOTICE "iso9660: Corrupted directory entry" " in block %lu of inode %lu\n", block, dir->i_ino); brelse(bh); return 0; } if (sbi->s_rock && ((i = get_rock_ridge_filename(de, tmpname, dir)))) { dlen = i; /* possibly -1 */ dpnt = tmpname; #ifdef CONFIG_JOLIET } else if (sbi->s_joliet_level) { dlen = get_joliet_filename(de, tmpname, dir); dpnt = tmpname; #endif } else if (sbi->s_mapping == 'a') { dlen = get_acorn_filename(de, tmpname, dir); dpnt = tmpname; } else if (sbi->s_mapping == 'n') { dlen = isofs_name_translate(de, tmpname, dir); dpnt = tmpname; } /* * Skip hidden or associated files unless hide or showassoc, * respectively, is set */ match = 0; if (dlen > 0 && (!sbi->s_hide || (!(de->flags[-sbi->s_high_sierra] & 1))) && (sbi->s_showassoc || (!(de->flags[-sbi->s_high_sierra] & 4)))) { if (dpnt && (dlen > 1 || dpnt[0] > 1)) match = (isofs_cmp(dentry, dpnt, dlen) == 0); } if (match) { isofs_normalize_block_and_offset(de, &block_saved, &offset_saved); *block_rv = block_saved; *offset_rv = offset_saved; brelse(bh); return 1; } } brelse(bh); return 0; } struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { int found; unsigned long block; unsigned long offset; struct inode *inode; struct page *page; page = alloc_page(GFP_USER); if (!page) return ERR_PTR(-ENOMEM); found = isofs_find_entry(dir, dentry, &block, &offset, page_address(page), 1024 + page_address(page)); __free_page(page); inode = found ? isofs_iget(dir->i_sb, block, offset) : NULL; return d_splice_alias(inode, dentry); } |
| 8559 8562 1 129 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/ethtool.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <net/net_namespace.h> #include <linux/if_arp.h> #include <net/rtnetlink.h> static netdev_tx_t nlmon_xmit(struct sk_buff *skb, struct net_device *dev) { dev_lstats_add(dev, skb->len); dev_kfree_skb(skb); return NETDEV_TX_OK; } struct nlmon { struct netlink_tap nt; }; static int nlmon_open(struct net_device *dev) { struct nlmon *nlmon = netdev_priv(dev); nlmon->nt.dev = dev; nlmon->nt.module = THIS_MODULE; return netlink_add_tap(&nlmon->nt); } static int nlmon_close(struct net_device *dev) { struct nlmon *nlmon = netdev_priv(dev); return netlink_remove_tap(&nlmon->nt); } static void nlmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { dev_lstats_read(dev, &stats->rx_packets, &stats->rx_bytes); } static u32 always_on(struct net_device *dev) { return 1; } static const struct ethtool_ops nlmon_ethtool_ops = { .get_link = always_on, }; static const struct net_device_ops nlmon_ops = { .ndo_open = nlmon_open, .ndo_stop = nlmon_close, .ndo_start_xmit = nlmon_xmit, .ndo_get_stats64 = nlmon_get_stats64, }; static void nlmon_setup(struct net_device *dev) { dev->type = ARPHRD_NETLINK; dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &nlmon_ops; dev->ethtool_ops = &nlmon_ethtool_ops; dev->needs_free_netdev = true; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | NETIF_F_LLTX; dev->flags = IFF_NOARP; dev->pcpu_stat_type = NETDEV_PCPU_STAT_LSTATS; /* That's rather a softlimit here, which, of course, * can be altered. Not a real MTU, but what is to be * expected in most cases. */ dev->mtu = NLMSG_GOODSIZE; dev->min_mtu = sizeof(struct nlmsghdr); } static int nlmon_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) return -EINVAL; return 0; } static struct rtnl_link_ops nlmon_link_ops __read_mostly = { .kind = "nlmon", .priv_size = sizeof(struct nlmon), .setup = nlmon_setup, .validate = nlmon_validate, }; static __init int nlmon_register(void) { return rtnl_link_register(&nlmon_link_ops); } static __exit void nlmon_unregister(void) { rtnl_link_unregister(&nlmon_link_ops); } module_init(nlmon_register); module_exit(nlmon_unregister); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>"); MODULE_AUTHOR("Mathieu Geli <geli@enseirb.fr>"); MODULE_DESCRIPTION("Netlink monitoring device"); MODULE_ALIAS_RTNL_LINK("nlmon"); |
| 4 1 1 2 1 1 8 3 1 1 4 1 37 6 29 112 2 60 1 171 1 1 1 64 2 64 6 5 1 1 125 28 1 24 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 5282 5283 5284 5285 5286 5287 5288 5289 5290 5291 5292 5293 5294 5295 5296 5297 5298 5299 5300 5301 5302 5303 5304 5305 5306 5307 5308 5309 5310 5311 5312 5313 5314 5315 5316 5317 5318 5319 5320 5321 5322 5323 5324 5325 5326 5327 5328 5329 5330 5331 5332 5333 5334 5335 5336 5337 5338 5339 5340 5341 5342 5343 5344 5345 5346 5347 5348 5349 5350 5351 5352 5353 5354 5355 5356 5357 5358 5359 5360 5361 5362 5363 5364 5365 5366 5367 5368 5369 5370 5371 5372 5373 5374 5375 5376 5377 5378 5379 5380 5381 5382 5383 5384 5385 5386 5387 5388 5389 5390 5391 5392 5393 5394 5395 5396 5397 5398 5399 5400 5401 5402 5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416 5417 5418 5419 5420 5421 5422 5423 5424 5425 5426 5427 5428 5429 5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445 5446 5447 5448 5449 5450 5451 5452 5453 5454 5455 5456 5457 5458 5459 5460 5461 5462 5463 5464 5465 5466 5467 5468 5469 5470 5471 5472 5473 5474 5475 5476 5477 5478 5479 5480 5481 5482 5483 5484 5485 5486 5487 5488 5489 5490 5491 5492 5493 5494 5495 5496 5497 5498 5499 5500 5501 5502 5503 5504 5505 5506 5507 5508 5509 5510 5511 5512 5513 5514 5515 5516 5517 5518 5519 5520 5521 5522 5523 5524 5525 5526 5527 5528 5529 5530 5531 5532 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * IEEE 802.11 defines * * Copyright (c) 2001-2002, SSH Communications Security Corp and Jouni Malinen * <jkmaline@cc.hut.fi> * Copyright (c) 2002-2003, Jouni Malinen <jkmaline@cc.hut.fi> * Copyright (c) 2005, Devicescape Software, Inc. * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net> * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * Copyright (c) 2016 - 2017 Intel Deutschland GmbH * Copyright (c) 2018 - 2024 Intel Corporation */ #ifndef LINUX_IEEE80211_H #define LINUX_IEEE80211_H #include <linux/types.h> #include <linux/if_ether.h> #include <linux/etherdevice.h> #include <linux/bitfield.h> #include <asm/byteorder.h> #include <asm/unaligned.h> /* * DS bit usage * * TA = transmitter address * RA = receiver address * DA = destination address * SA = source address * * ToDS FromDS A1(RA) A2(TA) A3 A4 Use * ----------------------------------------------------------------- * 0 0 DA SA BSSID - IBSS/DLS * 0 1 DA BSSID SA - AP -> STA * 1 0 BSSID SA DA - AP <- STA * 1 1 RA TA DA SA unspecified (WDS) */ #define FCS_LEN 4 #define IEEE80211_FCTL_VERS 0x0003 #define IEEE80211_FCTL_FTYPE 0x000c #define IEEE80211_FCTL_STYPE 0x00f0 #define IEEE80211_FCTL_TODS 0x0100 #define IEEE80211_FCTL_FROMDS 0x0200 #define IEEE80211_FCTL_MOREFRAGS 0x0400 #define IEEE80211_FCTL_RETRY 0x0800 #define IEEE80211_FCTL_PM 0x1000 #define IEEE80211_FCTL_MOREDATA 0x2000 #define IEEE80211_FCTL_PROTECTED 0x4000 #define IEEE80211_FCTL_ORDER 0x8000 #define IEEE80211_FCTL_CTL_EXT 0x0f00 #define IEEE80211_SCTL_FRAG 0x000F #define IEEE80211_SCTL_SEQ 0xFFF0 #define IEEE80211_FTYPE_MGMT 0x0000 #define IEEE80211_FTYPE_CTL 0x0004 #define IEEE80211_FTYPE_DATA 0x0008 #define IEEE80211_FTYPE_EXT 0x000c /* management */ #define IEEE80211_STYPE_ASSOC_REQ 0x0000 #define IEEE80211_STYPE_ASSOC_RESP 0x0010 #define IEEE80211_STYPE_REASSOC_REQ 0x0020 #define IEEE80211_STYPE_REASSOC_RESP 0x0030 #define IEEE80211_STYPE_PROBE_REQ 0x0040 #define IEEE80211_STYPE_PROBE_RESP 0x0050 #define IEEE80211_STYPE_BEACON 0x0080 #define IEEE80211_STYPE_ATIM 0x0090 #define IEEE80211_STYPE_DISASSOC 0x00A0 #define IEEE80211_STYPE_AUTH 0x00B0 #define IEEE80211_STYPE_DEAUTH 0x00C0 #define IEEE80211_STYPE_ACTION 0x00D0 /* control */ #define IEEE80211_STYPE_TRIGGER 0x0020 #define IEEE80211_STYPE_CTL_EXT 0x0060 #define IEEE80211_STYPE_BACK_REQ 0x0080 #define IEEE80211_STYPE_BACK 0x0090 #define IEEE80211_STYPE_PSPOLL 0x00A0 #define IEEE80211_STYPE_RTS 0x00B0 #define IEEE80211_STYPE_CTS 0x00C0 #define IEEE80211_STYPE_ACK 0x00D0 #define IEEE80211_STYPE_CFEND 0x00E0 #define IEEE80211_STYPE_CFENDACK 0x00F0 /* data */ #define IEEE80211_STYPE_DATA 0x0000 #define IEEE80211_STYPE_DATA_CFACK 0x0010 #define IEEE80211_STYPE_DATA_CFPOLL 0x0020 #define IEEE80211_STYPE_DATA_CFACKPOLL 0x0030 #define IEEE80211_STYPE_NULLFUNC 0x0040 #define IEEE80211_STYPE_CFACK 0x0050 #define IEEE80211_STYPE_CFPOLL 0x0060 #define IEEE80211_STYPE_CFACKPOLL 0x0070 #define IEEE80211_STYPE_QOS_DATA 0x0080 #define IEEE80211_STYPE_QOS_DATA_CFACK 0x0090 #define IEEE80211_STYPE_QOS_DATA_CFPOLL 0x00A0 #define IEEE80211_STYPE_QOS_DATA_CFACKPOLL 0x00B0 #define IEEE80211_STYPE_QOS_NULLFUNC 0x00C0 #define IEEE80211_STYPE_QOS_CFACK 0x00D0 #define IEEE80211_STYPE_QOS_CFPOLL 0x00E0 #define IEEE80211_STYPE_QOS_CFACKPOLL 0x00F0 /* extension, added by 802.11ad */ #define IEEE80211_STYPE_DMG_BEACON 0x0000 #define IEEE80211_STYPE_S1G_BEACON 0x0010 /* bits unique to S1G beacon */ #define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 /* see 802.11ah-2016 9.9 NDP CMAC frames */ #define IEEE80211_S1G_1MHZ_NDP_BITS 25 #define IEEE80211_S1G_1MHZ_NDP_BYTES 4 #define IEEE80211_S1G_2MHZ_NDP_BITS 37 #define IEEE80211_S1G_2MHZ_NDP_BYTES 5 #define IEEE80211_NDP_FTYPE_CTS 0 #define IEEE80211_NDP_FTYPE_CF_END 0 #define IEEE80211_NDP_FTYPE_PS_POLL 1 #define IEEE80211_NDP_FTYPE_ACK 2 #define IEEE80211_NDP_FTYPE_PS_POLL_ACK 3 #define IEEE80211_NDP_FTYPE_BA 4 #define IEEE80211_NDP_FTYPE_BF_REPORT_POLL 5 #define IEEE80211_NDP_FTYPE_PAGING 6 #define IEEE80211_NDP_FTYPE_PREQ 7 #define SM64(f, v) ((((u64)v) << f##_S) & f) /* NDP CMAC frame fields */ #define IEEE80211_NDP_FTYPE 0x0000000000000007 #define IEEE80211_NDP_FTYPE_S 0x0000000000000000 /* 1M Probe Request 11ah 9.9.3.1.1 */ #define IEEE80211_NDP_1M_PREQ_ANO 0x0000000000000008 #define IEEE80211_NDP_1M_PREQ_ANO_S 3 #define IEEE80211_NDP_1M_PREQ_CSSID 0x00000000000FFFF0 #define IEEE80211_NDP_1M_PREQ_CSSID_S 4 #define IEEE80211_NDP_1M_PREQ_RTYPE 0x0000000000100000 #define IEEE80211_NDP_1M_PREQ_RTYPE_S 20 #define IEEE80211_NDP_1M_PREQ_RSV 0x0000000001E00000 #define IEEE80211_NDP_1M_PREQ_RSV 0x0000000001E00000 /* 2M Probe Request 11ah 9.9.3.1.2 */ #define IEEE80211_NDP_2M_PREQ_ANO 0x0000000000000008 #define IEEE80211_NDP_2M_PREQ_ANO_S 3 #define IEEE80211_NDP_2M_PREQ_CSSID 0x0000000FFFFFFFF0 #define IEEE80211_NDP_2M_PREQ_CSSID_S 4 #define IEEE80211_NDP_2M_PREQ_RTYPE 0x0000001000000000 #define IEEE80211_NDP_2M_PREQ_RTYPE_S 36 #define IEEE80211_ANO_NETTYPE_WILD 15 /* bits unique to S1G beacon */ #define IEEE80211_S1G_BCN_NEXT_TBTT 0x100 /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */ #define IEEE80211_CTL_EXT_POLL 0x2000 #define IEEE80211_CTL_EXT_SPR 0x3000 #define IEEE80211_CTL_EXT_GRANT 0x4000 #define IEEE80211_CTL_EXT_DMG_CTS 0x5000 #define IEEE80211_CTL_EXT_DMG_DTS 0x6000 #define IEEE80211_CTL_EXT_SSW 0x8000 #define IEEE80211_CTL_EXT_SSW_FBACK 0x9000 #define IEEE80211_CTL_EXT_SSW_ACK 0xa000 #define IEEE80211_SN_MASK ((IEEE80211_SCTL_SEQ) >> 4) #define IEEE80211_MAX_SN IEEE80211_SN_MASK #define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1) /* PV1 Layout IEEE 802.11-2020 9.8.3.1 */ #define IEEE80211_PV1_FCTL_VERS 0x0003 #define IEEE80211_PV1_FCTL_FTYPE 0x001c #define IEEE80211_PV1_FCTL_STYPE 0x00e0 #define IEEE80211_PV1_FCTL_FROMDS 0x0100 #define IEEE80211_PV1_FCTL_MOREFRAGS 0x0200 #define IEEE80211_PV1_FCTL_PM 0x0400 #define IEEE80211_PV1_FCTL_MOREDATA 0x0800 #define IEEE80211_PV1_FCTL_PROTECTED 0x1000 #define IEEE80211_PV1_FCTL_END_SP 0x2000 #define IEEE80211_PV1_FCTL_RELAYED 0x4000 #define IEEE80211_PV1_FCTL_ACK_POLICY 0x8000 #define IEEE80211_PV1_FCTL_CTL_EXT 0x0f00 static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) { return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); } static inline bool ieee80211_sn_less_eq(u16 sn1, u16 sn2) { return ((sn2 - sn1) & IEEE80211_SN_MASK) <= (IEEE80211_SN_MODULO >> 1); } static inline u16 ieee80211_sn_add(u16 sn1, u16 sn2) { return (sn1 + sn2) & IEEE80211_SN_MASK; } static inline u16 ieee80211_sn_inc(u16 sn) { return ieee80211_sn_add(sn, 1); } static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) { return (sn1 - sn2) & IEEE80211_SN_MASK; } #define IEEE80211_SEQ_TO_SN(seq) (((seq) & IEEE80211_SCTL_SEQ) >> 4) #define IEEE80211_SN_TO_SEQ(ssn) (((ssn) << 4) & IEEE80211_SCTL_SEQ) /* miscellaneous IEEE 802.11 constants */ #define IEEE80211_MAX_FRAG_THRESHOLD 2352 #define IEEE80211_MAX_RTS_THRESHOLD 2353 #define IEEE80211_MAX_AID 2007 #define IEEE80211_MAX_AID_S1G 8191 #define IEEE80211_MAX_TIM_LEN 251 #define IEEE80211_MAX_MESH_PEERINGS 63 /* Maximum size for the MA-UNITDATA primitive, 802.11 standard section 6.2.1.1.2. 802.11e clarifies the figure in section 7.1.2. The frame body is up to 2304 octets long (maximum MSDU size) plus any crypt overhead. */ #define IEEE80211_MAX_DATA_LEN 2304 /* 802.11ad extends maximum MSDU size for DMG (freq > 40Ghz) networks * to 7920 bytes, see 8.2.3 General frame format */ #define IEEE80211_MAX_DATA_LEN_DMG 7920 /* 30 byte 4 addr hdr, 2 byte QoS, 2304 byte MSDU, 12 byte crypt, 4 byte FCS */ #define IEEE80211_MAX_FRAME_LEN 2352 /* Maximal size of an A-MSDU that can be transported in a HT BA session */ #define IEEE80211_MAX_MPDU_LEN_HT_BA 4095 /* Maximal size of an A-MSDU */ #define IEEE80211_MAX_MPDU_LEN_HT_3839 3839 #define IEEE80211_MAX_MPDU_LEN_HT_7935 7935 #define IEEE80211_MAX_MPDU_LEN_VHT_3895 3895 #define IEEE80211_MAX_MPDU_LEN_VHT_7991 7991 #define IEEE80211_MAX_MPDU_LEN_VHT_11454 11454 #define IEEE80211_MAX_SSID_LEN 32 #define IEEE80211_MAX_MESH_ID_LEN 32 #define IEEE80211_FIRST_TSPEC_TSID 8 #define IEEE80211_NUM_TIDS 16 /* number of user priorities 802.11 uses */ #define IEEE80211_NUM_UPS 8 /* number of ACs */ #define IEEE80211_NUM_ACS 4 #define IEEE80211_QOS_CTL_LEN 2 /* 1d tag mask */ #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 /* TID mask */ #define IEEE80211_QOS_CTL_TID_MASK 0x000f /* EOSP */ #define IEEE80211_QOS_CTL_EOSP 0x0010 /* ACK policy */ #define IEEE80211_QOS_CTL_ACK_POLICY_NORMAL 0x0000 #define IEEE80211_QOS_CTL_ACK_POLICY_NOACK 0x0020 #define IEEE80211_QOS_CTL_ACK_POLICY_NO_EXPL 0x0040 #define IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK 0x0060 #define IEEE80211_QOS_CTL_ACK_POLICY_MASK 0x0060 /* A-MSDU 802.11n */ #define IEEE80211_QOS_CTL_A_MSDU_PRESENT 0x0080 /* Mesh Control 802.11s */ #define IEEE80211_QOS_CTL_MESH_CONTROL_PRESENT 0x0100 /* Mesh Power Save Level */ #define IEEE80211_QOS_CTL_MESH_PS_LEVEL 0x0200 /* Mesh Receiver Service Period Initiated */ #define IEEE80211_QOS_CTL_RSPI 0x0400 /* U-APSD queue for WMM IEs sent by AP */ #define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD (1<<7) #define IEEE80211_WMM_IE_AP_QOSINFO_PARAM_SET_CNT_MASK 0x0f /* U-APSD queues for WMM IEs sent by STA */ #define IEEE80211_WMM_IE_STA_QOSINFO_AC_VO (1<<0) #define IEEE80211_WMM_IE_STA_QOSINFO_AC_VI (1<<1) #define IEEE80211_WMM_IE_STA_QOSINFO_AC_BK (1<<2) #define IEEE80211_WMM_IE_STA_QOSINFO_AC_BE (1<<3) #define IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK 0x0f /* U-APSD max SP length for WMM IEs sent by STA */ #define IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL 0x00 #define IEEE80211_WMM_IE_STA_QOSINFO_SP_2 0x01 #define IEEE80211_WMM_IE_STA_QOSINFO_SP_4 0x02 #define IEEE80211_WMM_IE_STA_QOSINFO_SP_6 0x03 #define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK 0x03 #define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT 5 #define IEEE80211_HT_CTL_LEN 4 /* trigger type within common_info of trigger frame */ #define IEEE80211_TRIGGER_TYPE_MASK 0xf #define IEEE80211_TRIGGER_TYPE_BASIC 0x0 #define IEEE80211_TRIGGER_TYPE_BFRP 0x1 #define IEEE80211_TRIGGER_TYPE_MU_BAR 0x2 #define IEEE80211_TRIGGER_TYPE_MU_RTS 0x3 #define IEEE80211_TRIGGER_TYPE_BSRP 0x4 #define IEEE80211_TRIGGER_TYPE_GCR_MU_BAR 0x5 #define IEEE80211_TRIGGER_TYPE_BQRP 0x6 #define IEEE80211_TRIGGER_TYPE_NFRP 0x7 /* UL-bandwidth within common_info of trigger frame */ #define IEEE80211_TRIGGER_ULBW_MASK 0xc0000 #define IEEE80211_TRIGGER_ULBW_20MHZ 0x0 #define IEEE80211_TRIGGER_ULBW_40MHZ 0x1 #define IEEE80211_TRIGGER_ULBW_80MHZ 0x2 #define IEEE80211_TRIGGER_ULBW_160_80P80MHZ 0x3 struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; struct_group(addrs, u8 addr1[ETH_ALEN]; u8 addr2[ETH_ALEN]; u8 addr3[ETH_ALEN]; ); __le16 seq_ctrl; u8 addr4[ETH_ALEN]; } __packed __aligned(2); struct ieee80211_hdr_3addr { __le16 frame_control; __le16 duration_id; u8 addr1[ETH_ALEN]; u8 addr2[ETH_ALEN]; u8 addr3[ETH_ALEN]; __le16 seq_ctrl; } __packed __aligned(2); struct ieee80211_qos_hdr { __le16 frame_control; __le16 duration_id; u8 addr1[ETH_ALEN]; u8 addr2[ETH_ALEN]; u8 addr3[ETH_ALEN]; __le16 seq_ctrl; __le16 qos_ctrl; } __packed __aligned(2); struct ieee80211_qos_hdr_4addr { __le16 frame_control; __le16 duration_id; u8 addr1[ETH_ALEN]; u8 addr2[ETH_ALEN]; u8 addr3[ETH_ALEN]; __le16 seq_ctrl; u8 addr4[ETH_ALEN]; __le16 qos_ctrl; } __packed __aligned(2); struct ieee80211_trigger { __le16 frame_control; __le16 duration; u8 ra[ETH_ALEN]; u8 ta[ETH_ALEN]; __le64 common_info; u8 variable[]; } __packed __aligned(2); /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame has to-DS set */ static inline bool ieee80211_has_tods(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_TODS)) != 0; } /** * ieee80211_has_fromds - check if IEEE80211_FCTL_FROMDS is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame has from-DS set */ static inline bool ieee80211_has_fromds(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FROMDS)) != 0; } /** * ieee80211_has_a4 - check if IEEE80211_FCTL_TODS and IEEE80211_FCTL_FROMDS are set * @fc: frame control bytes in little-endian byteorder * Return: whether or not it's a 4-address frame (from-DS and to-DS set) */ static inline bool ieee80211_has_a4(__le16 fc) { __le16 tmp = cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS); return (fc & tmp) == tmp; } /** * ieee80211_has_morefrags - check if IEEE80211_FCTL_MOREFRAGS is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame has more fragments (more frags bit set) */ static inline bool ieee80211_has_morefrags(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_MOREFRAGS)) != 0; } /** * ieee80211_has_retry - check if IEEE80211_FCTL_RETRY is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the retry flag is set */ static inline bool ieee80211_has_retry(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_RETRY)) != 0; } /** * ieee80211_has_pm - check if IEEE80211_FCTL_PM is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the power management flag is set */ static inline bool ieee80211_has_pm(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_PM)) != 0; } /** * ieee80211_has_moredata - check if IEEE80211_FCTL_MOREDATA is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the more data flag is set */ static inline bool ieee80211_has_moredata(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_MOREDATA)) != 0; } /** * ieee80211_has_protected - check if IEEE80211_FCTL_PROTECTED is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the protected flag is set */ static inline bool ieee80211_has_protected(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_PROTECTED)) != 0; } /** * ieee80211_has_order - check if IEEE80211_FCTL_ORDER is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the order flag is set */ static inline bool ieee80211_has_order(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_ORDER)) != 0; } /** * ieee80211_is_mgmt - check if type is IEEE80211_FTYPE_MGMT * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame type is management */ static inline bool ieee80211_is_mgmt(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT); } /** * ieee80211_is_ctl - check if type is IEEE80211_FTYPE_CTL * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame type is control */ static inline bool ieee80211_is_ctl(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL); } /** * ieee80211_is_data - check if type is IEEE80211_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a data frame */ static inline bool ieee80211_is_data(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA); } /** * ieee80211_is_ext - check if type is IEEE80211_FTYPE_EXT * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame type is extended */ static inline bool ieee80211_is_ext(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_EXT); } /** * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a QoS data frame */ static inline bool ieee80211_is_data_qos(__le16 fc) { /* * mask with QOS_DATA rather than IEEE80211_FCTL_STYPE as we just need * to check the one bit */ return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_STYPE_QOS_DATA)) == cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_DATA); } /** * ieee80211_is_data_present - check if type is IEEE80211_FTYPE_DATA and has data * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a QoS data frame that has data * (i.e. is not null data) */ static inline bool ieee80211_is_data_present(__le16 fc) { /* * mask with 0x40 and test that that bit is clear to only return true * for the data-containing substypes. */ return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | 0x40)) == cpu_to_le16(IEEE80211_FTYPE_DATA); } /** * ieee80211_is_assoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_REQ * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an association request */ static inline bool ieee80211_is_assoc_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_REQ); } /** * ieee80211_is_assoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_RESP * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an association response */ static inline bool ieee80211_is_assoc_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_RESP); } /** * ieee80211_is_reassoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_REQ * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a reassociation request */ static inline bool ieee80211_is_reassoc_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_REQ); } /** * ieee80211_is_reassoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_RESP * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a reassociation response */ static inline bool ieee80211_is_reassoc_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_RESP); } /** * ieee80211_is_probe_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_REQ * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a probe request */ static inline bool ieee80211_is_probe_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ); } /** * ieee80211_is_probe_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_RESP * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a probe response */ static inline bool ieee80211_is_probe_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); } /** * ieee80211_is_beacon - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_BEACON * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a (regular, not S1G) beacon */ static inline bool ieee80211_is_beacon(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); } /** * ieee80211_is_s1g_beacon - check if IEEE80211_FTYPE_EXT && * IEEE80211_STYPE_S1G_BEACON * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an S1G beacon */ static inline bool ieee80211_is_s1g_beacon(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON); } /** * ieee80211_is_s1g_short_beacon - check if frame is an S1G short beacon * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an S1G short beacon, * i.e. it is an S1G beacon with 'next TBTT' flag set */ static inline bool ieee80211_is_s1g_short_beacon(__le16 fc) { return ieee80211_is_s1g_beacon(fc) && (fc & cpu_to_le16(IEEE80211_S1G_BCN_NEXT_TBTT)); } /** * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an ATIM frame */ static inline bool ieee80211_is_atim(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ATIM); } /** * ieee80211_is_disassoc - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DISASSOC * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a disassociation frame */ static inline bool ieee80211_is_disassoc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_DISASSOC); } /** * ieee80211_is_auth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_AUTH * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an authentication frame */ static inline bool ieee80211_is_auth(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH); } /** * ieee80211_is_deauth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DEAUTH * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a deauthentication frame */ static inline bool ieee80211_is_deauth(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_DEAUTH); } /** * ieee80211_is_action - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ACTION * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an action frame */ static inline bool ieee80211_is_action(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); } /** * ieee80211_is_back_req - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK_REQ * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a block-ACK request frame */ static inline bool ieee80211_is_back_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK_REQ); } /** * ieee80211_is_back - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a block-ACK frame */ static inline bool ieee80211_is_back(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK); } /** * ieee80211_is_pspoll - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_PSPOLL * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a PS-poll frame */ static inline bool ieee80211_is_pspoll(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL); } /** * ieee80211_is_rts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_RTS * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an RTS frame */ static inline bool ieee80211_is_rts(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS); } /** * ieee80211_is_cts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CTS * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a CTS frame */ static inline bool ieee80211_is_cts(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS); } /** * ieee80211_is_ack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_ACK * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is an ACK frame */ static inline bool ieee80211_is_ack(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_ACK); } /** * ieee80211_is_cfend - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFEND * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a CF-end frame */ static inline bool ieee80211_is_cfend(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CFEND); } /** * ieee80211_is_cfendack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFENDACK * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a CF-end-ack frame */ static inline bool ieee80211_is_cfendack(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CFENDACK); } /** * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a nullfunc frame */ static inline bool ieee80211_is_nullfunc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC); } /** * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a QoS nullfunc frame */ static inline bool ieee80211_is_qos_nullfunc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); } /** * ieee80211_is_trigger - check if frame is trigger frame * @fc: frame control field in little-endian byteorder * Return: whether or not the frame is a trigger frame */ static inline bool ieee80211_is_trigger(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_TRIGGER); } /** * ieee80211_is_any_nullfunc - check if frame is regular or QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder * Return: whether or not the frame is a nullfunc or QoS nullfunc frame */ static inline bool ieee80211_is_any_nullfunc(__le16 fc) { return (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)); } /** * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set * @seq_ctrl: frame sequence control bytes in little-endian byteorder * Return: whether or not the frame is the first fragment (also true if * it's not fragmented at all) */ static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) { return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0; } /** * ieee80211_is_frag - check if a frame is a fragment * @hdr: 802.11 header of the frame * Return: whether or not the frame is a fragment */ static inline bool ieee80211_is_frag(struct ieee80211_hdr *hdr) { return ieee80211_has_morefrags(hdr->frame_control) || hdr->seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG); } static inline u16 ieee80211_get_sn(struct ieee80211_hdr *hdr) { return le16_get_bits(hdr->seq_ctrl, IEEE80211_SCTL_SEQ); } struct ieee80211s_hdr { u8 flags; u8 ttl; __le32 seqnum; u8 eaddr1[ETH_ALEN]; u8 eaddr2[ETH_ALEN]; } __packed __aligned(2); /* Mesh flags */ #define MESH_FLAGS_AE_A4 0x1 #define MESH_FLAGS_AE_A5_A6 0x2 #define MESH_FLAGS_AE 0x3 #define MESH_FLAGS_PS_DEEP 0x4 /** * enum ieee80211_preq_flags - mesh PREQ element flags * * @IEEE80211_PREQ_PROACTIVE_PREP_FLAG: proactive PREP subfield */ enum ieee80211_preq_flags { IEEE80211_PREQ_PROACTIVE_PREP_FLAG = 1<<2, }; /** * enum ieee80211_preq_target_flags - mesh PREQ element per target flags * * @IEEE80211_PREQ_TO_FLAG: target only subfield * @IEEE80211_PREQ_USN_FLAG: unknown target HWMP sequence number subfield */ enum ieee80211_preq_target_flags { IEEE80211_PREQ_TO_FLAG = 1<<0, IEEE80211_PREQ_USN_FLAG = 1<<2, }; /** * struct ieee80211_quiet_ie - Quiet element * @count: Quiet Count * @period: Quiet Period * @duration: Quiet Duration * @offset: Quiet Offset * * This structure represents the payload of the "Quiet element" as * described in IEEE Std 802.11-2020 section 9.4.2.22. */ struct ieee80211_quiet_ie { u8 count; u8 period; __le16 duration; __le16 offset; } __packed; /** * struct ieee80211_msrment_ie - Measurement element * @token: Measurement Token * @mode: Measurement Report Mode * @type: Measurement Type * @request: Measurement Request or Measurement Report * * This structure represents the payload of both the "Measurement * Request element" and the "Measurement Report element" as described * in IEEE Std 802.11-2020 sections 9.4.2.20 and 9.4.2.21. */ struct ieee80211_msrment_ie { u8 token; u8 mode; u8 type; u8 request[]; } __packed; /** * struct ieee80211_channel_sw_ie - Channel Switch Announcement element * @mode: Channel Switch Mode * @new_ch_num: New Channel Number * @count: Channel Switch Count * * This structure represents the payload of the "Channel Switch * Announcement element" as described in IEEE Std 802.11-2020 section * 9.4.2.18. */ struct ieee80211_channel_sw_ie { u8 mode; u8 new_ch_num; u8 count; } __packed; /** * struct ieee80211_ext_chansw_ie - Extended Channel Switch Announcement element * @mode: Channel Switch Mode * @new_operating_class: New Operating Class * @new_ch_num: New Channel Number * @count: Channel Switch Count * * This structure represents the "Extended Channel Switch Announcement * element" as described in IEEE Std 802.11-2020 section 9.4.2.52. */ struct ieee80211_ext_chansw_ie { u8 mode; u8 new_operating_class; u8 new_ch_num; u8 count; } __packed; /** * struct ieee80211_sec_chan_offs_ie - secondary channel offset IE * @sec_chan_offs: secondary channel offset, uses IEEE80211_HT_PARAM_CHA_SEC_* * values here * This structure represents the "Secondary Channel Offset element" */ struct ieee80211_sec_chan_offs_ie { u8 sec_chan_offs; } __packed; /** * struct ieee80211_mesh_chansw_params_ie - mesh channel switch parameters IE * @mesh_ttl: Time To Live * @mesh_flags: Flags * @mesh_reason: Reason Code * @mesh_pre_value: Precedence Value * * This structure represents the payload of the "Mesh Channel Switch * Parameters element" as described in IEEE Std 802.11-2020 section * 9.4.2.102. */ struct ieee80211_mesh_chansw_params_ie { u8 mesh_ttl; u8 mesh_flags; __le16 mesh_reason; __le16 mesh_pre_value; } __packed; /** * struct ieee80211_wide_bw_chansw_ie - wide bandwidth channel switch IE * @new_channel_width: New Channel Width * @new_center_freq_seg0: New Channel Center Frequency Segment 0 * @new_center_freq_seg1: New Channel Center Frequency Segment 1 * * This structure represents the payload of the "Wide Bandwidth * Channel Switch element" as described in IEEE Std 802.11-2020 * section 9.4.2.160. */ struct ieee80211_wide_bw_chansw_ie { u8 new_channel_width; u8 new_center_freq_seg0, new_center_freq_seg1; } __packed; /** * struct ieee80211_tim_ie - Traffic Indication Map information element * @dtim_count: DTIM Count * @dtim_period: DTIM Period * @bitmap_ctrl: Bitmap Control * @required_octet: "Syntatic sugar" to force the struct size to the * minimum valid size when carried in a non-S1G PPDU * @virtual_map: Partial Virtual Bitmap * * This structure represents the payload of the "TIM element" as * described in IEEE Std 802.11-2020 section 9.4.2.5. Note that this * definition is only applicable when the element is carried in a * non-S1G PPDU. When the TIM is carried in an S1G PPDU, the Bitmap * Control and Partial Virtual Bitmap may not be present. */ struct ieee80211_tim_ie { u8 dtim_count; u8 dtim_period; u8 bitmap_ctrl; union { u8 required_octet; DECLARE_FLEX_ARRAY(u8, virtual_map); }; } __packed; /** * struct ieee80211_meshconf_ie - Mesh Configuration element * @meshconf_psel: Active Path Selection Protocol Identifier * @meshconf_pmetric: Active Path Selection Metric Identifier * @meshconf_congest: Congestion Control Mode Identifier * @meshconf_synch: Synchronization Method Identifier * @meshconf_auth: Authentication Protocol Identifier * @meshconf_form: Mesh Formation Info * @meshconf_cap: Mesh Capability (see &enum mesh_config_capab_flags) * * This structure represents the payload of the "Mesh Configuration * element" as described in IEEE Std 802.11-2020 section 9.4.2.97. */ struct ieee80211_meshconf_ie { u8 meshconf_psel; u8 meshconf_pmetric; u8 meshconf_congest; u8 meshconf_synch; u8 meshconf_auth; u8 meshconf_form; u8 meshconf_cap; } __packed; /** * enum mesh_config_capab_flags - Mesh Configuration IE capability field flags * * @IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS: STA is willing to establish * additional mesh peerings with other mesh STAs * @IEEE80211_MESHCONF_CAPAB_FORWARDING: the STA forwards MSDUs * @IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING: TBTT adjustment procedure * is ongoing * @IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL: STA is in deep sleep mode or has * neighbors in deep sleep mode * * Enumerates the "Mesh Capability" as described in IEEE Std * 802.11-2020 section 9.4.2.97.7. */ enum mesh_config_capab_flags { IEEE80211_MESHCONF_CAPAB_ACCEPT_PLINKS = 0x01, IEEE80211_MESHCONF_CAPAB_FORWARDING = 0x08, IEEE80211_MESHCONF_CAPAB_TBTT_ADJUSTING = 0x20, IEEE80211_MESHCONF_CAPAB_POWER_SAVE_LEVEL = 0x40, }; #define IEEE80211_MESHCONF_FORM_CONNECTED_TO_GATE 0x1 /* * mesh channel switch parameters element's flag indicator * */ #define WLAN_EID_CHAN_SWITCH_PARAM_TX_RESTRICT BIT(0) #define WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR BIT(1) #define WLAN_EID_CHAN_SWITCH_PARAM_REASON BIT(2) /** * struct ieee80211_rann_ie - RANN (root announcement) element * @rann_flags: Flags * @rann_hopcount: Hop Count * @rann_ttl: Element TTL * @rann_addr: Root Mesh STA Address * @rann_seq: HWMP Sequence Number * @rann_interval: Interval * @rann_metric: Metric * * This structure represents the payload of the "RANN element" as * described in IEEE Std 802.11-2020 section 9.4.2.111. */ struct ieee80211_rann_ie { u8 rann_flags; u8 rann_hopcount; u8 rann_ttl; u8 rann_addr[ETH_ALEN]; __le32 rann_seq; __le32 rann_interval; __le32 rann_metric; } __packed; enum ieee80211_rann_flags { RANN_FLAG_IS_GATE = 1 << 0, }; enum ieee80211_ht_chanwidth_values { IEEE80211_HT_CHANWIDTH_20MHZ = 0, IEEE80211_HT_CHANWIDTH_ANY = 1, }; /** * enum ieee80211_vht_opmode_bits - VHT operating mode field bits * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK: channel width mask * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ: 20 MHz channel width * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: 80 MHz channel width * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: 160 MHz or 80+80 MHz channel width * @IEEE80211_OPMODE_NOTIF_BW_160_80P80: 160 / 80+80 MHz indicator flag * @IEEE80211_OPMODE_NOTIF_RX_NSS_MASK: number of spatial streams mask * (the NSS value is the value of this field + 1) * @IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT: number of spatial streams shift * @IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF: indicates streams in SU-MIMO PPDU * using a beamforming steering matrix */ enum ieee80211_vht_opmode_bits { IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK = 0x03, IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ = 0, IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ = 1, IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ = 2, IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ = 3, IEEE80211_OPMODE_NOTIF_BW_160_80P80 = 0x04, IEEE80211_OPMODE_NOTIF_RX_NSS_MASK = 0x70, IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT = 4, IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF = 0x80, }; /** * enum ieee80211_s1g_chanwidth - S1G channel widths * These are defined in IEEE802.11-2016ah Table 10-20 * as BSS Channel Width * * @IEEE80211_S1G_CHANWIDTH_1MHZ: 1MHz operating channel * @IEEE80211_S1G_CHANWIDTH_2MHZ: 2MHz operating channel * @IEEE80211_S1G_CHANWIDTH_4MHZ: 4MHz operating channel * @IEEE80211_S1G_CHANWIDTH_8MHZ: 8MHz operating channel * @IEEE80211_S1G_CHANWIDTH_16MHZ: 16MHz operating channel */ enum ieee80211_s1g_chanwidth { IEEE80211_S1G_CHANWIDTH_1MHZ = 0, IEEE80211_S1G_CHANWIDTH_2MHZ = 1, IEEE80211_S1G_CHANWIDTH_4MHZ = 3, IEEE80211_S1G_CHANWIDTH_8MHZ = 7, IEEE80211_S1G_CHANWIDTH_16MHZ = 15, }; #define WLAN_SA_QUERY_TR_ID_LEN 2 #define WLAN_MEMBERSHIP_LEN 8 #define WLAN_USER_POSITION_LEN 16 /** * struct ieee80211_tpc_report_ie - TPC Report element * @tx_power: Transmit Power * @link_margin: Link Margin * * This structure represents the payload of the "TPC Report element" as * described in IEEE Std 802.11-2020 section 9.4.2.16. */ struct ieee80211_tpc_report_ie { u8 tx_power; u8 link_margin; } __packed; #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_MASK GENMASK(2, 1) #define IEEE80211_ADDBA_EXT_FRAG_LEVEL_SHIFT 1 #define IEEE80211_ADDBA_EXT_NO_FRAG BIT(0) #define IEEE80211_ADDBA_EXT_BUF_SIZE_MASK GENMASK(7, 5) #define IEEE80211_ADDBA_EXT_BUF_SIZE_SHIFT 10 struct ieee80211_addba_ext_ie { u8 data; } __packed; /** * struct ieee80211_s1g_bcn_compat_ie - S1G Beacon Compatibility element * @compat_info: Compatibility Information * @beacon_int: Beacon Interval * @tsf_completion: TSF Completion * * This structure represents the payload of the "S1G Beacon * Compatibility element" as described in IEEE Std 802.11-2020 section * 9.4.2.196. */ struct ieee80211_s1g_bcn_compat_ie { __le16 compat_info; __le16 beacon_int; __le32 tsf_completion; } __packed; /** * struct ieee80211_s1g_oper_ie - S1G Operation element * @ch_width: S1G Operation Information Channel Width * @oper_class: S1G Operation Information Operating Class * @primary_ch: S1G Operation Information Primary Channel Number * @oper_ch: S1G Operation Information Channel Center Frequency * @basic_mcs_nss: Basic S1G-MCS and NSS Set * * This structure represents the payload of the "S1G Operation * element" as described in IEEE Std 802.11-2020 section 9.4.2.212. */ struct ieee80211_s1g_oper_ie { u8 ch_width; u8 oper_class; u8 primary_ch; u8 oper_ch; __le16 basic_mcs_nss; } __packed; /** * struct ieee80211_aid_response_ie - AID Response element * @aid: AID/Group AID * @switch_count: AID Switch Count * @response_int: AID Response Interval * * This structure represents the payload of the "AID Response element" * as described in IEEE Std 802.11-2020 section 9.4.2.194. */ struct ieee80211_aid_response_ie { __le16 aid; u8 switch_count; __le16 response_int; } __packed; struct ieee80211_s1g_cap { u8 capab_info[10]; u8 supp_mcs_nss[5]; } __packed; struct ieee80211_ext { __le16 frame_control; __le16 duration; union { struct { u8 sa[ETH_ALEN]; __le32 timestamp; u8 change_seq; u8 variable[0]; } __packed s1g_beacon; struct { u8 sa[ETH_ALEN]; __le32 timestamp; u8 change_seq; u8 next_tbtt[3]; u8 variable[0]; } __packed s1g_short_beacon; } u; } __packed __aligned(2); #define IEEE80211_TWT_CONTROL_NDP BIT(0) #define IEEE80211_TWT_CONTROL_RESP_MODE BIT(1) #define IEEE80211_TWT_CONTROL_NEG_TYPE_BROADCAST BIT(3) #define IEEE80211_TWT_CONTROL_RX_DISABLED BIT(4) #define IEEE80211_TWT_CONTROL_WAKE_DUR_UNIT BIT(5) #define IEEE80211_TWT_REQTYPE_REQUEST BIT(0) #define IEEE80211_TWT_REQTYPE_SETUP_CMD GENMASK(3, 1) #define IEEE80211_TWT_REQTYPE_TRIGGER BIT(4) #define IEEE80211_TWT_REQTYPE_IMPLICIT BIT(5) #define IEEE80211_TWT_REQTYPE_FLOWTYPE BIT(6) #define IEEE80211_TWT_REQTYPE_FLOWID GENMASK(9, 7) #define IEEE80211_TWT_REQTYPE_WAKE_INT_EXP GENMASK(14, 10) #define IEEE80211_TWT_REQTYPE_PROTECTION BIT(15) enum ieee80211_twt_setup_cmd { TWT_SETUP_CMD_REQUEST, TWT_SETUP_CMD_SUGGEST, TWT_SETUP_CMD_DEMAND, TWT_SETUP_CMD_GROUPING, TWT_SETUP_CMD_ACCEPT, TWT_SETUP_CMD_ALTERNATE, TWT_SETUP_CMD_DICTATE, TWT_SETUP_CMD_REJECT, }; struct ieee80211_twt_params { __le16 req_type; __le64 twt; u8 min_twt_dur; __le16 mantissa; u8 channel; } __packed; struct ieee80211_twt_setup { u8 dialog_token; u8 element_id; u8 length; u8 control; u8 params[]; } __packed; #define IEEE80211_TTLM_MAX_CNT 2 #define IEEE80211_TTLM_CONTROL_DIRECTION 0x03 #define IEEE80211_TTLM_CONTROL_DEF_LINK_MAP 0x04 #define IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT 0x08 #define IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT 0x10 #define IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE 0x20 #define IEEE80211_TTLM_DIRECTION_DOWN 0 #define IEEE80211_TTLM_DIRECTION_UP 1 #define IEEE80211_TTLM_DIRECTION_BOTH 2 /** * struct ieee80211_ttlm_elem - TID-To-Link Mapping element * * Defined in section 9.4.2.314 in P802.11be_D4 * * @control: the first part of control field * @optional: the second part of control field */ struct ieee80211_ttlm_elem { u8 control; u8 optional[]; } __packed; /** * struct ieee80211_bss_load_elem - BSS Load elemen * * Defined in section 9.4.2.26 in IEEE 802.11-REVme D4.1 * * @sta_count: total number of STAs currently associated with the AP. * @channel_util: Percentage of time that the access point sensed the channel * was busy. This value is in range [0, 255], the highest value means * 100% busy. * @avail_admission_capa: remaining amount of medium time used for admission * control. */ struct ieee80211_bss_load_elem { __le16 sta_count; u8 channel_util; __le16 avail_admission_capa; } __packed; struct ieee80211_mgmt { __le16 frame_control; __le16 duration; u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; u8 bssid[ETH_ALEN]; __le16 seq_ctrl; union { struct { __le16 auth_alg; __le16 auth_transaction; __le16 status_code; /* possibly followed by Challenge text */ u8 variable[]; } __packed auth; struct { __le16 reason_code; } __packed deauth; struct { __le16 capab_info; __le16 listen_interval; /* followed by SSID and Supported rates */ u8 variable[]; } __packed assoc_req; struct { __le16 capab_info; __le16 status_code; __le16 aid; /* followed by Supported rates */ u8 variable[]; } __packed assoc_resp, reassoc_resp; struct { __le16 capab_info; __le16 status_code; u8 variable[]; } __packed s1g_assoc_resp, s1g_reassoc_resp; struct { __le16 capab_info; __le16 listen_interval; u8 current_ap[ETH_ALEN]; /* followed by SSID and Supported rates */ u8 variable[]; } __packed reassoc_req; struct { __le16 reason_code; } __packed disassoc; struct { __le64 timestamp; __le16 beacon_int; __le16 capab_info; /* followed by some of SSID, Supported rates, * FH Params, DS Params, CF Params, IBSS Params, TIM */ u8 variable[]; } __packed beacon; struct { /* only variable items: SSID, Supported rates */ DECLARE_FLEX_ARRAY(u8, variable); } __packed probe_req; struct { __le64 timestamp; __le16 beacon_int; __le16 capab_info; /* followed by some of SSID, Supported rates, * FH Params, DS Params, CF Params, IBSS Params */ u8 variable[]; } __packed probe_resp; struct { u8 category; union { struct { u8 action_code; u8 dialog_token; u8 status_code; u8 variable[]; } __packed wme_action; struct{ u8 action_code; u8 variable[]; } __packed chan_switch; struct{ u8 action_code; struct ieee80211_ext_chansw_ie data; u8 variable[]; } __packed ext_chan_switch; struct{ u8 action_code; u8 dialog_token; u8 element_id; u8 length; struct ieee80211_msrment_ie msr_elem; } __packed measurement; struct{ u8 action_code; u8 dialog_token; __le16 capab; __le16 timeout; __le16 start_seq_num; /* followed by BA Extension */ u8 variable[]; } __packed addba_req; struct{ u8 action_code; u8 dialog_token; __le16 status; __le16 capab; __le16 timeout; } __packed addba_resp; struct{ u8 action_code; __le16 params; __le16 reason_code; } __packed delba; struct { u8 action_code; u8 variable[]; } __packed self_prot; struct{ u8 action_code; u8 variable[]; } __packed mesh_action; struct { u8 action; u8 trans_id[WLAN_SA_QUERY_TR_ID_LEN]; } __packed sa_query; struct { u8 action; u8 smps_control; } __packed ht_smps; struct { u8 action_code; u8 chanwidth; } __packed ht_notify_cw; struct { u8 action_code; u8 dialog_token; __le16 capability; u8 variable[0]; } __packed tdls_discover_resp; struct { u8 action_code; u8 operating_mode; } __packed vht_opmode_notif; struct { u8 action_code; u8 membership[WLAN_MEMBERSHIP_LEN]; u8 position[WLAN_USER_POSITION_LEN]; } __packed vht_group_notif; struct { u8 action_code; u8 dialog_token; u8 tpc_elem_id; u8 tpc_elem_length; struct ieee80211_tpc_report_ie tpc; } __packed tpc_report; struct { u8 action_code; u8 dialog_token; u8 follow_up; u8 tod[6]; u8 toa[6]; __le16 tod_error; __le16 toa_error; u8 variable[]; } __packed ftm; struct { u8 action_code; u8 variable[]; } __packed s1g; struct { u8 action_code; u8 dialog_token; u8 follow_up; u32 tod; u32 toa; u8 max_tod_error; u8 max_toa_error; } __packed wnm_timing_msr; struct { u8 action_code; u8 dialog_token; u8 variable[]; } __packed ttlm_req; struct { u8 action_code; u8 dialog_token; u8 status_code; u8 variable[]; } __packed ttlm_res; struct { u8 action_code; } __packed ttlm_tear_down; } u; } __packed action; DECLARE_FLEX_ARRAY(u8, body); /* Generic frame body */ } u; } __packed __aligned(2); /* Supported rates membership selectors */ #define BSS_MEMBERSHIP_SELECTOR_HT_PHY 127 #define BSS_MEMBERSHIP_SELECTOR_VHT_PHY 126 #define BSS_MEMBERSHIP_SELECTOR_GLK 125 #define BSS_MEMBERSHIP_SELECTOR_EPS 124 #define BSS_MEMBERSHIP_SELECTOR_SAE_H2E 123 #define BSS_MEMBERSHIP_SELECTOR_HE_PHY 122 #define BSS_MEMBERSHIP_SELECTOR_EHT_PHY 121 /* mgmt header + 1 byte category code */ #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u) /* Management MIC information element (IEEE 802.11w) */ struct ieee80211_mmie { u8 element_id; u8 length; __le16 key_id; u8 sequence_number[6]; u8 mic[8]; } __packed; /* Management MIC information element (IEEE 802.11w) for GMAC and CMAC-256 */ struct ieee80211_mmie_16 { u8 element_id; u8 length; __le16 key_id; u8 sequence_number[6]; u8 mic[16]; } __packed; struct ieee80211_vendor_ie { u8 element_id; u8 len; u8 oui[3]; u8 oui_type; } __packed; struct ieee80211_wmm_ac_param { u8 aci_aifsn; /* AIFSN, ACM, ACI */ u8 cw; /* ECWmin, ECWmax (CW = 2^ECW - 1) */ __le16 txop_limit; } __packed; struct ieee80211_wmm_param_ie { u8 element_id; /* Element ID: 221 (0xdd); */ u8 len; /* Length: 24 */ /* required fields for WMM version 1 */ u8 oui[3]; /* 00:50:f2 */ u8 oui_type; /* 2 */ u8 oui_subtype; /* 1 */ u8 version; /* 1 for WMM version 1.0 */ u8 qos_info; /* AP/STA specific QoS info */ u8 reserved; /* 0 */ /* AC_BE, AC_BK, AC_VI, AC_VO */ struct ieee80211_wmm_ac_param ac[4]; } __packed; /* Control frames */ struct ieee80211_rts { __le16 frame_control; __le16 duration; u8 ra[ETH_ALEN]; u8 ta[ETH_ALEN]; } __packed __aligned(2); struct ieee80211_cts { __le16 frame_control; __le16 duration; u8 ra[ETH_ALEN]; } __packed __aligned(2); struct ieee80211_pspoll { __le16 frame_control; __le16 aid; u8 bssid[ETH_ALEN]; u8 ta[ETH_ALEN]; } __packed __aligned(2); /* TDLS */ /* Channel switch timing */ struct ieee80211_ch_switch_timing { __le16 switch_time; __le16 switch_timeout; } __packed; /* Link-id information element */ struct ieee80211_tdls_lnkie { u8 ie_type; /* Link Identifier IE */ u8 ie_len; u8 bssid[ETH_ALEN]; u8 init_sta[ETH_ALEN]; u8 resp_sta[ETH_ALEN]; } __packed; struct ieee80211_tdls_data { u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; __be16 ether_type; u8 payload_type; u8 category; u8 action_code; union { struct { u8 dialog_token; __le16 capability; u8 variable[0]; } __packed setup_req; struct { __le16 status_code; u8 dialog_token; __le16 capability; u8 variable[0]; } __packed setup_resp; struct { __le16 status_code; u8 dialog_token; u8 variable[0]; } __packed setup_cfm; struct { __le16 reason_code; u8 variable[0]; } __packed teardown; struct { u8 dialog_token; u8 variable[0]; } __packed discover_req; struct { u8 target_channel; u8 oper_class; u8 variable[0]; } __packed chan_switch_req; struct { __le16 status_code; u8 variable[0]; } __packed chan_switch_resp; } u; } __packed; /* * Peer-to-Peer IE attribute related definitions. */ /* * enum ieee80211_p2p_attr_id - identifies type of peer-to-peer attribute. */ enum ieee80211_p2p_attr_id { IEEE80211_P2P_ATTR_STATUS = 0, IEEE80211_P2P_ATTR_MINOR_REASON, IEEE80211_P2P_ATTR_CAPABILITY, IEEE80211_P2P_ATTR_DEVICE_ID, IEEE80211_P2P_ATTR_GO_INTENT, IEEE80211_P2P_ATTR_GO_CONFIG_TIMEOUT, IEEE80211_P2P_ATTR_LISTEN_CHANNEL, IEEE80211_P2P_ATTR_GROUP_BSSID, IEEE80211_P2P_ATTR_EXT_LISTEN_TIMING, IEEE80211_P2P_ATTR_INTENDED_IFACE_ADDR, IEEE80211_P2P_ATTR_MANAGABILITY, IEEE80211_P2P_ATTR_CHANNEL_LIST, IEEE80211_P2P_ATTR_ABSENCE_NOTICE, IEEE80211_P2P_ATTR_DEVICE_INFO, IEEE80211_P2P_ATTR_GROUP_INFO, IEEE80211_P2P_ATTR_GROUP_ID, IEEE80211_P2P_ATTR_INTERFACE, IEEE80211_P2P_ATTR_OPER_CHANNEL, IEEE80211_P2P_ATTR_INVITE_FLAGS, /* 19 - 220: Reserved */ IEEE80211_P2P_ATTR_VENDOR_SPECIFIC = 221, IEEE80211_P2P_ATTR_MAX }; /* Notice of Absence attribute - described in P2P spec 4.1.14 */ /* Typical max value used here */ #define IEEE80211_P2P_NOA_DESC_MAX 4 struct ieee80211_p2p_noa_desc { u8 count; __le32 duration; __le32 interval; __le32 start_time; } __packed; struct ieee80211_p2p_noa_attr { u8 index; u8 oppps_ctwindow; struct ieee80211_p2p_noa_desc desc[IEEE80211_P2P_NOA_DESC_MAX]; } __packed; #define IEEE80211_P2P_OPPPS_ENABLE_BIT BIT(7) #define IEEE80211_P2P_OPPPS_CTWINDOW_MASK 0x7F /** * struct ieee80211_bar - Block Ack Request frame format * @frame_control: Frame Control * @duration: Duration * @ra: RA * @ta: TA * @control: BAR Control * @start_seq_num: Starting Sequence Number (see Figure 9-37) * * This structure represents the "BlockAckReq frame format" * as described in IEEE Std 802.11-2020 section 9.3.1.7. */ struct ieee80211_bar { __le16 frame_control; __le16 duration; __u8 ra[ETH_ALEN]; __u8 ta[ETH_ALEN]; __le16 control; __le16 start_seq_num; } __packed; /* 802.11 BAR control masks */ #define IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL 0x0000 #define IEEE80211_BAR_CTRL_MULTI_TID 0x0002 #define IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA 0x0004 #define IEEE80211_BAR_CTRL_TID_INFO_MASK 0xf000 #define IEEE80211_BAR_CTRL_TID_INFO_SHIFT 12 #define IEEE80211_HT_MCS_MASK_LEN 10 /** * struct ieee80211_mcs_info - Supported MCS Set field * @rx_mask: RX mask * @rx_highest: highest supported RX rate. If set represents * the highest supported RX data rate in units of 1 Mbps. * If this field is 0 this value should not be used to * consider the highest RX data rate supported. * @tx_params: TX parameters * @reserved: Reserved bits * * This structure represents the "Supported MCS Set field" as * described in IEEE Std 802.11-2020 section 9.4.2.55.4. */ struct ieee80211_mcs_info { u8 rx_mask[IEEE80211_HT_MCS_MASK_LEN]; __le16 rx_highest; u8 tx_params; u8 reserved[3]; } __packed; /* 802.11n HT capability MSC set */ #define IEEE80211_HT_MCS_RX_HIGHEST_MASK 0x3ff #define IEEE80211_HT_MCS_TX_DEFINED 0x01 #define IEEE80211_HT_MCS_TX_RX_DIFF 0x02 /* value 0 == 1 stream etc */ #define IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK 0x0C #define IEEE80211_HT_MCS_TX_MAX_STREAMS_SHIFT 2 #define IEEE80211_HT_MCS_TX_MAX_STREAMS 4 #define IEEE80211_HT_MCS_TX_UNEQUAL_MODULATION 0x10 #define IEEE80211_HT_MCS_CHAINS(mcs) ((mcs) == 32 ? 1 : (1 + ((mcs) >> 3))) /* * 802.11n D5.0 20.3.5 / 20.6 says: * - indices 0 to 7 and 32 are single spatial stream * - 8 to 31 are multiple spatial streams using equal modulation * [8..15 for two streams, 16..23 for three and 24..31 for four] * - remainder are multiple spatial streams using unequal modulation */ #define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START 33 #define IEEE80211_HT_MCS_UNEQUAL_MODULATION_START_BYTE \ (IEEE80211_HT_MCS_UNEQUAL_MODULATION_START / 8) /** * struct ieee80211_ht_cap - HT capabilities element * @cap_info: HT Capability Information * @ampdu_params_info: A-MPDU Parameters * @mcs: Supported MCS Set * @extended_ht_cap_info: HT Extended Capabilities * @tx_BF_cap_info: Transmit Beamforming Capabilities * @antenna_selection_info: ASEL Capability * * This structure represents the payload of the "HT Capabilities * element" as described in IEEE Std 802.11-2020 section 9.4.2.55. */ struct ieee80211_ht_cap { __le16 cap_info; u8 ampdu_params_info; /* 16 bytes MCS information */ struct ieee80211_mcs_info mcs; __le16 extended_ht_cap_info; __le32 tx_BF_cap_info; u8 antenna_selection_info; } __packed; /* 802.11n HT capabilities masks (for cap_info) */ #define IEEE80211_HT_CAP_LDPC_CODING 0x0001 #define IEEE80211_HT_CAP_SUP_WIDTH_20_40 0x0002 #define IEEE80211_HT_CAP_SM_PS 0x000C #define IEEE80211_HT_CAP_SM_PS_SHIFT 2 #define IEEE80211_HT_CAP_GRN_FLD 0x0010 #define IEEE80211_HT_CAP_SGI_20 0x0020 #define IEEE80211_HT_CAP_SGI_40 0x0040 #define IEEE80211_HT_CAP_TX_STBC 0x0080 #define IEEE80211_HT_CAP_RX_STBC 0x0300 #define IEEE80211_HT_CAP_RX_STBC_SHIFT 8 #define IEEE80211_HT_CAP_DELAY_BA 0x0400 #define IEEE80211_HT_CAP_MAX_AMSDU 0x0800 #define IEEE80211_HT_CAP_DSSSCCK40 0x1000 #define IEEE80211_HT_CAP_RESERVED 0x2000 #define IEEE80211_HT_CAP_40MHZ_INTOLERANT 0x4000 #define IEEE80211_HT_CAP_LSIG_TXOP_PROT 0x8000 /* 802.11n HT extended capabilities masks (for extended_ht_cap_info) */ #define IEEE80211_HT_EXT_CAP_PCO 0x0001 #define IEEE80211_HT_EXT_CAP_PCO_TIME 0x0006 #define IEEE80211_HT_EXT_CAP_PCO_TIME_SHIFT 1 #define IEEE80211_HT_EXT_CAP_MCS_FB 0x0300 #define IEEE80211_HT_EXT_CAP_MCS_FB_SHIFT 8 #define IEEE80211_HT_EXT_CAP_HTC_SUP 0x0400 #define IEEE80211_HT_EXT_CAP_RD_RESPONDER 0x0800 /* 802.11n HT capability AMPDU settings (for ampdu_params_info) */ #define IEEE80211_HT_AMPDU_PARM_FACTOR 0x03 #define IEEE80211_HT_AMPDU_PARM_DENSITY 0x1C #define IEEE80211_HT_AMPDU_PARM_DENSITY_SHIFT 2 /* * Maximum length of AMPDU that the STA can receive in high-throughput (HT). * Length = 2 ^ (13 + max_ampdu_length_exp) - 1 (octets) */ enum ieee80211_max_ampdu_length_exp { IEEE80211_HT_MAX_AMPDU_8K = 0, IEEE80211_HT_MAX_AMPDU_16K = 1, IEEE80211_HT_MAX_AMPDU_32K = 2, IEEE80211_HT_MAX_AMPDU_64K = 3 }; /* * Maximum length of AMPDU that the STA can receive in VHT. * Length = 2 ^ (13 + max_ampdu_length_exp) - 1 (octets) */ enum ieee80211_vht_max_ampdu_length_exp { IEEE80211_VHT_MAX_AMPDU_8K = 0, IEEE80211_VHT_MAX_AMPDU_16K = 1, IEEE80211_VHT_MAX_AMPDU_32K = 2, IEEE80211_VHT_MAX_AMPDU_64K = 3, IEEE80211_VHT_MAX_AMPDU_128K = 4, IEEE80211_VHT_MAX_AMPDU_256K = 5, IEEE80211_VHT_MAX_AMPDU_512K = 6, IEEE80211_VHT_MAX_AMPDU_1024K = 7 }; #define IEEE80211_HT_MAX_AMPDU_FACTOR 13 /* Minimum MPDU start spacing */ enum ieee80211_min_mpdu_spacing { IEEE80211_HT_MPDU_DENSITY_NONE = 0, /* No restriction */ IEEE80211_HT_MPDU_DENSITY_0_25 = 1, /* 1/4 usec */ IEEE80211_HT_MPDU_DENSITY_0_5 = 2, /* 1/2 usec */ IEEE80211_HT_MPDU_DENSITY_1 = 3, /* 1 usec */ IEEE80211_HT_MPDU_DENSITY_2 = 4, /* 2 usec */ IEEE80211_HT_MPDU_DENSITY_4 = 5, /* 4 usec */ IEEE80211_HT_MPDU_DENSITY_8 = 6, /* 8 usec */ IEEE80211_HT_MPDU_DENSITY_16 = 7 /* 16 usec */ }; /** * struct ieee80211_ht_operation - HT operation IE * @primary_chan: Primary Channel * @ht_param: HT Operation Information parameters * @operation_mode: HT Operation Information operation mode * @stbc_param: HT Operation Information STBC params * @basic_set: Basic HT-MCS Set * * This structure represents the payload of the "HT Operation * element" as described in IEEE Std 802.11-2020 section 9.4.2.56. */ struct ieee80211_ht_operation { u8 primary_chan; u8 ht_param; __le16 operation_mode; __le16 stbc_param; u8 basic_set[16]; } __packed; /* for ht_param */ #define IEEE80211_HT_PARAM_CHA_SEC_OFFSET 0x03 #define IEEE80211_HT_PARAM_CHA_SEC_NONE 0x00 #define IEEE80211_HT_PARAM_CHA_SEC_ABOVE 0x01 #define IEEE80211_HT_PARAM_CHA_SEC_BELOW 0x03 #define IEEE80211_HT_PARAM_CHAN_WIDTH_ANY 0x04 #define IEEE80211_HT_PARAM_RIFS_MODE 0x08 /* for operation_mode */ #define IEEE80211_HT_OP_MODE_PROTECTION 0x0003 #define IEEE80211_HT_OP_MODE_PROTECTION_NONE 0 #define IEEE80211_HT_OP_MODE_PROTECTION_NONMEMBER 1 #define IEEE80211_HT_OP_MODE_PROTECTION_20MHZ 2 #define IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED 3 #define IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT 0x0004 #define IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT 0x0010 #define IEEE80211_HT_OP_MODE_CCFS2_SHIFT 5 #define IEEE80211_HT_OP_MODE_CCFS2_MASK 0x1fe0 /* for stbc_param */ #define IEEE80211_HT_STBC_PARAM_DUAL_BEACON 0x0040 #define IEEE80211_HT_STBC_PARAM_DUAL_CTS_PROT 0x0080 #define IEEE80211_HT_STBC_PARAM_STBC_BEACON 0x0100 #define IEEE80211_HT_STBC_PARAM_LSIG_TXOP_FULLPROT 0x0200 #define IEEE80211_HT_STBC_PARAM_PCO_ACTIVE 0x0400 #define IEEE80211_HT_STBC_PARAM_PCO_PHASE 0x0800 /* block-ack parameters */ #define IEEE80211_ADDBA_PARAM_AMSDU_MASK 0x0001 #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 #define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C #define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0 #define IEEE80211_DELBA_PARAM_TID_MASK 0xF000 #define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800 /* * A-MPDU buffer sizes * According to HT size varies from 8 to 64 frames * HE adds the ability to have up to 256 frames. * EHT adds the ability to have up to 1K frames. */ #define IEEE80211_MIN_AMPDU_BUF 0x8 #define IEEE80211_MAX_AMPDU_BUF_HT 0x40 #define IEEE80211_MAX_AMPDU_BUF_HE 0x100 #define IEEE80211_MAX_AMPDU_BUF_EHT 0x400 /* Spatial Multiplexing Power Save Modes (for capability) */ #define WLAN_HT_CAP_SM_PS_STATIC 0 #define WLAN_HT_CAP_SM_PS_DYNAMIC 1 #define WLAN_HT_CAP_SM_PS_INVALID 2 #define WLAN_HT_CAP_SM_PS_DISABLED 3 /* for SM power control field lower two bits */ #define WLAN_HT_SMPS_CONTROL_DISABLED 0 #define WLAN_HT_SMPS_CONTROL_STATIC 1 #define WLAN_HT_SMPS_CONTROL_DYNAMIC 3 /** * struct ieee80211_vht_mcs_info - VHT MCS information * @rx_mcs_map: RX MCS map 2 bits for each stream, total 8 streams * @rx_highest: Indicates highest long GI VHT PPDU data rate * STA can receive. Rate expressed in units of 1 Mbps. * If this field is 0 this value should not be used to * consider the highest RX data rate supported. * The top 3 bits of this field indicate the Maximum NSTS,total * (a beamformee capability.) * @tx_mcs_map: TX MCS map 2 bits for each stream, total 8 streams * @tx_highest: Indicates highest long GI VHT PPDU data rate * STA can transmit. Rate expressed in units of 1 Mbps. * If this field is 0 this value should not be used to * consider the highest TX data rate supported. * The top 2 bits of this field are reserved, the * 3rd bit from the top indiciates VHT Extended NSS BW * Capability. */ struct ieee80211_vht_mcs_info { __le16 rx_mcs_map; __le16 rx_highest; __le16 tx_mcs_map; __le16 tx_highest; } __packed; /* for rx_highest */ #define IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT 13 #define IEEE80211_VHT_MAX_NSTS_TOTAL_MASK (7 << IEEE80211_VHT_MAX_NSTS_TOTAL_SHIFT) /* for tx_highest */ #define IEEE80211_VHT_EXT_NSS_BW_CAPABLE (1 << 13) /** * enum ieee80211_vht_mcs_support - VHT MCS support definitions * @IEEE80211_VHT_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the * number of streams * @IEEE80211_VHT_MCS_SUPPORT_0_8: MCSes 0-8 are supported * @IEEE80211_VHT_MCS_SUPPORT_0_9: MCSes 0-9 are supported * @IEEE80211_VHT_MCS_NOT_SUPPORTED: This number of streams isn't supported * * These definitions are used in each 2-bit subfield of the @rx_mcs_map * and @tx_mcs_map fields of &struct ieee80211_vht_mcs_info, which are * both split into 8 subfields by number of streams. These values indicate * which MCSes are supported for the number of streams the value appears * for. */ enum ieee80211_vht_mcs_support { IEEE80211_VHT_MCS_SUPPORT_0_7 = 0, IEEE80211_VHT_MCS_SUPPORT_0_8 = 1, IEEE80211_VHT_MCS_SUPPORT_0_9 = 2, IEEE80211_VHT_MCS_NOT_SUPPORTED = 3, }; /** * struct ieee80211_vht_cap - VHT capabilities * * This structure is the "VHT capabilities element" as * described in 802.11ac D3.0 8.4.2.160 * @vht_cap_info: VHT capability info * @supp_mcs: VHT MCS supported rates */ struct ieee80211_vht_cap { __le32 vht_cap_info; struct ieee80211_vht_mcs_info supp_mcs; } __packed; /** * enum ieee80211_vht_chanwidth - VHT channel width * @IEEE80211_VHT_CHANWIDTH_USE_HT: use the HT operation IE to * determine the channel width (20 or 40 MHz) * @IEEE80211_VHT_CHANWIDTH_80MHZ: 80 MHz bandwidth * @IEEE80211_VHT_CHANWIDTH_160MHZ: 160 MHz bandwidth * @IEEE80211_VHT_CHANWIDTH_80P80MHZ: 80+80 MHz bandwidth */ enum ieee80211_vht_chanwidth { IEEE80211_VHT_CHANWIDTH_USE_HT = 0, IEEE80211_VHT_CHANWIDTH_80MHZ = 1, IEEE80211_VHT_CHANWIDTH_160MHZ = 2, IEEE80211_VHT_CHANWIDTH_80P80MHZ = 3, }; /** * struct ieee80211_vht_operation - VHT operation IE * * This structure is the "VHT operation element" as * described in 802.11ac D3.0 8.4.2.161 * @chan_width: Operating channel width * @center_freq_seg0_idx: center freq segment 0 index * @center_freq_seg1_idx: center freq segment 1 index * @basic_mcs_set: VHT Basic MCS rate set */ struct ieee80211_vht_operation { u8 chan_width; u8 center_freq_seg0_idx; u8 center_freq_seg1_idx; __le16 basic_mcs_set; } __packed; /** * struct ieee80211_he_cap_elem - HE capabilities element * @mac_cap_info: HE MAC Capabilities Information * @phy_cap_info: HE PHY Capabilities Information * * This structure represents the fixed fields of the payload of the * "HE capabilities element" as described in IEEE Std 802.11ax-2021 * sections 9.4.2.248.2 and 9.4.2.248.3. */ struct ieee80211_he_cap_elem { u8 mac_cap_info[6]; u8 phy_cap_info[11]; } __packed; #define IEEE80211_TX_RX_MCS_NSS_DESC_MAX_LEN 5 /** * enum ieee80211_he_mcs_support - HE MCS support definitions * @IEEE80211_HE_MCS_SUPPORT_0_7: MCSes 0-7 are supported for the * number of streams * @IEEE80211_HE_MCS_SUPPORT_0_9: MCSes 0-9 are supported * @IEEE80211_HE_MCS_SUPPORT_0_11: MCSes 0-11 are supported * @IEEE80211_HE_MCS_NOT_SUPPORTED: This number of streams isn't supported * * These definitions are used in each 2-bit subfield of the rx_mcs_* * and tx_mcs_* fields of &struct ieee80211_he_mcs_nss_supp, which are * both split into 8 subfields by number of streams. These values indicate * which MCSes are supported for the number of streams the value appears * for. */ enum ieee80211_he_mcs_support { IEEE80211_HE_MCS_SUPPORT_0_7 = 0, IEEE80211_HE_MCS_SUPPORT_0_9 = 1, IEEE80211_HE_MCS_SUPPORT_0_11 = 2, IEEE80211_HE_MCS_NOT_SUPPORTED = 3, }; /** * struct ieee80211_he_mcs_nss_supp - HE Tx/Rx HE MCS NSS Support Field * * This structure holds the data required for the Tx/Rx HE MCS NSS Support Field * described in P802.11ax_D2.0 section 9.4.2.237.4 * * @rx_mcs_80: Rx MCS map 2 bits for each stream, total 8 streams, for channel * widths less than 80MHz. * @tx_mcs_80: Tx MCS map 2 bits for each stream, total 8 streams, for channel * widths less than 80MHz. * @rx_mcs_160: Rx MCS map 2 bits for each stream, total 8 streams, for channel * width 160MHz. * @tx_mcs_160: Tx MCS map 2 bits for each stream, total 8 streams, for channel * width 160MHz. * @rx_mcs_80p80: Rx MCS map 2 bits for each stream, total 8 streams, for * channel width 80p80MHz. * @tx_mcs_80p80: Tx MCS map 2 bits for each stream, total 8 streams, for * channel width 80p80MHz. */ struct ieee80211_he_mcs_nss_supp { __le16 rx_mcs_80; __le16 tx_mcs_80; __le16 rx_mcs_160; __le16 tx_mcs_160; __le16 rx_mcs_80p80; __le16 tx_mcs_80p80; } __packed; /** * struct ieee80211_he_operation - HE Operation element * @he_oper_params: HE Operation Parameters + BSS Color Information * @he_mcs_nss_set: Basic HE-MCS And NSS Set * @optional: Optional fields VHT Operation Information, Max Co-Hosted * BSSID Indicator, and 6 GHz Operation Information * * This structure represents the payload of the "HE Operation * element" as described in IEEE Std 802.11ax-2021 section 9.4.2.249. */ struct ieee80211_he_operation { __le32 he_oper_params; __le16 he_mcs_nss_set; u8 optional[]; } __packed; /** * struct ieee80211_he_spr - Spatial Reuse Parameter Set element * @he_sr_control: SR Control * @optional: Optional fields Non-SRG OBSS PD Max Offset, SRG OBSS PD * Min Offset, SRG OBSS PD Max Offset, SRG BSS Color * Bitmap, and SRG Partial BSSID Bitmap * * This structure represents the payload of the "Spatial Reuse * Parameter Set element" as described in IEEE Std 802.11ax-2021 * section 9.4.2.252. */ struct ieee80211_he_spr { u8 he_sr_control; u8 optional[]; } __packed; /** * struct ieee80211_he_mu_edca_param_ac_rec - MU AC Parameter Record field * @aifsn: ACI/AIFSN * @ecw_min_max: ECWmin/ECWmax * @mu_edca_timer: MU EDCA Timer * * This structure represents the "MU AC Parameter Record" as described * in IEEE Std 802.11ax-2021 section 9.4.2.251, Figure 9-788p. */ struct ieee80211_he_mu_edca_param_ac_rec { u8 aifsn; u8 ecw_min_max; u8 mu_edca_timer; } __packed; /** * struct ieee80211_mu_edca_param_set - MU EDCA Parameter Set element * @mu_qos_info: QoS Info * @ac_be: MU AC_BE Parameter Record * @ac_bk: MU AC_BK Parameter Record * @ac_vi: MU AC_VI Parameter Record * @ac_vo: MU AC_VO Parameter Record * * This structure represents the payload of the "MU EDCA Parameter Set * element" as described in IEEE Std 802.11ax-2021 section 9.4.2.251. */ struct ieee80211_mu_edca_param_set { u8 mu_qos_info; struct ieee80211_he_mu_edca_param_ac_rec ac_be; struct ieee80211_he_mu_edca_param_ac_rec ac_bk; struct ieee80211_he_mu_edca_param_ac_rec ac_vi; struct ieee80211_he_mu_edca_param_ac_rec ac_vo; } __packed; #define IEEE80211_EHT_MCS_NSS_RX 0x0f #define IEEE80211_EHT_MCS_NSS_TX 0xf0 /** * struct ieee80211_eht_mcs_nss_supp_20mhz_only - EHT 20MHz only station max * supported NSS for per MCS. * * For each field below, bits 0 - 3 indicate the maximal number of spatial * streams for Rx, and bits 4 - 7 indicate the maximal number of spatial streams * for Tx. * * @rx_tx_mcs7_max_nss: indicates the maximum number of spatial streams * supported for reception and the maximum number of spatial streams * supported for transmission for MCS 0 - 7. * @rx_tx_mcs9_max_nss: indicates the maximum number of spatial streams * supported for reception and the maximum number of spatial streams * supported for transmission for MCS 8 - 9. * @rx_tx_mcs11_max_nss: indicates the maximum number of spatial streams * supported for reception and the maximum number of spatial streams * supported for transmission for MCS 10 - 11. * @rx_tx_mcs13_max_nss: indicates the maximum number of spatial streams * supported for reception and the maximum number of spatial streams * supported for transmission for MCS 12 - 13. * @rx_tx_max_nss: array of the previous fields for easier loop access */ struct ieee80211_eht_mcs_nss_supp_20mhz_only { union { struct { u8 rx_tx_mcs7_max_nss; u8 rx_tx_mcs9_max_nss; u8 rx_tx_mcs11_max_nss; u8 rx_tx_mcs13_max_nss; }; u8 rx_tx_max_nss[4]; }; }; /** * struct ieee80211_eht_mcs_nss_supp_bw - EHT max supported NSS per MCS (except * 20MHz only stations). * * For each field below, bits 0 - 3 indicate the maximal number of spatial * streams for Rx, and bits 4 - 7 indicate the maximal number of spatial streams * for Tx. * * @rx_tx_mcs9_max_nss: indicates the maximum number of spatial streams * supported for reception and the maximum number of spatial streams * supported for transmission for MCS 0 - 9. * @rx_tx_mcs11_max_nss: indicates the maximum number of spatial streams * supported for reception and the maximum number of spatial streams * supported for transmission for MCS 10 - 11. * @rx_tx_mcs13_max_nss: indicates the maximum number of spatial streams * supported for reception and the maximum number of spatial streams * supported for transmission for MCS 12 - 13. * @rx_tx_max_nss: array of the previous fields for easier loop access */ struct ieee80211_eht_mcs_nss_supp_bw { union { struct { u8 rx_tx_mcs9_max_nss; u8 rx_tx_mcs11_max_nss; u8 rx_tx_mcs13_max_nss; }; u8 rx_tx_max_nss[3]; }; }; /** * struct ieee80211_eht_cap_elem_fixed - EHT capabilities fixed data * * This structure is the "EHT Capabilities element" fixed fields as * described in P802.11be_D2.0 section 9.4.2.313. * * @mac_cap_info: MAC capabilities, see IEEE80211_EHT_MAC_CAP* * @phy_cap_info: PHY capabilities, see IEEE80211_EHT_PHY_CAP* */ struct ieee80211_eht_cap_elem_fixed { u8 mac_cap_info[2]; u8 phy_cap_info[9]; } __packed; /** * struct ieee80211_eht_cap_elem - EHT capabilities element * @fixed: fixed parts, see &ieee80211_eht_cap_elem_fixed * @optional: optional parts */ struct ieee80211_eht_cap_elem { struct ieee80211_eht_cap_elem_fixed fixed; /* * Followed by: * Supported EHT-MCS And NSS Set field: 4, 3, 6 or 9 octets. * EHT PPE Thresholds field: variable length. */ u8 optional[]; } __packed; #define IEEE80211_EHT_OPER_INFO_PRESENT 0x01 #define IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT 0x02 #define IEEE80211_EHT_OPER_EHT_DEF_PE_DURATION 0x04 #define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_LIMIT 0x08 #define IEEE80211_EHT_OPER_GROUP_ADDRESSED_BU_IND_EXP_MASK 0x30 /** * struct ieee80211_eht_operation - eht operation element * * This structure is the "EHT Operation Element" fields as * described in P802.11be_D2.0 section 9.4.2.311 * * @params: EHT operation element parameters. See &IEEE80211_EHT_OPER_* * @basic_mcs_nss: indicates the EHT-MCSs for each number of spatial streams in * EHT PPDUs that are supported by all EHT STAs in the BSS in transmit and * receive. * @optional: optional parts */ struct ieee80211_eht_operation { u8 params; struct ieee80211_eht_mcs_nss_supp_20mhz_only basic_mcs_nss; u8 optional[]; } __packed; /** * struct ieee80211_eht_operation_info - eht operation information * * @control: EHT operation information control. * @ccfs0: defines a channel center frequency for a 20, 40, 80, 160, or 320 MHz * EHT BSS. * @ccfs1: defines a channel center frequency for a 160 or 320 MHz EHT BSS. * @optional: optional parts */ struct ieee80211_eht_operation_info { u8 control; u8 ccfs0; u8 ccfs1; u8 optional[]; } __packed; /* 802.11ac VHT Capabilities */ #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895 0x00000000 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 0x00000001 #define IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 0x00000002 #define IEEE80211_VHT_CAP_MAX_MPDU_MASK 0x00000003 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ 0x00000004 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ 0x00000008 #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK 0x0000000C #define IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_SHIFT 2 #define IEEE80211_VHT_CAP_RXLDPC 0x00000010 #define IEEE80211_VHT_CAP_SHORT_GI_80 0x00000020 #define IEEE80211_VHT_CAP_SHORT_GI_160 0x00000040 #define IEEE80211_VHT_CAP_TXSTBC 0x00000080 #define IEEE80211_VHT_CAP_RXSTBC_1 0x00000100 #define IEEE80211_VHT_CAP_RXSTBC_2 0x00000200 #define IEEE80211_VHT_CAP_RXSTBC_3 0x00000300 #define IEEE80211_VHT_CAP_RXSTBC_4 0x00000400 #define IEEE80211_VHT_CAP_RXSTBC_MASK 0x00000700 #define IEEE80211_VHT_CAP_RXSTBC_SHIFT 8 #define IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE 0x00000800 #define IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE 0x00001000 #define IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT 13 #define IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK \ (7 << IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT) #define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT 16 #define IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK \ (7 << IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_SHIFT) #define IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE 0x00080000 #define IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE 0x00100000 #define IEEE80211_VHT_CAP_VHT_TXOP_PS 0x00200000 #define IEEE80211_VHT_CAP_HTC_VHT 0x00400000 #define IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT 23 #define IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK \ (7 << IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT) #define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_UNSOL_MFB 0x08000000 #define IEEE80211_VHT_CAP_VHT_LINK_ADAPTATION_VHT_MRQ_MFB 0x0c000000 #define IEEE80211_VHT_CAP_RX_ANTENNA_PATTERN 0x10000000 #define IEEE80211_VHT_CAP_TX_ANTENNA_PATTERN 0x20000000 #define IEEE80211_VHT_CAP_EXT_NSS_BW_SHIFT 30 #define IEEE80211_VHT_CAP_EXT_NSS_BW_MASK 0xc0000000 /** * ieee80211_get_vht_max_nss - return max NSS for a given bandwidth/MCS * @cap: VHT capabilities of the peer * @bw: bandwidth to use * @mcs: MCS index to use * @ext_nss_bw_capable: indicates whether or not the local transmitter * (rate scaling algorithm) can deal with the new logic * (dot11VHTExtendedNSSBWCapable) * @max_vht_nss: current maximum NSS as advertised by the STA in * operating mode notification, can be 0 in which case the * capability data will be used to derive this (from MCS support) * Return: The maximum NSS that can be used for the given bandwidth/MCS * combination * * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can * vary for a given BW/MCS. This function parses the data. * * Note: This function is exported by cfg80211. */ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap, enum ieee80211_vht_chanwidth bw, int mcs, bool ext_nss_bw_capable, unsigned int max_vht_nss); /* 802.11ax HE MAC capabilities */ #define IEEE80211_HE_MAC_CAP0_HTC_HE 0x01 #define IEEE80211_HE_MAC_CAP0_TWT_REQ 0x02 #define IEEE80211_HE_MAC_CAP0_TWT_RES 0x04 #define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_NOT_SUPP 0x00 #define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_1 0x08 #define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_2 0x10 #define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_LEVEL_3 0x18 #define IEEE80211_HE_MAC_CAP0_DYNAMIC_FRAG_MASK 0x18 #define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_1 0x00 #define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_2 0x20 #define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_4 0x40 #define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_8 0x60 #define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_16 0x80 #define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_32 0xa0 #define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_64 0xc0 #define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_UNLIMITED 0xe0 #define IEEE80211_HE_MAC_CAP0_MAX_NUM_FRAG_MSDU_MASK 0xe0 #define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_UNLIMITED 0x00 #define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_128 0x01 #define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_256 0x02 #define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_512 0x03 #define IEEE80211_HE_MAC_CAP1_MIN_FRAG_SIZE_MASK 0x03 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_0US 0x00 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_8US 0x04 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_16US 0x08 #define IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK 0x0c #define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_1 0x00 #define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_2 0x10 #define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_3 0x20 #define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_4 0x30 #define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_5 0x40 #define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_6 0x50 #define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_7 0x60 #define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_8 0x70 #define IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_MASK 0x70 /* Link adaptation is split between byte HE_MAC_CAP1 and * HE_MAC_CAP2. It should be set only if IEEE80211_HE_MAC_CAP0_HTC_HE * in which case the following values apply: * 0 = No feedback. * 1 = reserved. * 2 = Unsolicited feedback. * 3 = both */ #define IEEE80211_HE_MAC_CAP1_LINK_ADAPTATION 0x80 #define IEEE80211_HE_MAC_CAP2_LINK_ADAPTATION 0x01 #define IEEE80211_HE_MAC_CAP2_ALL_ACK 0x02 #define IEEE80211_HE_MAC_CAP2_TRS 0x04 #define IEEE80211_HE_MAC_CAP2_BSR 0x08 #define IEEE80211_HE_MAC_CAP2_BCAST_TWT 0x10 #define IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP 0x20 #define IEEE80211_HE_MAC_CAP2_MU_CASCADING 0x40 #define IEEE80211_HE_MAC_CAP2_ACK_EN 0x80 #define IEEE80211_HE_MAC_CAP3_OMI_CONTROL 0x02 #define IEEE80211_HE_MAC_CAP3_OFDMA_RA 0x04 /* The maximum length of an A-MDPU is defined by the combination of the Maximum * A-MDPU Length Exponent field in the HT capabilities, VHT capabilities and the * same field in the HE capabilities. */ #define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_0 0x00 #define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_1 0x08 #define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_2 0x10 #define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_EXT_3 0x18 #define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_MASK 0x18 #define IEEE80211_HE_MAC_CAP3_AMSDU_FRAG 0x20 #define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED 0x40 #define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS 0x80 #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG 0x01 #define IEEE80211_HE_MAC_CAP4_QTP 0x02 #define IEEE80211_HE_MAC_CAP4_BQR 0x04 #define IEEE80211_HE_MAC_CAP4_PSR_RESP 0x08 #define IEEE80211_HE_MAC_CAP4_NDP_FB_REP 0x10 #define IEEE80211_HE_MAC_CAP4_OPS 0x20 #define IEEE80211_HE_MAC_CAP4_AMSDU_IN_AMPDU 0x40 /* Multi TID agg TX is split between byte #4 and #5 * The value is a combination of B39,B40,B41 */ #define IEEE80211_HE_MAC_CAP4_MULTI_TID_AGG_TX_QOS_B39 0x80 #define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B40 0x01 #define IEEE80211_HE_MAC_CAP5_MULTI_TID_AGG_TX_QOS_B41 0x02 #define IEEE80211_HE_MAC_CAP5_SUBCHAN_SELECTIVE_TRANSMISSION 0x04 #define IEEE80211_HE_MAC_CAP5_UL_2x996_TONE_RU 0x08 #define IEEE80211_HE_MAC_CAP5_OM_CTRL_UL_MU_DATA_DIS_RX 0x10 #define IEEE80211_HE_MAC_CAP5_HE_DYNAMIC_SM_PS 0x20 #define IEEE80211_HE_MAC_CAP5_PUNCTURED_SOUNDING 0x40 #define IEEE80211_HE_MAC_CAP5_HT_VHT_TRIG_FRAME_RX 0x80 #define IEEE80211_HE_VHT_MAX_AMPDU_FACTOR 20 #define IEEE80211_HE_HT_MAX_AMPDU_FACTOR 16 #define IEEE80211_HE_6GHZ_MAX_AMPDU_FACTOR 13 /* 802.11ax HE PHY capabilities */ #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G 0x02 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G 0x04 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G 0x08 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G 0x10 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK_ALL 0x1e #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G 0x20 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G 0x40 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_MASK 0xfe #define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_20MHZ 0x01 #define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_80MHZ_ONLY_SECOND_40MHZ 0x02 #define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_20MHZ 0x04 #define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_160MHZ_ONLY_SECOND_40MHZ 0x08 #define IEEE80211_HE_PHY_CAP1_PREAMBLE_PUNC_RX_MASK 0x0f #define IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A 0x10 #define IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD 0x20 #define IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US 0x40 /* Midamble RX/TX Max NSTS is split between byte #2 and byte #3 */ #define IEEE80211_HE_PHY_CAP1_MIDAMBLE_RX_TX_MAX_NSTS 0x80 #define IEEE80211_HE_PHY_CAP2_MIDAMBLE_RX_TX_MAX_NSTS 0x01 #define IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US 0x02 #define IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ 0x04 #define IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ 0x08 #define IEEE80211_HE_PHY_CAP2_DOPPLER_TX 0x10 #define IEEE80211_HE_PHY_CAP2_DOPPLER_RX 0x20 /* Note that the meaning of UL MU below is different between an AP and a non-AP * sta, where in the AP case it indicates support for Rx and in the non-AP sta * case it indicates support for Tx. */ #define IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO 0x40 #define IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO 0x80 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_NO_DCM 0x00 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_BPSK 0x01 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_QPSK 0x02 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_16_QAM 0x03 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK 0x03 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_1 0x00 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_TX_NSS_2 0x04 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_NO_DCM 0x00 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_BPSK 0x08 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_QPSK 0x10 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_16_QAM 0x18 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_MASK 0x18 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_1 0x00 #define IEEE80211_HE_PHY_CAP3_DCM_MAX_RX_NSS_2 0x20 #define IEEE80211_HE_PHY_CAP3_RX_PARTIAL_BW_SU_IN_20MHZ_MU 0x40 #define IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER 0x80 #define IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE 0x01 #define IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER 0x02 /* Minimal allowed value of Max STS under 80MHz is 3 */ #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4 0x0c #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_5 0x10 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_6 0x14 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_7 0x18 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_8 0x1c #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_MASK 0x1c /* Minimal allowed value of Max STS above 80MHz is 3 */ #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4 0x60 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_5 0x80 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_6 0xa0 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_7 0xc0 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_8 0xe0 #define IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_MASK 0xe0 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_1 0x00 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_2 0x01 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_3 0x02 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_4 0x03 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_5 0x04 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_6 0x05 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_7 0x06 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_8 0x07 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK 0x07 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_1 0x00 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_2 0x08 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_3 0x10 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_4 0x18 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_5 0x20 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_6 0x28 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_7 0x30 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_8 0x38 #define IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK 0x38 #define IEEE80211_HE_PHY_CAP5_NG16_SU_FEEDBACK 0x40 #define IEEE80211_HE_PHY_CAP5_NG16_MU_FEEDBACK 0x80 #define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU 0x01 #define IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU 0x02 #define IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMING_FB 0x04 #define IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMING_PARTIAL_BW_FB 0x08 #define IEEE80211_HE_PHY_CAP6_TRIG_CQI_FB 0x10 #define IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE 0x20 #define IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO 0x40 #define IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT 0x80 #define IEEE80211_HE_PHY_CAP7_PSR_BASED_SR 0x01 #define IEEE80211_HE_PHY_CAP7_POWER_BOOST_FACTOR_SUPP 0x02 #define IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI 0x04 #define IEEE80211_HE_PHY_CAP7_MAX_NC_1 0x08 #define IEEE80211_HE_PHY_CAP7_MAX_NC_2 0x10 #define IEEE80211_HE_PHY_CAP7_MAX_NC_3 0x18 #define IEEE80211_HE_PHY_CAP7_MAX_NC_4 0x20 #define IEEE80211_HE_PHY_CAP7_MAX_NC_5 0x28 #define IEEE80211_HE_PHY_CAP7_MAX_NC_6 0x30 #define IEEE80211_HE_PHY_CAP7_MAX_NC_7 0x38 #define IEEE80211_HE_PHY_CAP7_MAX_NC_MASK 0x38 #define IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ 0x40 #define IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ 0x80 #define IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI 0x01 #define IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G 0x02 #define IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU 0x04 #define IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU 0x08 #define IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI 0x10 #define IEEE80211_HE_PHY_CAP8_MIDAMBLE_RX_TX_2X_AND_1XLTF 0x20 #define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_242 0x00 #define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_484 0x40 #define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_996 0x80 #define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_2x996 0xc0 #define IEEE80211_HE_PHY_CAP8_DCM_MAX_RU_MASK 0xc0 #define IEEE80211_HE_PHY_CAP9_LONGER_THAN_16_SIGB_OFDM_SYM 0x01 #define IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK 0x02 #define IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU 0x04 #define IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU 0x08 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_COMP_SIGB 0x10 #define IEEE80211_HE_PHY_CAP9_RX_FULL_BW_SU_USING_MU_WITH_NON_COMP_SIGB 0x20 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_0US 0x0 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_8US 0x1 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_16US 0x2 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_RESERVED 0x3 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_POS 6 #define IEEE80211_HE_PHY_CAP9_NOMINAL_PKT_PADDING_MASK 0xc0 #define IEEE80211_HE_PHY_CAP10_HE_MU_M1RU_MAX_LTF 0x01 /* 802.11ax HE TX/RX MCS NSS Support */ #define IEEE80211_TX_RX_MCS_NSS_SUPP_HIGHEST_MCS_POS (3) #define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_POS (6) #define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_POS (11) #define IEEE80211_TX_RX_MCS_NSS_SUPP_TX_BITMAP_MASK 0x07c0 #define IEEE80211_TX_RX_MCS_NSS_SUPP_RX_BITMAP_MASK 0xf800 /* TX/RX HE MCS Support field Highest MCS subfield encoding */ enum ieee80211_he_highest_mcs_supported_subfield_enc { HIGHEST_MCS_SUPPORTED_MCS7 = 0, HIGHEST_MCS_SUPPORTED_MCS8, HIGHEST_MCS_SUPPORTED_MCS9, HIGHEST_MCS_SUPPORTED_MCS10, HIGHEST_MCS_SUPPORTED_MCS11, }; /* Calculate 802.11ax HE capabilities IE Tx/Rx HE MCS NSS Support Field size */ static inline u8 ieee80211_he_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap) { u8 count = 4; if (he_cap->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) count += 4; if (he_cap->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) count += 4; return count; } /* 802.11ax HE PPE Thresholds */ #define IEEE80211_PPE_THRES_NSS_SUPPORT_2NSS (1) #define IEEE80211_PPE_THRES_NSS_POS (0) #define IEEE80211_PPE_THRES_NSS_MASK (7) #define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_2x966_AND_966_RU \ (BIT(5) | BIT(6)) #define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK 0x78 #define IEEE80211_PPE_THRES_RU_INDEX_BITMASK_POS (3) #define IEEE80211_PPE_THRES_INFO_PPET_SIZE (3) #define IEEE80211_HE_PPE_THRES_INFO_HEADER_SIZE (7) /* * Calculate 802.11ax HE capabilities IE PPE field size * Input: Header byte of ppe_thres (first byte), and HE capa IE's PHY cap u8* */ static inline u8 ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info) { u8 n; if ((phy_cap_info[6] & IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) == 0) return 0; n = hweight8(ppe_thres_hdr & IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK); n *= (1 + ((ppe_thres_hdr & IEEE80211_PPE_THRES_NSS_MASK) >> IEEE80211_PPE_THRES_NSS_POS)); /* * Each pair is 6 bits, and we need to add the 7 "header" bits to the * total size. */ n = (n * IEEE80211_PPE_THRES_INFO_PPET_SIZE * 2) + 7; n = DIV_ROUND_UP(n, 8); return n; } static inline bool ieee80211_he_capa_size_ok(const u8 *data, u8 len) { const struct ieee80211_he_cap_elem *he_cap_ie_elem = (const void *)data; u8 needed = sizeof(*he_cap_ie_elem); if (len < needed) return false; needed += ieee80211_he_mcs_nss_size(he_cap_ie_elem); if (len < needed) return false; if (he_cap_ie_elem->phy_cap_info[6] & IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) { if (len < needed + 1) return false; needed += ieee80211_he_ppe_size(data[needed], he_cap_ie_elem->phy_cap_info); } return len >= needed; } /* HE Operation defines */ #define IEEE80211_HE_OPERATION_DFLT_PE_DURATION_MASK 0x00000007 #define IEEE80211_HE_OPERATION_TWT_REQUIRED 0x00000008 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_MASK 0x00003ff0 #define IEEE80211_HE_OPERATION_RTS_THRESHOLD_OFFSET 4 #define IEEE80211_HE_OPERATION_VHT_OPER_INFO 0x00004000 #define IEEE80211_HE_OPERATION_CO_HOSTED_BSS 0x00008000 #define IEEE80211_HE_OPERATION_ER_SU_DISABLE 0x00010000 #define IEEE80211_HE_OPERATION_6GHZ_OP_INFO 0x00020000 #define IEEE80211_HE_OPERATION_BSS_COLOR_MASK 0x3f000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_OFFSET 24 #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR 0x40000000 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED 0x80000000 #define IEEE80211_6GHZ_CTRL_REG_LPI_AP 0 #define IEEE80211_6GHZ_CTRL_REG_SP_AP 1 #define IEEE80211_6GHZ_CTRL_REG_VLP_AP 2 #define IEEE80211_6GHZ_CTRL_REG_INDOOR_LPI_AP 3 #define IEEE80211_6GHZ_CTRL_REG_INDOOR_SP_AP 4 /** * struct ieee80211_he_6ghz_oper - HE 6 GHz operation Information field * @primary: primary channel * @control: control flags * @ccfs0: channel center frequency segment 0 * @ccfs1: channel center frequency segment 1 * @minrate: minimum rate (in 1 Mbps units) */ struct ieee80211_he_6ghz_oper { u8 primary; #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH 0x3 #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ 0 #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ 1 #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ 2 #define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ 3 #define IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON 0x4 #define IEEE80211_HE_6GHZ_OPER_CTRL_REG_INFO 0x38 u8 control; u8 ccfs0; u8 ccfs1; u8 minrate; } __packed; /* transmit power interpretation type of transmit power envelope element */ enum ieee80211_tx_power_intrpt_type { IEEE80211_TPE_LOCAL_EIRP, IEEE80211_TPE_LOCAL_EIRP_PSD, IEEE80211_TPE_REG_CLIENT_EIRP, IEEE80211_TPE_REG_CLIENT_EIRP_PSD, }; /* category type of transmit power envelope element */ enum ieee80211_tx_power_category_6ghz { IEEE80211_TPE_CAT_6GHZ_DEFAULT = 0, IEEE80211_TPE_CAT_6GHZ_SUBORDINATE = 1, }; /* * For IEEE80211_TPE_LOCAL_EIRP / IEEE80211_TPE_REG_CLIENT_EIRP, * setting to 63.5 dBm means no constraint. */ #define IEEE80211_TPE_MAX_TX_PWR_NO_CONSTRAINT 127 /* * For IEEE80211_TPE_LOCAL_EIRP_PSD / IEEE80211_TPE_REG_CLIENT_EIRP_PSD, * setting to 127 indicates no PSD limit for the 20 MHz channel. */ #define IEEE80211_TPE_PSD_NO_LIMIT 127 /** * struct ieee80211_tx_pwr_env - Transmit Power Envelope * @info: Transmit Power Information field * @variable: Maximum Transmit Power field * * This structure represents the payload of the "Transmit Power * Envelope element" as described in IEEE Std 802.11ax-2021 section * 9.4.2.161 */ struct ieee80211_tx_pwr_env { u8 info; u8 variable[]; } __packed; #define IEEE80211_TX_PWR_ENV_INFO_COUNT 0x7 #define IEEE80211_TX_PWR_ENV_INFO_INTERPRET 0x38 #define IEEE80211_TX_PWR_ENV_INFO_CATEGORY 0xC0 #define IEEE80211_TX_PWR_ENV_EXT_COUNT 0xF static inline bool ieee80211_valid_tpe_element(const u8 *data, u8 len) { const struct ieee80211_tx_pwr_env *env = (const void *)data; u8 count, interpret, category; u8 needed = sizeof(*env); u8 N; /* also called N in the spec */ if (len < needed) return false; count = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_COUNT); interpret = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_INTERPRET); category = u8_get_bits(env->info, IEEE80211_TX_PWR_ENV_INFO_CATEGORY); switch (category) { case IEEE80211_TPE_CAT_6GHZ_DEFAULT: case IEEE80211_TPE_CAT_6GHZ_SUBORDINATE: break; default: return false; } switch (interpret) { case IEEE80211_TPE_LOCAL_EIRP: case IEEE80211_TPE_REG_CLIENT_EIRP: if (count > 3) return false; /* count == 0 encodes 1 value for 20 MHz, etc. */ needed += count + 1; if (len < needed) return false; /* there can be extension fields not accounted for in 'count' */ return true; case IEEE80211_TPE_LOCAL_EIRP_PSD: case IEEE80211_TPE_REG_CLIENT_EIRP_PSD: if (count > 4) return false; N = count ? 1 << (count - 1) : 1; needed += N; if (len < needed) return false; if (len > needed) { u8 K = u8_get_bits(env->variable[N], IEEE80211_TX_PWR_ENV_EXT_COUNT); needed += 1 + K; if (len < needed) return false; } return true; } return false; } /* * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size * @he_oper_ie: byte data of the He Operations IE, stating from the byte * after the ext ID byte. It is assumed that he_oper_ie has at least * sizeof(struct ieee80211_he_operation) bytes, the caller must have * validated this. * @return the actual size of the IE data (not including header), or 0 on error */ static inline u8 ieee80211_he_oper_size(const u8 *he_oper_ie) { const struct ieee80211_he_operation *he_oper = (const void *)he_oper_ie; u8 oper_len = sizeof(struct ieee80211_he_operation); u32 he_oper_params; /* Make sure the input is not NULL */ if (!he_oper_ie) return 0; /* Calc required length */ he_oper_params = le32_to_cpu(he_oper->he_oper_params); if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO) oper_len += 3; if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS) oper_len++; if (he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO) oper_len += sizeof(struct ieee80211_he_6ghz_oper); /* Add the first byte (extension ID) to the total length */ oper_len++; return oper_len; } /** * ieee80211_he_6ghz_oper - obtain 6 GHz operation field * @he_oper: HE operation element (must be pre-validated for size) * but may be %NULL * * Return: a pointer to the 6 GHz operation field, or %NULL */ static inline const struct ieee80211_he_6ghz_oper * ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper) { const u8 *ret; u32 he_oper_params; if (!he_oper) return NULL; ret = (const void *)&he_oper->optional; he_oper_params = le32_to_cpu(he_oper->he_oper_params); if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO)) return NULL; if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO) ret += 3; if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS) ret++; return (const void *)ret; } /* HE Spatial Reuse defines */ #define IEEE80211_HE_SPR_PSR_DISALLOWED BIT(0) #define IEEE80211_HE_SPR_NON_SRG_OBSS_PD_SR_DISALLOWED BIT(1) #define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT BIT(2) #define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT BIT(3) #define IEEE80211_HE_SPR_HESIGA_SR_VAL15_ALLOWED BIT(4) /* * ieee80211_he_spr_size - calculate 802.11ax HE Spatial Reuse IE size * @he_spr_ie: byte data of the He Spatial Reuse IE, stating from the byte * after the ext ID byte. It is assumed that he_spr_ie has at least * sizeof(struct ieee80211_he_spr) bytes, the caller must have validated * this * @return the actual size of the IE data (not including header), or 0 on error */ static inline u8 ieee80211_he_spr_size(const u8 *he_spr_ie) { const struct ieee80211_he_spr *he_spr = (const void *)he_spr_ie; u8 spr_len = sizeof(struct ieee80211_he_spr); u8 he_spr_params; /* Make sure the input is not NULL */ if (!he_spr_ie) return 0; /* Calc required length */ he_spr_params = he_spr->he_sr_control; if (he_spr_params & IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT) spr_len++; if (he_spr_params & IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT) spr_len += 18; /* Add the first byte (extension ID) to the total length */ spr_len++; return spr_len; } /* S1G Capabilities Information field */ #define IEEE80211_S1G_CAPABILITY_LEN 15 #define S1G_CAP0_S1G_LONG BIT(0) #define S1G_CAP0_SGI_1MHZ BIT(1) #define S1G_CAP0_SGI_2MHZ BIT(2) #define S1G_CAP0_SGI_4MHZ BIT(3) #define S1G_CAP0_SGI_8MHZ BIT(4) #define S1G_CAP0_SGI_16MHZ BIT(5) #define S1G_CAP0_SUPP_CH_WIDTH GENMASK(7, 6) #define S1G_SUPP_CH_WIDTH_2 0 #define S1G_SUPP_CH_WIDTH_4 1 #define S1G_SUPP_CH_WIDTH_8 2 #define S1G_SUPP_CH_WIDTH_16 3 #define S1G_SUPP_CH_WIDTH_MAX(cap) ((1 << FIELD_GET(S1G_CAP0_SUPP_CH_WIDTH, \ cap[0])) << 1) #define S1G_CAP1_RX_LDPC BIT(0) #define S1G_CAP1_TX_STBC BIT(1) #define S1G_CAP1_RX_STBC BIT(2) #define S1G_CAP1_SU_BFER BIT(3) #define S1G_CAP1_SU_BFEE BIT(4) #define S1G_CAP1_BFEE_STS GENMASK(7, 5) #define S1G_CAP2_SOUNDING_DIMENSIONS GENMASK(2, 0) #define S1G_CAP2_MU_BFER BIT(3) #define S1G_CAP2_MU_BFEE BIT(4) #define S1G_CAP2_PLUS_HTC_VHT BIT(5) #define S1G_CAP2_TRAVELING_PILOT GENMASK(7, 6) #define S1G_CAP3_RD_RESPONDER BIT(0) #define S1G_CAP3_HT_DELAYED_BA BIT(1) #define S1G_CAP3_MAX_MPDU_LEN BIT(2) #define S1G_CAP3_MAX_AMPDU_LEN_EXP GENMASK(4, 3) #define S1G_CAP3_MIN_MPDU_START GENMASK(7, 5) #define S1G_CAP4_UPLINK_SYNC BIT(0) #define S1G_CAP4_DYNAMIC_AID BIT(1) #define S1G_CAP4_BAT BIT(2) #define S1G_CAP4_TIME_ADE BIT(3) #define S1G_CAP4_NON_TIM BIT(4) #define S1G_CAP4_GROUP_AID BIT(5) #define S1G_CAP4_STA_TYPE GENMASK(7, 6) #define S1G_CAP5_CENT_AUTH_CONTROL BIT(0) #define S1G_CAP5_DIST_AUTH_CONTROL BIT(1) #define S1G_CAP5_AMSDU BIT(2) #define S1G_CAP5_AMPDU BIT(3) #define S1G_CAP5_ASYMMETRIC_BA BIT(4) #define S1G_CAP5_FLOW_CONTROL BIT(5) #define S1G_CAP5_SECTORIZED_BEAM GENMASK(7, 6) #define S1G_CAP6_OBSS_MITIGATION BIT(0) #define S1G_CAP6_FRAGMENT_BA BIT(1) #define S1G_CAP6_NDP_PS_POLL BIT(2) #define S1G_CAP6_RAW_OPERATION BIT(3) #define S1G_CAP6_PAGE_SLICING BIT(4) #define S1G_CAP6_TXOP_SHARING_IMP_ACK BIT(5) #define S1G_CAP6_VHT_LINK_ADAPT GENMASK(7, 6) #define S1G_CAP7_TACK_AS_PS_POLL BIT(0) #define S1G_CAP7_DUP_1MHZ BIT(1) #define S1G_CAP7_MCS_NEGOTIATION BIT(2) #define S1G_CAP7_1MHZ_CTL_RESPONSE_PREAMBLE BIT(3) #define S1G_CAP7_NDP_BFING_REPORT_POLL BIT(4) #define S1G_CAP7_UNSOLICITED_DYN_AID BIT(5) #define S1G_CAP7_SECTOR_TRAINING_OPERATION BIT(6) #define S1G_CAP7_TEMP_PS_MODE_SWITCH BIT(7) #define S1G_CAP8_TWT_GROUPING BIT(0) #define S1G_CAP8_BDT BIT(1) #define S1G_CAP8_COLOR GENMASK(4, 2) #define S1G_CAP8_TWT_REQUEST BIT(5) #define S1G_CAP8_TWT_RESPOND BIT(6) #define S1G_CAP8_PV1_FRAME BIT(7) #define S1G_CAP9_LINK_ADAPT_PER_CONTROL_RESPONSE BIT(0) #define S1G_OPER_CH_WIDTH_PRIMARY_1MHZ BIT(0) #define S1G_OPER_CH_WIDTH_OPER GENMASK(4, 1) /* EHT MAC capabilities as defined in P802.11be_D2.0 section 9.4.2.313.2 */ #define IEEE80211_EHT_MAC_CAP0_EPCS_PRIO_ACCESS 0x01 #define IEEE80211_EHT_MAC_CAP0_OM_CONTROL 0x02 #define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE1 0x04 #define IEEE80211_EHT_MAC_CAP0_TRIG_TXOP_SHARING_MODE2 0x08 #define IEEE80211_EHT_MAC_CAP0_RESTRICTED_TWT 0x10 #define IEEE80211_EHT_MAC_CAP0_SCS_TRAFFIC_DESC 0x20 #define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_MASK 0xc0 #define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_3895 0 #define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_7991 1 #define IEEE80211_EHT_MAC_CAP0_MAX_MPDU_LEN_11454 2 #define IEEE80211_EHT_MAC_CAP1_MAX_AMPDU_LEN_MASK 0x01 /* EHT PHY capabilities as defined in P802.11be_D2.0 section 9.4.2.313.3 */ #define IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ 0x02 #define IEEE80211_EHT_PHY_CAP0_242_TONE_RU_GT20MHZ 0x04 #define IEEE80211_EHT_PHY_CAP0_NDP_4_EHT_LFT_32_GI 0x08 #define IEEE80211_EHT_PHY_CAP0_PARTIAL_BW_UL_MU_MIMO 0x10 #define IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMER 0x20 #define IEEE80211_EHT_PHY_CAP0_SU_BEAMFORMEE 0x40 /* EHT beamformee number of spatial streams <= 80MHz is split */ #define IEEE80211_EHT_PHY_CAP0_BEAMFORMEE_SS_80MHZ_MASK 0x80 #define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_80MHZ_MASK 0x03 #define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_160MHZ_MASK 0x1c #define IEEE80211_EHT_PHY_CAP1_BEAMFORMEE_SS_320MHZ_MASK 0xe0 #define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_80MHZ_MASK 0x07 #define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_160MHZ_MASK 0x38 /* EHT number of sounding dimensions for 320MHz is split */ #define IEEE80211_EHT_PHY_CAP2_SOUNDING_DIM_320MHZ_MASK 0xc0 #define IEEE80211_EHT_PHY_CAP3_SOUNDING_DIM_320MHZ_MASK 0x01 #define IEEE80211_EHT_PHY_CAP3_NG_16_SU_FEEDBACK 0x02 #define IEEE80211_EHT_PHY_CAP3_NG_16_MU_FEEDBACK 0x04 #define IEEE80211_EHT_PHY_CAP3_CODEBOOK_4_2_SU_FDBK 0x08 #define IEEE80211_EHT_PHY_CAP3_CODEBOOK_7_5_MU_FDBK 0x10 #define IEEE80211_EHT_PHY_CAP3_TRIG_SU_BF_FDBK 0x20 #define IEEE80211_EHT_PHY_CAP3_TRIG_MU_BF_PART_BW_FDBK 0x40 #define IEEE80211_EHT_PHY_CAP3_TRIG_CQI_FDBK 0x80 #define IEEE80211_EHT_PHY_CAP4_PART_BW_DL_MU_MIMO 0x01 #define IEEE80211_EHT_PHY_CAP4_PSR_SR_SUPP 0x02 #define IEEE80211_EHT_PHY_CAP4_POWER_BOOST_FACT_SUPP 0x04 #define IEEE80211_EHT_PHY_CAP4_EHT_MU_PPDU_4_EHT_LTF_08_GI 0x08 #define IEEE80211_EHT_PHY_CAP4_MAX_NC_MASK 0xf0 #define IEEE80211_EHT_PHY_CAP5_NON_TRIG_CQI_FEEDBACK 0x01 #define IEEE80211_EHT_PHY_CAP5_TX_LESS_242_TONE_RU_SUPP 0x02 #define IEEE80211_EHT_PHY_CAP5_RX_LESS_242_TONE_RU_SUPP 0x04 #define IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT 0x08 #define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_MASK 0x30 #define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_0US 0 #define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_8US 1 #define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_16US 2 #define IEEE80211_EHT_PHY_CAP5_COMMON_NOMINAL_PKT_PAD_20US 3 /* Maximum number of supported EHT LTF is split */ #define IEEE80211_EHT_PHY_CAP5_MAX_NUM_SUPP_EHT_LTF_MASK 0xc0 #define IEEE80211_EHT_PHY_CAP5_SUPP_EXTRA_EHT_LTF 0x40 #define IEEE80211_EHT_PHY_CAP6_MAX_NUM_SUPP_EHT_LTF_MASK 0x07 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_80MHZ 0x08 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_160MHZ 0x30 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_320MHZ 0x40 #define IEEE80211_EHT_PHY_CAP6_MCS15_SUPP_MASK 0x78 #define IEEE80211_EHT_PHY_CAP6_EHT_DUP_6GHZ_SUPP 0x80 #define IEEE80211_EHT_PHY_CAP7_20MHZ_STA_RX_NDP_WIDER_BW 0x01 #define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_80MHZ 0x02 #define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_160MHZ 0x04 #define IEEE80211_EHT_PHY_CAP7_NON_OFDMA_UL_MU_MIMO_320MHZ 0x08 #define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_80MHZ 0x10 #define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_160MHZ 0x20 #define IEEE80211_EHT_PHY_CAP7_MU_BEAMFORMER_320MHZ 0x40 #define IEEE80211_EHT_PHY_CAP7_TB_SOUNDING_FDBK_RATE_LIMIT 0x80 #define IEEE80211_EHT_PHY_CAP8_RX_1024QAM_WIDER_BW_DL_OFDMA 0x01 #define IEEE80211_EHT_PHY_CAP8_RX_4096QAM_WIDER_BW_DL_OFDMA 0x02 /* * EHT operation channel width as defined in P802.11be_D2.0 section 9.4.2.311 */ #define IEEE80211_EHT_OPER_CHAN_WIDTH 0x7 #define IEEE80211_EHT_OPER_CHAN_WIDTH_20MHZ 0 #define IEEE80211_EHT_OPER_CHAN_WIDTH_40MHZ 1 #define IEEE80211_EHT_OPER_CHAN_WIDTH_80MHZ 2 #define IEEE80211_EHT_OPER_CHAN_WIDTH_160MHZ 3 #define IEEE80211_EHT_OPER_CHAN_WIDTH_320MHZ 4 /* Calculate 802.11be EHT capabilities IE Tx/Rx EHT MCS NSS Support Field size */ static inline u8 ieee80211_eht_mcs_nss_size(const struct ieee80211_he_cap_elem *he_cap, const struct ieee80211_eht_cap_elem_fixed *eht_cap, bool from_ap) { u8 count = 0; /* on 2.4 GHz, if it supports 40 MHz, the result is 3 */ if (he_cap->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G) return 3; /* on 2.4 GHz, these three bits are reserved, so should be 0 */ if (he_cap->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G) count += 3; if (he_cap->phy_cap_info[0] & IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) count += 3; if (eht_cap->phy_cap_info[0] & IEEE80211_EHT_PHY_CAP0_320MHZ_IN_6GHZ) count += 3; if (count) return count; return from_ap ? 3 : 4; } /* 802.11be EHT PPE Thresholds */ #define IEEE80211_EHT_PPE_THRES_NSS_POS 0 #define IEEE80211_EHT_PPE_THRES_NSS_MASK 0xf #define IEEE80211_EHT_PPE_THRES_RU_INDEX_BITMASK_MASK 0x1f0 #define IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE 3 #define IEEE80211_EHT_PPE_THRES_INFO_HEADER_SIZE 9 /* * Calculate 802.11be EHT capabilities IE EHT field size */ static inline u8 ieee80211_eht_ppe_size(u16 ppe_thres_hdr, const u8 *phy_cap_info) { u32 n; if (!(phy_cap_info[5] & IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT)) return 0; n = hweight16(ppe_thres_hdr & IEEE80211_EHT_PPE_THRES_RU_INDEX_BITMASK_MASK); n *= 1 + u16_get_bits(ppe_thres_hdr, IEEE80211_EHT_PPE_THRES_NSS_MASK); /* * Each pair is 6 bits, and we need to add the 9 "header" bits to the * total size. */ n = n * IEEE80211_EHT_PPE_THRES_INFO_PPET_SIZE * 2 + IEEE80211_EHT_PPE_THRES_INFO_HEADER_SIZE; return DIV_ROUND_UP(n, 8); } static inline bool ieee80211_eht_capa_size_ok(const u8 *he_capa, const u8 *data, u8 len, bool from_ap) { const struct ieee80211_eht_cap_elem_fixed *elem = (const void *)data; u8 needed = sizeof(struct ieee80211_eht_cap_elem_fixed); if (len < needed || !he_capa) return false; needed += ieee80211_eht_mcs_nss_size((const void *)he_capa, (const void *)data, from_ap); if (len < needed) return false; if (elem->phy_cap_info[5] & IEEE80211_EHT_PHY_CAP5_PPE_THRESHOLD_PRESENT) { u16 ppe_thres_hdr; if (len < needed + sizeof(ppe_thres_hdr)) return false; ppe_thres_hdr = get_unaligned_le16(data + needed); needed += ieee80211_eht_ppe_size(ppe_thres_hdr, elem->phy_cap_info); } return len >= needed; } static inline bool ieee80211_eht_oper_size_ok(const u8 *data, u8 len) { const struct ieee80211_eht_operation *elem = (const void *)data; u8 needed = sizeof(*elem); if (len < needed) return false; if (elem->params & IEEE80211_EHT_OPER_INFO_PRESENT) { needed += 3; if (elem->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT) needed += 2; } return len >= needed; } /* must validate ieee80211_eht_oper_size_ok() first */ static inline u16 ieee80211_eht_oper_dis_subchan_bitmap(const struct ieee80211_eht_operation *eht_oper) { const struct ieee80211_eht_operation_info *info = (const void *)eht_oper->optional; if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT)) return 0; if (!(eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) return 0; return get_unaligned_le16(info->optional); } #define IEEE80211_BW_IND_DIS_SUBCH_PRESENT BIT(1) struct ieee80211_bandwidth_indication { u8 params; struct ieee80211_eht_operation_info info; } __packed; static inline bool ieee80211_bandwidth_indication_size_ok(const u8 *data, u8 len) { const struct ieee80211_bandwidth_indication *bwi = (const void *)data; if (len < sizeof(*bwi)) return false; if (bwi->params & IEEE80211_BW_IND_DIS_SUBCH_PRESENT && len < sizeof(*bwi) + 2) return false; return true; } #define LISTEN_INT_USF GENMASK(15, 14) #define LISTEN_INT_UI GENMASK(13, 0) #define IEEE80211_MAX_USF FIELD_MAX(LISTEN_INT_USF) #define IEEE80211_MAX_UI FIELD_MAX(LISTEN_INT_UI) /* Authentication algorithms */ #define WLAN_AUTH_OPEN 0 #define WLAN_AUTH_SHARED_KEY 1 #define WLAN_AUTH_FT 2 #define WLAN_AUTH_SAE 3 #define WLAN_AUTH_FILS_SK 4 #define WLAN_AUTH_FILS_SK_PFS 5 #define WLAN_AUTH_FILS_PK 6 #define WLAN_AUTH_LEAP 128 #define WLAN_AUTH_CHALLENGE_LEN 128 #define WLAN_CAPABILITY_ESS (1<<0) #define WLAN_CAPABILITY_IBSS (1<<1) /* * A mesh STA sets the ESS and IBSS capability bits to zero. * however, this holds true for p2p probe responses (in the p2p_find * phase) as well. */ #define WLAN_CAPABILITY_IS_STA_BSS(cap) \ (!((cap) & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS))) #define WLAN_CAPABILITY_CF_POLLABLE (1<<2) #define WLAN_CAPABILITY_CF_POLL_REQUEST (1<<3) #define WLAN_CAPABILITY_PRIVACY (1<<4) #define WLAN_CAPABILITY_SHORT_PREAMBLE (1<<5) #define WLAN_CAPABILITY_PBCC (1<<6) #define WLAN_CAPABILITY_CHANNEL_AGILITY (1<<7) /* 802.11h */ #define WLAN_CAPABILITY_SPECTRUM_MGMT (1<<8) #define WLAN_CAPABILITY_QOS (1<<9) #define WLAN_CAPABILITY_SHORT_SLOT_TIME (1<<10) #define WLAN_CAPABILITY_APSD (1<<11) #define WLAN_CAPABILITY_RADIO_MEASURE (1<<12) #define WLAN_CAPABILITY_DSSS_OFDM (1<<13) #define WLAN_CAPABILITY_DEL_BACK (1<<14) #define WLAN_CAPABILITY_IMM_BACK (1<<15) /* DMG (60gHz) 802.11ad */ /* type - bits 0..1 */ #define WLAN_CAPABILITY_DMG_TYPE_MASK (3<<0) #define WLAN_CAPABILITY_DMG_TYPE_IBSS (1<<0) /* Tx by: STA */ #define WLAN_CAPABILITY_DMG_TYPE_PBSS (2<<0) /* Tx by: PCP */ #define WLAN_CAPABILITY_DMG_TYPE_AP (3<<0) /* Tx by: AP */ #define WLAN_CAPABILITY_DMG_CBAP_ONLY (1<<2) #define WLAN_CAPABILITY_DMG_CBAP_SOURCE (1<<3) #define WLAN_CAPABILITY_DMG_PRIVACY (1<<4) #define WLAN_CAPABILITY_DMG_ECPAC (1<<5) #define WLAN_CAPABILITY_DMG_SPECTRUM_MGMT (1<<8) #define WLAN_CAPABILITY_DMG_RADIO_MEASURE (1<<12) /* measurement */ #define IEEE80211_SPCT_MSR_RPRT_MODE_LATE (1<<0) #define IEEE80211_SPCT_MSR_RPRT_MODE_INCAPABLE (1<<1) #define IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED (1<<2) #define IEEE80211_SPCT_MSR_RPRT_TYPE_BASIC 0 #define IEEE80211_SPCT_MSR_RPRT_TYPE_CCA 1 #define IEEE80211_SPCT_MSR_RPRT_TYPE_RPI 2 #define IEEE80211_SPCT_MSR_RPRT_TYPE_LCI 8 #define IEEE80211_SPCT_MSR_RPRT_TYPE_CIVIC 11 /* 802.11g ERP information element */ #define WLAN_ERP_NON_ERP_PRESENT (1<<0) #define WLAN_ERP_USE_PROTECTION (1<<1) #define WLAN_ERP_BARKER_PREAMBLE (1<<2) /* WLAN_ERP_BARKER_PREAMBLE values */ enum { WLAN_ERP_PREAMBLE_SHORT = 0, WLAN_ERP_PREAMBLE_LONG = 1, }; /* Band ID, 802.11ad #8.4.1.45 */ enum { IEEE80211_BANDID_TV_WS = 0, /* TV white spaces */ IEEE80211_BANDID_SUB1 = 1, /* Sub-1 GHz (excluding TV white spaces) */ IEEE80211_BANDID_2G = 2, /* 2.4 GHz */ IEEE80211_BANDID_3G = 3, /* 3.6 GHz */ IEEE80211_BANDID_5G = 4, /* 4.9 and 5 GHz */ IEEE80211_BANDID_60G = 5, /* 60 GHz */ }; /* Status codes */ enum ieee80211_statuscode { WLAN_STATUS_SUCCESS = 0, WLAN_STATUS_UNSPECIFIED_FAILURE = 1, WLAN_STATUS_CAPS_UNSUPPORTED = 10, WLAN_STATUS_REASSOC_NO_ASSOC = 11, WLAN_STATUS_ASSOC_DENIED_UNSPEC = 12, WLAN_STATUS_NOT_SUPPORTED_AUTH_ALG = 13, WLAN_STATUS_UNKNOWN_AUTH_TRANSACTION = 14, WLAN_STATUS_CHALLENGE_FAIL = 15, WLAN_STATUS_AUTH_TIMEOUT = 16, WLAN_STATUS_AP_UNABLE_TO_HANDLE_NEW_STA = 17, WLAN_STATUS_ASSOC_DENIED_RATES = 18, /* 802.11b */ WLAN_STATUS_ASSOC_DENIED_NOSHORTPREAMBLE = 19, WLAN_STATUS_ASSOC_DENIED_NOPBCC = 20, WLAN_STATUS_ASSOC_DENIED_NOAGILITY = 21, /* 802.11h */ WLAN_STATUS_ASSOC_DENIED_NOSPECTRUM = 22, WLAN_STATUS_ASSOC_REJECTED_BAD_POWER = 23, WLAN_STATUS_ASSOC_REJECTED_BAD_SUPP_CHAN = 24, /* 802.11g */ WLAN_STATUS_ASSOC_DENIED_NOSHORTTIME = 25, WLAN_STATUS_ASSOC_DENIED_NODSSSOFDM = 26, /* 802.11w */ WLAN_STATUS_ASSOC_REJECTED_TEMPORARILY = 30, WLAN_STATUS_ROBUST_MGMT_FRAME_POLICY_VIOLATION = 31, /* 802.11i */ WLAN_STATUS_INVALID_IE = 40, WLAN_STATUS_INVALID_GROUP_CIPHER = 41, WLAN_STATUS_INVALID_PAIRWISE_CIPHER = 42, WLAN_STATUS_INVALID_AKMP = 43, WLAN_STATUS_UNSUPP_RSN_VERSION = 44, WLAN_STATUS_INVALID_RSN_IE_CAP = 45, WLAN_STATUS_CIPHER_SUITE_REJECTED = 46, /* 802.11e */ WLAN_STATUS_UNSPECIFIED_QOS = 32, WLAN_STATUS_ASSOC_DENIED_NOBANDWIDTH = 33, WLAN_STATUS_ASSOC_DENIED_LOWACK = 34, WLAN_STATUS_ASSOC_DENIED_UNSUPP_QOS = 35, WLAN_STATUS_REQUEST_DECLINED = 37, WLAN_STATUS_INVALID_QOS_PARAM = 38, WLAN_STATUS_CHANGE_TSPEC = 39, WLAN_STATUS_WAIT_TS_DELAY = 47, WLAN_STATUS_NO_DIRECT_LINK = 48, WLAN_STATUS_STA_NOT_PRESENT = 49, WLAN_STATUS_STA_NOT_QSTA = 50, /* 802.11s */ WLAN_STATUS_ANTI_CLOG_REQUIRED = 76, WLAN_STATUS_FCG_NOT_SUPP = 78, WLAN_STATUS_STA_NO_TBTT = 78, /* 802.11ad */ WLAN_STATUS_REJECTED_WITH_SUGGESTED_CHANGES = 39, WLAN_STATUS_REJECTED_FOR_DELAY_PERIOD = 47, WLAN_STATUS_REJECT_WITH_SCHEDULE = 83, WLAN_STATUS_PENDING_ADMITTING_FST_SESSION = 86, WLAN_STATUS_PERFORMING_FST_NOW = 87, WLAN_STATUS_PENDING_GAP_IN_BA_WINDOW = 88, WLAN_STATUS_REJECT_U_PID_SETTING = 89, WLAN_STATUS_REJECT_DSE_BAND = 96, WLAN_STATUS_DENIED_WITH_SUGGESTED_BAND_AND_CHANNEL = 99, WLAN_STATUS_DENIED_DUE_TO_SPECTRUM_MANAGEMENT = 103, /* 802.11ai */ WLAN_STATUS_FILS_AUTHENTICATION_FAILURE = 108, WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109, WLAN_STATUS_SAE_HASH_TO_ELEMENT = 126, WLAN_STATUS_SAE_PK = 127, WLAN_STATUS_DENIED_TID_TO_LINK_MAPPING = 133, WLAN_STATUS_PREF_TID_TO_LINK_MAPPING_SUGGESTED = 134, }; /* Reason codes */ enum ieee80211_reasoncode { WLAN_REASON_UNSPECIFIED = 1, WLAN_REASON_PREV_AUTH_NOT_VALID = 2, WLAN_REASON_DEAUTH_LEAVING = 3, WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY = 4, WLAN_REASON_DISASSOC_AP_BUSY = 5, WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA = 6, WLAN_REASON_CLASS3_FRAME_FROM_NONASSOC_STA = 7, WLAN_REASON_DISASSOC_STA_HAS_LEFT = 8, WLAN_REASON_STA_REQ_ASSOC_WITHOUT_AUTH = 9, /* 802.11h */ WLAN_REASON_DISASSOC_BAD_POWER = 10, WLAN_REASON_DISASSOC_BAD_SUPP_CHAN = 11, /* 802.11i */ WLAN_REASON_INVALID_IE = 13, WLAN_REASON_MIC_FAILURE = 14, WLAN_REASON_4WAY_HANDSHAKE_TIMEOUT = 15, WLAN_REASON_GROUP_KEY_HANDSHAKE_TIMEOUT = 16, WLAN_REASON_IE_DIFFERENT = 17, WLAN_REASON_INVALID_GROUP_CIPHER = 18, WLAN_REASON_INVALID_PAIRWISE_CIPHER = 19, WLAN_REASON_INVALID_AKMP = 20, WLAN_REASON_UNSUPP_RSN_VERSION = 21, WLAN_REASON_INVALID_RSN_IE_CAP = 22, WLAN_REASON_IEEE8021X_FAILED = 23, WLAN_REASON_CIPHER_SUITE_REJECTED = 24, /* TDLS (802.11z) */ WLAN_REASON_TDLS_TEARDOWN_UNREACHABLE = 25, WLAN_REASON_TDLS_TEARDOWN_UNSPECIFIED = 26, /* 802.11e */ WLAN_REASON_DISASSOC_UNSPECIFIED_QOS = 32, WLAN_REASON_DISASSOC_QAP_NO_BANDWIDTH = 33, WLAN_REASON_DISASSOC_LOW_ACK = 34, WLAN_REASON_DISASSOC_QAP_EXCEED_TXOP = 35, WLAN_REASON_QSTA_LEAVE_QBSS = 36, WLAN_REASON_QSTA_NOT_USE = 37, WLAN_REASON_QSTA_REQUIRE_SETUP = 38, WLAN_REASON_QSTA_TIMEOUT = 39, WLAN_REASON_QSTA_CIPHER_NOT_SUPP = 45, /* 802.11s */ WLAN_REASON_MESH_PEER_CANCELED = 52, WLAN_REASON_MESH_MAX_PEERS = 53, WLAN_REASON_MESH_CONFIG = 54, WLAN_REASON_MESH_CLOSE = 55, WLAN_REASON_MESH_MAX_RETRIES = 56, WLAN_REASON_MESH_CONFIRM_TIMEOUT = 57, WLAN_REASON_MESH_INVALID_GTK = 58, WLAN_REASON_MESH_INCONSISTENT_PARAM = 59, WLAN_REASON_MESH_INVALID_SECURITY = 60, WLAN_REASON_MESH_PATH_ERROR = 61, WLAN_REASON_MESH_PATH_NOFORWARD = 62, WLAN_REASON_MESH_PATH_DEST_UNREACHABLE = 63, WLAN_REASON_MAC_EXISTS_IN_MBSS = 64, WLAN_REASON_MESH_CHAN_REGULATORY = 65, WLAN_REASON_MESH_CHAN = 66, }; /* Information Element IDs */ enum ieee80211_eid { WLAN_EID_SSID = 0, WLAN_EID_SUPP_RATES = 1, WLAN_EID_FH_PARAMS = 2, /* reserved now */ WLAN_EID_DS_PARAMS = 3, WLAN_EID_CF_PARAMS = 4, WLAN_EID_TIM = 5, WLAN_EID_IBSS_PARAMS = 6, WLAN_EID_COUNTRY = 7, /* 8, 9 reserved */ WLAN_EID_REQUEST = 10, WLAN_EID_QBSS_LOAD = 11, WLAN_EID_EDCA_PARAM_SET = 12, WLAN_EID_TSPEC = 13, WLAN_EID_TCLAS = 14, WLAN_EID_SCHEDULE = 15, WLAN_EID_CHALLENGE = 16, /* 17-31 reserved for challenge text extension */ WLAN_EID_PWR_CONSTRAINT = 32, WLAN_EID_PWR_CAPABILITY = 33, WLAN_EID_TPC_REQUEST = 34, WLAN_EID_TPC_REPORT = 35, WLAN_EID_SUPPORTED_CHANNELS = 36, WLAN_EID_CHANNEL_SWITCH = 37, WLAN_EID_MEASURE_REQUEST = 38, WLAN_EID_MEASURE_REPORT = 39, WLAN_EID_QUIET = 40, WLAN_EID_IBSS_DFS = 41, WLAN_EID_ERP_INFO = 42, WLAN_EID_TS_DELAY = 43, WLAN_EID_TCLAS_PROCESSING = 44, WLAN_EID_HT_CAPABILITY = 45, WLAN_EID_QOS_CAPA = 46, /* 47 reserved for Broadcom */ WLAN_EID_RSN = 48, WLAN_EID_802_15_COEX = 49, WLAN_EID_EXT_SUPP_RATES = 50, WLAN_EID_AP_CHAN_REPORT = 51, WLAN_EID_NEIGHBOR_REPORT = 52, WLAN_EID_RCPI = 53, WLAN_EID_MOBILITY_DOMAIN = 54, WLAN_EID_FAST_BSS_TRANSITION = 55, WLAN_EID_TIMEOUT_INTERVAL = 56, WLAN_EID_RIC_DATA = 57, WLAN_EID_DSE_REGISTERED_LOCATION = 58, WLAN_EID_SUPPORTED_REGULATORY_CLASSES = 59, WLAN_EID_EXT_CHANSWITCH_ANN = 60, WLAN_EID_HT_OPERATION = 61, WLAN_EID_SECONDARY_CHANNEL_OFFSET = 62, WLAN_EID_BSS_AVG_ACCESS_DELAY = 63, WLAN_EID_ANTENNA_INFO = 64, WLAN_EID_RSNI = 65, WLAN_EID_MEASUREMENT_PILOT_TX_INFO = 66, WLAN_EID_BSS_AVAILABLE_CAPACITY = 67, WLAN_EID_BSS_AC_ACCESS_DELAY = 68, WLAN_EID_TIME_ADVERTISEMENT = 69, WLAN_EID_RRM_ENABLED_CAPABILITIES = 70, WLAN_EID_MULTIPLE_BSSID = 71, WLAN_EID_BSS_COEX_2040 = 72, WLAN_EID_BSS_INTOLERANT_CHL_REPORT = 73, WLAN_EID_OVERLAP_BSS_SCAN_PARAM = 74, WLAN_EID_RIC_DESCRIPTOR = 75, WLAN_EID_MMIE = 76, WLAN_EID_ASSOC_COMEBACK_TIME = 77, WLAN_EID_EVENT_REQUEST = 78, WLAN_EID_EVENT_REPORT = 79, WLAN_EID_DIAGNOSTIC_REQUEST = 80, WLAN_EID_DIAGNOSTIC_REPORT = 81, WLAN_EID_LOCATION_PARAMS = 82, WLAN_EID_NON_TX_BSSID_CAP = 83, WLAN_EID_SSID_LIST = 84, WLAN_EID_MULTI_BSSID_IDX = 85, WLAN_EID_FMS_DESCRIPTOR = 86, WLAN_EID_FMS_REQUEST = 87, WLAN_EID_FMS_RESPONSE = 88, WLAN_EID_QOS_TRAFFIC_CAPA = 89, WLAN_EID_BSS_MAX_IDLE_PERIOD = 90, WLAN_EID_TSF_REQUEST = 91, WLAN_EID_TSF_RESPOSNE = 92, WLAN_EID_WNM_SLEEP_MODE = 93, WLAN_EID_TIM_BCAST_REQ = 94, WLAN_EID_TIM_BCAST_RESP = 95, WLAN_EID_COLL_IF_REPORT = 96, WLAN_EID_CHANNEL_USAGE = 97, WLAN_EID_TIME_ZONE = 98, WLAN_EID_DMS_REQUEST = 99, WLAN_EID_DMS_RESPONSE = 100, WLAN_EID_LINK_ID = 101, WLAN_EID_WAKEUP_SCHEDUL = 102, /* 103 reserved */ WLAN_EID_CHAN_SWITCH_TIMING = 104, WLAN_EID_PTI_CONTROL = 105, WLAN_EID_PU_BUFFER_STATUS = 106, WLAN_EID_INTERWORKING = 107, WLAN_EID_ADVERTISEMENT_PROTOCOL = 108, WLAN_EID_EXPEDITED_BW_REQ = 109, WLAN_EID_QOS_MAP_SET = 110, WLAN_EID_ROAMING_CONSORTIUM = 111, WLAN_EID_EMERGENCY_ALERT = 112, WLAN_EID_MESH_CONFIG = 113, WLAN_EID_MESH_ID = 114, WLAN_EID_LINK_METRIC_REPORT = 115, WLAN_EID_CONGESTION_NOTIFICATION = 116, WLAN_EID_PEER_MGMT = 117, WLAN_EID_CHAN_SWITCH_PARAM = 118, WLAN_EID_MESH_AWAKE_WINDOW = 119, WLAN_EID_BEACON_TIMING = 120, WLAN_EID_MCCAOP_SETUP_REQ = 121, WLAN_EID_MCCAOP_SETUP_RESP = 122, WLAN_EID_MCCAOP_ADVERT = 123, WLAN_EID_MCCAOP_TEARDOWN = 124, WLAN_EID_GANN = 125, WLAN_EID_RANN = 126, WLAN_EID_EXT_CAPABILITY = 127, /* 128, 129 reserved for Agere */ WLAN_EID_PREQ = 130, WLAN_EID_PREP = 131, WLAN_EID_PERR = 132, /* 133-136 reserved for Cisco */ WLAN_EID_PXU = 137, WLAN_EID_PXUC = 138, WLAN_EID_AUTH_MESH_PEER_EXCH = 139, WLAN_EID_MIC = 140, WLAN_EID_DESTINATION_URI = 141, WLAN_EID_UAPSD_COEX = 142, WLAN_EID_WAKEUP_SCHEDULE = 143, WLAN_EID_EXT_SCHEDULE = 144, WLAN_EID_STA_AVAILABILITY = 145, WLAN_EID_DMG_TSPEC = 146, WLAN_EID_DMG_AT = 147, WLAN_EID_DMG_CAP = 148, /* 149 reserved for Cisco */ WLAN_EID_CISCO_VENDOR_SPECIFIC = 150, WLAN_EID_DMG_OPERATION = 151, WLAN_EID_DMG_BSS_PARAM_CHANGE = 152, WLAN_EID_DMG_BEAM_REFINEMENT = 153, WLAN_EID_CHANNEL_MEASURE_FEEDBACK = 154, /* 155-156 reserved for Cisco */ WLAN_EID_AWAKE_WINDOW = 157, WLAN_EID_MULTI_BAND = 158, WLAN_EID_ADDBA_EXT = 159, WLAN_EID_NEXT_PCP_LIST = 160, WLAN_EID_PCP_HANDOVER = 161, WLAN_EID_DMG_LINK_MARGIN = 162, WLAN_EID_SWITCHING_STREAM = 163, WLAN_EID_SESSION_TRANSITION = 164, WLAN_EID_DYN_TONE_PAIRING_REPORT = 165, WLAN_EID_CLUSTER_REPORT = 166, WLAN_EID_RELAY_CAP = 167, WLAN_EID_RELAY_XFER_PARAM_SET = 168, WLAN_EID_BEAM_LINK_MAINT = 169, WLAN_EID_MULTIPLE_MAC_ADDR = 170, WLAN_EID_U_PID = 171, WLAN_EID_DMG_LINK_ADAPT_ACK = 172, /* 173 reserved for Symbol */ WLAN_EID_MCCAOP_ADV_OVERVIEW = 174, WLAN_EID_QUIET_PERIOD_REQ = 175, /* 176 reserved for Symbol */ WLAN_EID_QUIET_PERIOD_RESP = 177, /* 178-179 reserved for Symbol */ /* 180 reserved for ISO/IEC 20011 */ WLAN_EID_EPAC_POLICY = 182, WLAN_EID_CLISTER_TIME_OFF = 183, WLAN_EID_INTER_AC_PRIO = 184, WLAN_EID_SCS_DESCRIPTOR = 185, WLAN_EID_QLOAD_REPORT = 186, WLAN_EID_HCCA_TXOP_UPDATE_COUNT = 187, WLAN_EID_HL_STREAM_ID = 188, WLAN_EID_GCR_GROUP_ADDR = 189, WLAN_EID_ANTENNA_SECTOR_ID_PATTERN = 190, WLAN_EID_VHT_CAPABILITY = 191, WLAN_EID_VHT_OPERATION = 192, WLAN_EID_EXTENDED_BSS_LOAD = 193, WLAN_EID_WIDE_BW_CHANNEL_SWITCH = 194, WLAN_EID_TX_POWER_ENVELOPE = 195, WLAN_EID_CHANNEL_SWITCH_WRAPPER = 196, WLAN_EID_AID = 197, WLAN_EID_QUIET_CHANNEL = 198, WLAN_EID_OPMODE_NOTIF = 199, WLAN_EID_REDUCED_NEIGHBOR_REPORT = 201, WLAN_EID_AID_REQUEST = 210, WLAN_EID_AID_RESPONSE = 211, WLAN_EID_S1G_BCN_COMPAT = 213, WLAN_EID_S1G_SHORT_BCN_INTERVAL = 214, WLAN_EID_S1G_TWT = 216, WLAN_EID_S1G_CAPABILITIES = 217, WLAN_EID_VENDOR_SPECIFIC = 221, WLAN_EID_QOS_PARAMETER = 222, WLAN_EID_S1G_OPERATION = 232, WLAN_EID_CAG_NUMBER = 237, WLAN_EID_AP_CSN = 239, WLAN_EID_FILS_INDICATION = 240, WLAN_EID_DILS = 241, WLAN_EID_FRAGMENT = 242, WLAN_EID_RSNX = 244, WLAN_EID_EXTENSION = 255 }; /* Element ID Extensions for Element ID 255 */ enum ieee80211_eid_ext { WLAN_EID_EXT_ASSOC_DELAY_INFO = 1, WLAN_EID_EXT_FILS_REQ_PARAMS = 2, WLAN_EID_EXT_FILS_KEY_CONFIRM = 3, WLAN_EID_EXT_FILS_SESSION = 4, WLAN_EID_EXT_FILS_HLP_CONTAINER = 5, WLAN_EID_EXT_FILS_IP_ADDR_ASSIGN = 6, WLAN_EID_EXT_KEY_DELIVERY = 7, WLAN_EID_EXT_FILS_WRAPPED_DATA = 8, WLAN_EID_EXT_FILS_PUBLIC_KEY = 12, WLAN_EID_EXT_FILS_NONCE = 13, WLAN_EID_EXT_FUTURE_CHAN_GUIDANCE = 14, WLAN_EID_EXT_HE_CAPABILITY = 35, WLAN_EID_EXT_HE_OPERATION = 36, WLAN_EID_EXT_UORA = 37, WLAN_EID_EXT_HE_MU_EDCA = 38, WLAN_EID_EXT_HE_SPR = 39, WLAN_EID_EXT_NDP_FEEDBACK_REPORT_PARAMSET = 41, WLAN_EID_EXT_BSS_COLOR_CHG_ANN = 42, WLAN_EID_EXT_QUIET_TIME_PERIOD_SETUP = 43, WLAN_EID_EXT_ESS_REPORT = 45, WLAN_EID_EXT_OPS = 46, WLAN_EID_EXT_HE_BSS_LOAD = 47, WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME = 52, WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION = 55, WLAN_EID_EXT_NON_INHERITANCE = 56, WLAN_EID_EXT_KNOWN_BSSID = 57, WLAN_EID_EXT_SHORT_SSID_LIST = 58, WLAN_EID_EXT_HE_6GHZ_CAPA = 59, WLAN_EID_EXT_UL_MU_POWER_CAPA = 60, WLAN_EID_EXT_EHT_OPERATION = 106, WLAN_EID_EXT_EHT_MULTI_LINK = 107, WLAN_EID_EXT_EHT_CAPABILITY = 108, WLAN_EID_EXT_TID_TO_LINK_MAPPING = 109, WLAN_EID_EXT_BANDWIDTH_INDICATION = 135, }; /* Action category code */ enum ieee80211_category { WLAN_CATEGORY_SPECTRUM_MGMT = 0, WLAN_CATEGORY_QOS = 1, WLAN_CATEGORY_DLS = 2, WLAN_CATEGORY_BACK = 3, WLAN_CATEGORY_PUBLIC = 4, WLAN_CATEGORY_RADIO_MEASUREMENT = 5, WLAN_CATEGORY_FAST_BBS_TRANSITION = 6, WLAN_CATEGORY_HT = 7, WLAN_CATEGORY_SA_QUERY = 8, WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9, WLAN_CATEGORY_WNM = 10, WLAN_CATEGORY_WNM_UNPROTECTED = 11, WLAN_CATEGORY_TDLS = 12, WLAN_CATEGORY_MESH_ACTION = 13, WLAN_CATEGORY_MULTIHOP_ACTION = 14, WLAN_CATEGORY_SELF_PROTECTED = 15, WLAN_CATEGORY_DMG = 16, WLAN_CATEGORY_WMM = 17, WLAN_CATEGORY_FST = 18, WLAN_CATEGORY_UNPROT_DMG = 20, WLAN_CATEGORY_VHT = 21, WLAN_CATEGORY_S1G = 22, WLAN_CATEGORY_PROTECTED_EHT = 37, WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126, WLAN_CATEGORY_VENDOR_SPECIFIC = 127, }; /* SPECTRUM_MGMT action code */ enum ieee80211_spectrum_mgmt_actioncode { WLAN_ACTION_SPCT_MSR_REQ = 0, WLAN_ACTION_SPCT_MSR_RPRT = 1, WLAN_ACTION_SPCT_TPC_REQ = 2, WLAN_ACTION_SPCT_TPC_RPRT = 3, WLAN_ACTION_SPCT_CHL_SWITCH = 4, }; /* HT action codes */ enum ieee80211_ht_actioncode { WLAN_HT_ACTION_NOTIFY_CHANWIDTH = 0, WLAN_HT_ACTION_SMPS = 1, WLAN_HT_ACTION_PSMP = 2, WLAN_HT_ACTION_PCO_PHASE = 3, WLAN_HT_ACTION_CSI = 4, WLAN_HT_ACTION_NONCOMPRESSED_BF = 5, WLAN_HT_ACTION_COMPRESSED_BF = 6, WLAN_HT_ACTION_ASEL_IDX_FEEDBACK = 7, }; /* VHT action codes */ enum ieee80211_vht_actioncode { WLAN_VHT_ACTION_COMPRESSED_BF = 0, WLAN_VHT_ACTION_GROUPID_MGMT = 1, WLAN_VHT_ACTION_OPMODE_NOTIF = 2, }; /* Self Protected Action codes */ enum ieee80211_self_protected_actioncode { WLAN_SP_RESERVED = 0, WLAN_SP_MESH_PEERING_OPEN = 1, WLAN_SP_MESH_PEERING_CONFIRM = 2, WLAN_SP_MESH_PEERING_CLOSE = 3, WLAN_SP_MGK_INFORM = 4, WLAN_SP_MGK_ACK = 5, }; /* Mesh action codes */ enum ieee80211_mesh_actioncode { WLAN_MESH_ACTION_LINK_METRIC_REPORT, WLAN_MESH_ACTION_HWMP_PATH_SELECTION, WLAN_MESH_ACTION_GATE_ANNOUNCEMENT, WLAN_MESH_ACTION_CONGESTION_CONTROL_NOTIFICATION, WLAN_MESH_ACTION_MCCA_SETUP_REQUEST, WLAN_MESH_ACTION_MCCA_SETUP_REPLY, WLAN_MESH_ACTION_MCCA_ADVERTISEMENT_REQUEST, WLAN_MESH_ACTION_MCCA_ADVERTISEMENT, WLAN_MESH_ACTION_MCCA_TEARDOWN, WLAN_MESH_ACTION_TBTT_ADJUSTMENT_REQUEST, WLAN_MESH_ACTION_TBTT_ADJUSTMENT_RESPONSE, }; /* Unprotected WNM action codes */ enum ieee80211_unprotected_wnm_actioncode { WLAN_UNPROTECTED_WNM_ACTION_TIM = 0, WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE = 1, }; /* Protected EHT action codes */ enum ieee80211_protected_eht_actioncode { WLAN_PROTECTED_EHT_ACTION_TTLM_REQ = 0, WLAN_PROTECTED_EHT_ACTION_TTLM_RES = 1, WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN = 2, }; /* Security key length */ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, WLAN_KEY_LEN_WEP104 = 13, WLAN_KEY_LEN_CCMP = 16, WLAN_KEY_LEN_CCMP_256 = 32, WLAN_KEY_LEN_TKIP = 32, WLAN_KEY_LEN_AES_CMAC = 16, WLAN_KEY_LEN_SMS4 = 32, WLAN_KEY_LEN_GCMP = 16, WLAN_KEY_LEN_GCMP_256 = 32, WLAN_KEY_LEN_BIP_CMAC_256 = 32, WLAN_KEY_LEN_BIP_GMAC_128 = 16, WLAN_KEY_LEN_BIP_GMAC_256 = 32, }; enum ieee80211_s1g_actioncode { WLAN_S1G_AID_SWITCH_REQUEST, WLAN_S1G_AID_SWITCH_RESPONSE, WLAN_S1G_SYNC_CONTROL, WLAN_S1G_STA_INFO_ANNOUNCE, WLAN_S1G_EDCA_PARAM_SET, WLAN_S1G_EL_OPERATION, WLAN_S1G_TWT_SETUP, WLAN_S1G_TWT_TEARDOWN, WLAN_S1G_SECT_GROUP_ID_LIST, WLAN_S1G_SECT_ID_FEEDBACK, WLAN_S1G_TWT_INFORMATION = 11, }; #define IEEE80211_WEP_IV_LEN 4 #define IEEE80211_WEP_ICV_LEN 4 #define IEEE80211_CCMP_HDR_LEN 8 #define IEEE80211_CCMP_MIC_LEN 8 #define IEEE80211_CCMP_PN_LEN 6 #define IEEE80211_CCMP_256_HDR_LEN 8 #define IEEE80211_CCMP_256_MIC_LEN 16 #define IEEE80211_CCMP_256_PN_LEN 6 #define IEEE80211_TKIP_IV_LEN 8 #define IEEE80211_TKIP_ICV_LEN 4 #define IEEE80211_CMAC_PN_LEN 6 #define IEEE80211_GMAC_PN_LEN 6 #define IEEE80211_GCMP_HDR_LEN 8 #define IEEE80211_GCMP_MIC_LEN 16 #define IEEE80211_GCMP_PN_LEN 6 #define FILS_NONCE_LEN 16 #define FILS_MAX_KEK_LEN 64 #define FILS_ERP_MAX_USERNAME_LEN 16 #define FILS_ERP_MAX_REALM_LEN 253 #define FILS_ERP_MAX_RRK_LEN 64 #define PMK_MAX_LEN 64 #define SAE_PASSWORD_MAX_LEN 128 /* Public action codes (IEEE Std 802.11-2016, 9.6.8.1, Table 9-307) */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_20_40_BSS_COEX = 0, WLAN_PUB_ACTION_DSE_ENABLEMENT = 1, WLAN_PUB_ACTION_DSE_DEENABLEMENT = 2, WLAN_PUB_ACTION_DSE_REG_LOC_ANN = 3, WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, WLAN_PUB_ACTION_DSE_MSMT_REQ = 5, WLAN_PUB_ACTION_DSE_MSMT_RESP = 6, WLAN_PUB_ACTION_MSMT_PILOT = 7, WLAN_PUB_ACTION_DSE_PC = 8, WLAN_PUB_ACTION_VENDOR_SPECIFIC = 9, WLAN_PUB_ACTION_GAS_INITIAL_REQ = 10, WLAN_PUB_ACTION_GAS_INITIAL_RESP = 11, WLAN_PUB_ACTION_GAS_COMEBACK_REQ = 12, WLAN_PUB_ACTION_GAS_COMEBACK_RESP = 13, WLAN_PUB_ACTION_TDLS_DISCOVER_RES = 14, WLAN_PUB_ACTION_LOC_TRACK_NOTI = 15, WLAN_PUB_ACTION_QAB_REQUEST_FRAME = 16, WLAN_PUB_ACTION_QAB_RESPONSE_FRAME = 17, WLAN_PUB_ACTION_QMF_POLICY = 18, WLAN_PUB_ACTION_QMF_POLICY_CHANGE = 19, WLAN_PUB_ACTION_QLOAD_REQUEST = 20, WLAN_PUB_ACTION_QLOAD_REPORT = 21, WLAN_PUB_ACTION_HCCA_TXOP_ADVERT = 22, WLAN_PUB_ACTION_HCCA_TXOP_RESPONSE = 23, WLAN_PUB_ACTION_PUBLIC_KEY = 24, WLAN_PUB_ACTION_CHANNEL_AVAIL_QUERY = 25, WLAN_PUB_ACTION_CHANNEL_SCHEDULE_MGMT = 26, WLAN_PUB_ACTION_CONTACT_VERI_SIGNAL = 27, WLAN_PUB_ACTION_GDD_ENABLEMENT_REQ = 28, WLAN_PUB_ACTION_GDD_ENABLEMENT_RESP = 29, WLAN_PUB_ACTION_NETWORK_CHANNEL_CONTROL = 30, WLAN_PUB_ACTION_WHITE_SPACE_MAP_ANN = 31, WLAN_PUB_ACTION_FTM_REQUEST = 32, WLAN_PUB_ACTION_FTM_RESPONSE = 33, WLAN_PUB_ACTION_FILS_DISCOVERY = 34, }; /* TDLS action codes */ enum ieee80211_tdls_actioncode { WLAN_TDLS_SETUP_REQUEST = 0, WLAN_TDLS_SETUP_RESPONSE = 1, WLAN_TDLS_SETUP_CONFIRM = 2, WLAN_TDLS_TEARDOWN = 3, WLAN_TDLS_PEER_TRAFFIC_INDICATION = 4, WLAN_TDLS_CHANNEL_SWITCH_REQUEST = 5, WLAN_TDLS_CHANNEL_SWITCH_RESPONSE = 6, WLAN_TDLS_PEER_PSM_REQUEST = 7, WLAN_TDLS_PEER_PSM_RESPONSE = 8, WLAN_TDLS_PEER_TRAFFIC_RESPONSE = 9, WLAN_TDLS_DISCOVERY_REQUEST = 10, }; /* Extended Channel Switching capability to be set in the 1st byte of * the @WLAN_EID_EXT_CAPABILITY information element */ #define WLAN_EXT_CAPA1_EXT_CHANNEL_SWITCHING BIT(2) /* Multiple BSSID capability is set in the 6th bit of 3rd byte of the * @WLAN_EID_EXT_CAPABILITY information element */ #define WLAN_EXT_CAPA3_MULTI_BSSID_SUPPORT BIT(6) /* Timing Measurement protocol for time sync is set in the 7th bit of 3rd byte * of the @WLAN_EID_EXT_CAPABILITY information element */ #define WLAN_EXT_CAPA3_TIMING_MEASUREMENT_SUPPORT BIT(7) /* TDLS capabilities in the 4th byte of @WLAN_EID_EXT_CAPABILITY */ #define WLAN_EXT_CAPA4_TDLS_BUFFER_STA BIT(4) #define WLAN_EXT_CAPA4_TDLS_PEER_PSM BIT(5) #define WLAN_EXT_CAPA4_TDLS_CHAN_SWITCH BIT(6) /* Interworking capabilities are set in 7th bit of 4th byte of the * @WLAN_EID_EXT_CAPABILITY information element */ #define WLAN_EXT_CAPA4_INTERWORKING_ENABLED BIT(7) /* * TDLS capabililites to be enabled in the 5th byte of the * @WLAN_EID_EXT_CAPABILITY information element */ #define WLAN_EXT_CAPA5_TDLS_ENABLED BIT(5) #define WLAN_EXT_CAPA5_TDLS_PROHIBITED BIT(6) #define WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED BIT(7) #define WLAN_EXT_CAPA8_TDLS_WIDE_BW_ENABLED BIT(5) #define WLAN_EXT_CAPA8_OPMODE_NOTIF BIT(6) /* Defines the maximal number of MSDUs in an A-MSDU. */ #define WLAN_EXT_CAPA8_MAX_MSDU_IN_AMSDU_LSB BIT(7) #define WLAN_EXT_CAPA9_MAX_MSDU_IN_AMSDU_MSB BIT(0) /* * Fine Timing Measurement Initiator - bit 71 of @WLAN_EID_EXT_CAPABILITY * information element */ #define WLAN_EXT_CAPA9_FTM_INITIATOR BIT(7) /* Defines support for TWT Requester and TWT Responder */ #define WLAN_EXT_CAPA10_TWT_REQUESTER_SUPPORT BIT(5) #define WLAN_EXT_CAPA10_TWT_RESPONDER_SUPPORT BIT(6) /* * When set, indicates that the AP is able to tolerate 26-tone RU UL * OFDMA transmissions using HE TB PPDU from OBSS (not falsely classify the * 26-tone RU UL OFDMA transmissions as radar pulses). */ #define WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT BIT(7) /* Defines support for enhanced multi-bssid advertisement*/ #define WLAN_EXT_CAPA11_EMA_SUPPORT BIT(3) /* TDLS specific payload type in the LLC/SNAP header */ #define WLAN_TDLS_SNAP_RFTYPE 0x2 /* BSS Coex IE information field bits */ #define WLAN_BSS_COEX_INFORMATION_REQUEST BIT(0) /** * enum ieee80211_mesh_sync_method - mesh synchronization method identifier * * @IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET: the default synchronization method * @IEEE80211_SYNC_METHOD_VENDOR: a vendor specific synchronization method * that will be specified in a vendor specific information element */ enum ieee80211_mesh_sync_method { IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET = 1, IEEE80211_SYNC_METHOD_VENDOR = 255, }; /** * enum ieee80211_mesh_path_protocol - mesh path selection protocol identifier * * @IEEE80211_PATH_PROTOCOL_HWMP: the default path selection protocol * @IEEE80211_PATH_PROTOCOL_VENDOR: a vendor specific protocol that will * be specified in a vendor specific information element */ enum ieee80211_mesh_path_protocol { IEEE80211_PATH_PROTOCOL_HWMP = 1, IEEE80211_PATH_PROTOCOL_VENDOR = 255, }; /** * enum ieee80211_mesh_path_metric - mesh path selection metric identifier * * @IEEE80211_PATH_METRIC_AIRTIME: the default path selection metric * @IEEE80211_PATH_METRIC_VENDOR: a vendor specific metric that will be * specified in a vendor specific information element */ enum ieee80211_mesh_path_metric { IEEE80211_PATH_METRIC_AIRTIME = 1, IEEE80211_PATH_METRIC_VENDOR = 255, }; /** * enum ieee80211_root_mode_identifier - root mesh STA mode identifier * * These attribute are used by dot11MeshHWMPRootMode to set root mesh STA mode * * @IEEE80211_ROOTMODE_NO_ROOT: the mesh STA is not a root mesh STA (default) * @IEEE80211_ROOTMODE_ROOT: the mesh STA is a root mesh STA if greater than * this value * @IEEE80211_PROACTIVE_PREQ_NO_PREP: the mesh STA is a root mesh STA supports * the proactive PREQ with proactive PREP subfield set to 0 * @IEEE80211_PROACTIVE_PREQ_WITH_PREP: the mesh STA is a root mesh STA * supports the proactive PREQ with proactive PREP subfield set to 1 * @IEEE80211_PROACTIVE_RANN: the mesh STA is a root mesh STA supports * the proactive RANN */ enum ieee80211_root_mode_identifier { IEEE80211_ROOTMODE_NO_ROOT = 0, IEEE80211_ROOTMODE_ROOT = 1, IEEE80211_PROACTIVE_PREQ_NO_PREP = 2, IEEE80211_PROACTIVE_PREQ_WITH_PREP = 3, IEEE80211_PROACTIVE_RANN = 4, }; /* * IEEE 802.11-2007 7.3.2.9 Country information element * * Minimum length is 8 octets, ie len must be evenly * divisible by 2 */ /* Although the spec says 8 I'm seeing 6 in practice */ #define IEEE80211_COUNTRY_IE_MIN_LEN 6 /* The Country String field of the element shall be 3 octets in length */ #define IEEE80211_COUNTRY_STRING_LEN 3 /* * For regulatory extension stuff see IEEE 802.11-2007 * Annex I (page 1141) and Annex J (page 1147). Also * review 7.3.2.9. * * When dot11RegulatoryClassesRequired is true and the * first_channel/reg_extension_id is >= 201 then the IE * compromises of the 'ext' struct represented below: * * - Regulatory extension ID - when generating IE this just needs * to be monotonically increasing for each triplet passed in * the IE * - Regulatory class - index into set of rules * - Coverage class - index into air propagation time (Table 7-27), * in microseconds, you can compute the air propagation time from * the index by multiplying by 3, so index 10 yields a propagation * of 10 us. Valid values are 0-31, values 32-255 are not defined * yet. A value of 0 inicates air propagation of <= 1 us. * * See also Table I.2 for Emission limit sets and table * I.3 for Behavior limit sets. Table J.1 indicates how to map * a reg_class to an emission limit set and behavior limit set. */ #define IEEE80211_COUNTRY_EXTENSION_ID 201 /* * Channels numbers in the IE must be monotonically increasing * if dot11RegulatoryClassesRequired is not true. * * If dot11RegulatoryClassesRequired is true consecutive * subband triplets following a regulatory triplet shall * have monotonically increasing first_channel number fields. * * Channel numbers shall not overlap. * * Note that max_power is signed. */ struct ieee80211_country_ie_triplet { union { struct { u8 first_channel; u8 num_channels; s8 max_power; } __packed chans; struct { u8 reg_extension_id; u8 reg_class; u8 coverage_class; } __packed ext; }; } __packed; enum ieee80211_timeout_interval_type { WLAN_TIMEOUT_REASSOC_DEADLINE = 1 /* 802.11r */, WLAN_TIMEOUT_KEY_LIFETIME = 2 /* 802.11r */, WLAN_TIMEOUT_ASSOC_COMEBACK = 3 /* 802.11w */, }; /** * struct ieee80211_timeout_interval_ie - Timeout Interval element * @type: type, see &enum ieee80211_timeout_interval_type * @value: timeout interval value */ struct ieee80211_timeout_interval_ie { u8 type; __le32 value; } __packed; /** * enum ieee80211_idle_options - BSS idle options * @WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE: the station should send an RSN * protected frame to the AP to reset the idle timer at the AP for * the station. */ enum ieee80211_idle_options { WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE = BIT(0), }; /** * struct ieee80211_bss_max_idle_period_ie - BSS max idle period element struct * * This structure refers to "BSS Max idle period element" * * @max_idle_period: indicates the time period during which a station can * refrain from transmitting frames to its associated AP without being * disassociated. In units of 1000 TUs. * @idle_options: indicates the options associated with the BSS idle capability * as specified in &enum ieee80211_idle_options. */ struct ieee80211_bss_max_idle_period_ie { __le16 max_idle_period; u8 idle_options; } __packed; /* BACK action code */ enum ieee80211_back_actioncode { WLAN_ACTION_ADDBA_REQ = 0, WLAN_ACTION_ADDBA_RESP = 1, WLAN_ACTION_DELBA = 2, }; /* BACK (block-ack) parties */ enum ieee80211_back_parties { WLAN_BACK_RECIPIENT = 0, WLAN_BACK_INITIATOR = 1, }; /* SA Query action */ enum ieee80211_sa_query_action { WLAN_ACTION_SA_QUERY_REQUEST = 0, WLAN_ACTION_SA_QUERY_RESPONSE = 1, }; /** * struct ieee80211_bssid_index - multiple BSSID index element structure * * This structure refers to "Multiple BSSID-index element" * * @bssid_index: BSSID index * @dtim_period: optional, overrides transmitted BSS dtim period * @dtim_count: optional, overrides transmitted BSS dtim count */ struct ieee80211_bssid_index { u8 bssid_index; u8 dtim_period; u8 dtim_count; }; /** * struct ieee80211_multiple_bssid_configuration - multiple BSSID configuration * element structure * * This structure refers to "Multiple BSSID Configuration element" * * @bssid_count: total number of active BSSIDs in the set * @profile_periodicity: the least number of beacon frames need to be received * in order to discover all the nontransmitted BSSIDs in the set. */ struct ieee80211_multiple_bssid_configuration { u8 bssid_count; u8 profile_periodicity; }; #define SUITE(oui, id) (((oui) << 8) | (id)) /* cipher suite selectors */ #define WLAN_CIPHER_SUITE_USE_GROUP SUITE(0x000FAC, 0) #define WLAN_CIPHER_SUITE_WEP40 SUITE(0x000FAC, 1) #define WLAN_CIPHER_SUITE_TKIP SUITE(0x000FAC, 2) /* reserved: SUITE(0x000FAC, 3) */ #define WLAN_CIPHER_SUITE_CCMP SUITE(0x000FAC, 4) #define WLAN_CIPHER_SUITE_WEP104 SUITE(0x000FAC, 5) #define WLAN_CIPHER_SUITE_AES_CMAC SUITE(0x000FAC, 6) #define WLAN_CIPHER_SUITE_GCMP SUITE(0x000FAC, 8) #define WLAN_CIPHER_SUITE_GCMP_256 SUITE(0x000FAC, 9) #define WLAN_CIPHER_SUITE_CCMP_256 SUITE(0x000FAC, 10) #define WLAN_CIPHER_SUITE_BIP_GMAC_128 SUITE(0x000FAC, 11) #define WLAN_CIPHER_SUITE_BIP_GMAC_256 SUITE(0x000FAC, 12) #define WLAN_CIPHER_SUITE_BIP_CMAC_256 SUITE(0x000FAC, 13) #define WLAN_CIPHER_SUITE_SMS4 SUITE(0x001472, 1) /* AKM suite selectors */ #define WLAN_AKM_SUITE_8021X SUITE(0x000FAC, 1) #define WLAN_AKM_SUITE_PSK SUITE(0x000FAC, 2) #define WLAN_AKM_SUITE_FT_8021X SUITE(0x000FAC, 3) #define WLAN_AKM_SUITE_FT_PSK SUITE(0x000FAC, 4) #define WLAN_AKM_SUITE_8021X_SHA256 SUITE(0x000FAC, 5) #define WLAN_AKM_SUITE_PSK_SHA256 SUITE(0x000FAC, 6) #define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) #define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) #define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) #define WLAN_AKM_SUITE_AP_PEER_KEY SUITE(0x000FAC, 10) #define WLAN_AKM_SUITE_8021X_SUITE_B SUITE(0x000FAC, 11) #define WLAN_AKM_SUITE_8021X_SUITE_B_192 SUITE(0x000FAC, 12) #define WLAN_AKM_SUITE_FT_8021X_SHA384 SUITE(0x000FAC, 13) #define WLAN_AKM_SUITE_FILS_SHA256 SUITE(0x000FAC, 14) #define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) #define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) #define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) #define WLAN_AKM_SUITE_OWE SUITE(0x000FAC, 18) #define WLAN_AKM_SUITE_FT_PSK_SHA384 SUITE(0x000FAC, 19) #define WLAN_AKM_SUITE_PSK_SHA384 SUITE(0x000FAC, 20) #define WLAN_AKM_SUITE_WFA_DPP SUITE(WLAN_OUI_WFA, 2) #define WLAN_MAX_KEY_LEN 32 #define WLAN_PMK_NAME_LEN 16 #define WLAN_PMKID_LEN 16 #define WLAN_PMK_LEN_EAP_LEAP 16 #define WLAN_PMK_LEN 32 #define WLAN_PMK_LEN_SUITE_B_192 48 #define WLAN_OUI_WFA 0x506f9a #define WLAN_OUI_TYPE_WFA_P2P 9 #define WLAN_OUI_TYPE_WFA_DPP 0x1A #define WLAN_OUI_MICROSOFT 0x0050f2 #define WLAN_OUI_TYPE_MICROSOFT_WPA 1 #define WLAN_OUI_TYPE_MICROSOFT_WMM 2 #define WLAN_OUI_TYPE_MICROSOFT_WPS 4 #define WLAN_OUI_TYPE_MICROSOFT_TPC 8 /* * WMM/802.11e Tspec Element */ #define IEEE80211_WMM_IE_TSPEC_TID_MASK 0x0F #define IEEE80211_WMM_IE_TSPEC_TID_SHIFT 1 enum ieee80211_tspec_status_code { IEEE80211_TSPEC_STATUS_ADMISS_ACCEPTED = 0, IEEE80211_TSPEC_STATUS_ADDTS_INVAL_PARAMS = 0x1, }; struct ieee80211_tspec_ie { u8 element_id; u8 len; u8 oui[3]; u8 oui_type; u8 oui_subtype; u8 version; __le16 tsinfo; u8 tsinfo_resvd; __le16 nominal_msdu; __le16 max_msdu; __le32 min_service_int; __le32 max_service_int; __le32 inactivity_int; __le32 suspension_int; __le32 service_start_time; __le32 min_data_rate; __le32 mean_data_rate; __le32 peak_data_rate; __le32 max_burst_size; __le32 delay_bound; __le32 min_phy_rate; __le16 sba; __le16 medium_time; } __packed; struct ieee80211_he_6ghz_capa { /* uses IEEE80211_HE_6GHZ_CAP_* below */ __le16 capa; } __packed; /* HE 6 GHz band capabilities */ /* uses enum ieee80211_min_mpdu_spacing values */ #define IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START 0x0007 /* uses enum ieee80211_vht_max_ampdu_length_exp values */ #define IEEE80211_HE_6GHZ_CAP_MAX_AMPDU_LEN_EXP 0x0038 /* uses IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_* values */ #define IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN 0x00c0 /* WLAN_HT_CAP_SM_PS_* values */ #define IEEE80211_HE_6GHZ_CAP_SM_PS 0x0600 #define IEEE80211_HE_6GHZ_CAP_RD_RESPONDER 0x0800 #define IEEE80211_HE_6GHZ_CAP_RX_ANTPAT_CONS 0x1000 #define IEEE80211_HE_6GHZ_CAP_TX_ANTPAT_CONS 0x2000 /** * ieee80211_get_qos_ctl - get pointer to qos control bytes * @hdr: the frame * Return: a pointer to the QoS control field in the frame header * * The qos ctrl bytes come after the frame_control, duration, seq_num * and 3 or 4 addresses of length ETH_ALEN. Checks frame_control to choose * between struct ieee80211_qos_hdr_4addr and struct ieee80211_qos_hdr. */ static inline u8 *ieee80211_get_qos_ctl(struct ieee80211_hdr *hdr) { union { struct ieee80211_qos_hdr addr3; struct ieee80211_qos_hdr_4addr addr4; } *qos; qos = (void *)hdr; if (ieee80211_has_a4(qos->addr3.frame_control)) return (u8 *)&qos->addr4.qos_ctrl; else return (u8 *)&qos->addr3.qos_ctrl; } /** * ieee80211_get_tid - get qos TID * @hdr: the frame * Return: the TID from the QoS control field */ static inline u8 ieee80211_get_tid(struct ieee80211_hdr *hdr) { u8 *qc = ieee80211_get_qos_ctl(hdr); return qc[0] & IEEE80211_QOS_CTL_TID_MASK; } /** * ieee80211_get_SA - get pointer to SA * @hdr: the frame * Return: a pointer to the source address (SA) * * Given an 802.11 frame, this function returns the offset * to the source address (SA). It does not verify that the * header is long enough to contain the address, and the * header must be long enough to contain the frame control * field. */ static inline u8 *ieee80211_get_SA(struct ieee80211_hdr *hdr) { if (ieee80211_has_a4(hdr->frame_control)) return hdr->addr4; if (ieee80211_has_fromds(hdr->frame_control)) return hdr->addr3; return hdr->addr2; } /** * ieee80211_get_DA - get pointer to DA * @hdr: the frame * Return: a pointer to the destination address (DA) * * Given an 802.11 frame, this function returns the offset * to the destination address (DA). It does not verify that * the header is long enough to contain the address, and the * header must be long enough to contain the frame control * field. */ static inline u8 *ieee80211_get_DA(struct ieee80211_hdr *hdr) { if (ieee80211_has_tods(hdr->frame_control)) return hdr->addr3; else return hdr->addr1; } /** * ieee80211_is_bufferable_mmpdu - check if frame is bufferable MMPDU * @skb: the skb to check, starting with the 802.11 header * Return: whether or not the MMPDU is bufferable */ static inline bool ieee80211_is_bufferable_mmpdu(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; __le16 fc = mgmt->frame_control; /* * IEEE 802.11 REVme D2.0 definition of bufferable MMPDU; * note that this ignores the IBSS special case. */ if (!ieee80211_is_mgmt(fc)) return false; if (ieee80211_is_disassoc(fc) || ieee80211_is_deauth(fc)) return true; if (!ieee80211_is_action(fc)) return false; if (skb->len < offsetofend(typeof(*mgmt), u.action.u.ftm.action_code)) return true; /* action frame - additionally check for non-bufferable FTM */ if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC && mgmt->u.action.category != WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) return true; if (mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_REQUEST || mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_RESPONSE) return false; return true; } /** * _ieee80211_is_robust_mgmt_frame - check if frame is a robust management frame * @hdr: the frame (buffer must include at least the first octet of payload) * Return: whether or not the frame is a robust management frame */ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) { if (ieee80211_is_disassoc(hdr->frame_control) || ieee80211_is_deauth(hdr->frame_control)) return true; if (ieee80211_is_action(hdr->frame_control)) { u8 *category; /* * Action frames, excluding Public Action frames, are Robust * Management Frames. However, if we are looking at a Protected * frame, skip the check since the data may be encrypted and * the frame has already been found to be a Robust Management * Frame (by the other end). */ if (ieee80211_has_protected(hdr->frame_control)) return true; category = ((u8 *) hdr) + 24; return *category != WLAN_CATEGORY_PUBLIC && *category != WLAN_CATEGORY_HT && *category != WLAN_CATEGORY_WNM_UNPROTECTED && *category != WLAN_CATEGORY_SELF_PROTECTED && *category != WLAN_CATEGORY_UNPROT_DMG && *category != WLAN_CATEGORY_VHT && *category != WLAN_CATEGORY_S1G && *category != WLAN_CATEGORY_VENDOR_SPECIFIC; } return false; } /** * ieee80211_is_robust_mgmt_frame - check if skb contains a robust mgmt frame * @skb: the skb containing the frame, length will be checked * Return: whether or not the frame is a robust management frame */ static inline bool ieee80211_is_robust_mgmt_frame(struct sk_buff *skb) { if (skb->len < IEEE80211_MIN_ACTION_SIZE) return false; return _ieee80211_is_robust_mgmt_frame((void *)skb->data); } /** * ieee80211_is_public_action - check if frame is a public action frame * @hdr: the frame * @len: length of the frame * Return: whether or not the frame is a public action frame */ static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr, size_t len) { struct ieee80211_mgmt *mgmt = (void *)hdr; if (len < IEEE80211_MIN_ACTION_SIZE) return false; if (!ieee80211_is_action(hdr->frame_control)) return false; return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC; } /** * ieee80211_is_protected_dual_of_public_action - check if skb contains a * protected dual of public action management frame * @skb: the skb containing the frame, length will be checked * * Return: true if the skb contains a protected dual of public action * management frame, false otherwise. */ static inline bool ieee80211_is_protected_dual_of_public_action(struct sk_buff *skb) { u8 action; if (!ieee80211_is_public_action((void *)skb->data, skb->len) || skb->len < IEEE80211_MIN_ACTION_SIZE + 1) return false; action = *(u8 *)(skb->data + IEEE80211_MIN_ACTION_SIZE); return action != WLAN_PUB_ACTION_20_40_BSS_COEX && action != WLAN_PUB_ACTION_DSE_REG_LOC_ANN && action != WLAN_PUB_ACTION_MSMT_PILOT && action != WLAN_PUB_ACTION_TDLS_DISCOVER_RES && action != WLAN_PUB_ACTION_LOC_TRACK_NOTI && action != WLAN_PUB_ACTION_FTM_REQUEST && action != WLAN_PUB_ACTION_FTM_RESPONSE && action != WLAN_PUB_ACTION_FILS_DISCOVERY && action != WLAN_PUB_ACTION_VENDOR_SPECIFIC; } /** * _ieee80211_is_group_privacy_action - check if frame is a group addressed * privacy action frame * @hdr: the frame * Return: whether or not the frame is a group addressed privacy action frame */ static inline bool _ieee80211_is_group_privacy_action(struct ieee80211_hdr *hdr) { struct ieee80211_mgmt *mgmt = (void *)hdr; if (!ieee80211_is_action(hdr->frame_control) || !is_multicast_ether_addr(hdr->addr1)) return false; return mgmt->u.action.category == WLAN_CATEGORY_MESH_ACTION || mgmt->u.action.category == WLAN_CATEGORY_MULTIHOP_ACTION; } /** * ieee80211_is_group_privacy_action - check if frame is a group addressed * privacy action frame * @skb: the skb containing the frame, length will be checked * Return: whether or not the frame is a group addressed privacy action frame */ static inline bool ieee80211_is_group_privacy_action(struct sk_buff *skb) { if (skb->len < IEEE80211_MIN_ACTION_SIZE) return false; return _ieee80211_is_group_privacy_action((void *)skb->data); } /** * ieee80211_tu_to_usec - convert time units (TU) to microseconds * @tu: the TUs * Return: the time value converted to microseconds */ static inline unsigned long ieee80211_tu_to_usec(unsigned long tu) { return 1024 * tu; } /** * ieee80211_check_tim - check if AID bit is set in TIM * @tim: the TIM IE * @tim_len: length of the TIM IE * @aid: the AID to look for * Return: whether or not traffic is indicated in the TIM for the given AID */ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, u8 tim_len, u16 aid) { u8 mask; u8 index, indexn1, indexn2; if (unlikely(!tim || tim_len < sizeof(*tim))) return false; aid &= 0x3fff; index = aid / 8; mask = 1 << (aid & 7); indexn1 = tim->bitmap_ctrl & 0xfe; indexn2 = tim_len + indexn1 - 4; if (index < indexn1 || index > indexn2) return false; index -= indexn1; return !!(tim->virtual_map[index] & mask); } /** * ieee80211_get_tdls_action - get TDLS action code * @skb: the skb containing the frame, length will not be checked * Return: the TDLS action code, or -1 if it's not an encapsulated TDLS action * frame * * This function assumes the frame is a data frame, and that the network header * is in the correct place. */ static inline int ieee80211_get_tdls_action(struct sk_buff *skb) { if (!skb_is_nonlinear(skb) && skb->len > (skb_network_offset(skb) + 2)) { /* Point to where the indication of TDLS should start */ const u8 *tdls_data = skb_network_header(skb) - 2; if (get_unaligned_be16(tdls_data) == ETH_P_TDLS && tdls_data[2] == WLAN_TDLS_SNAP_RFTYPE && tdls_data[3] == WLAN_CATEGORY_TDLS) return tdls_data[4]; } return -1; } /* convert time units */ #define TU_TO_JIFFIES(x) (usecs_to_jiffies((x) * 1024)) #define TU_TO_EXP_TIME(x) (jiffies + TU_TO_JIFFIES(x)) /* convert frequencies */ #define MHZ_TO_KHZ(freq) ((freq) * 1000) #define KHZ_TO_MHZ(freq) ((freq) / 1000) #define PR_KHZ(f) KHZ_TO_MHZ(f), f % 1000 #define KHZ_F "%d.%03d" /* convert powers */ #define DBI_TO_MBI(gain) ((gain) * 100) #define MBI_TO_DBI(gain) ((gain) / 100) #define DBM_TO_MBM(gain) ((gain) * 100) #define MBM_TO_DBM(gain) ((gain) / 100) /** * ieee80211_action_contains_tpc - checks if the frame contains TPC element * @skb: the skb containing the frame, length will be checked * Return: %true if the frame contains a TPC element, %false otherwise * * This function checks if it's either TPC report action frame or Link * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5 * and 8.5.7.5 accordingly. */ static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; if (!ieee80211_is_action(mgmt->frame_control)) return false; if (skb->len < IEEE80211_MIN_ACTION_SIZE + sizeof(mgmt->u.action.u.tpc_report)) return false; /* * TPC report - check that: * category = 0 (Spectrum Management) or 5 (Radio Measurement) * spectrum management action = 3 (TPC/Link Measurement report) * TPC report EID = 35 * TPC report element length = 2 * * The spectrum management's tpc_report struct is used here both for * parsing tpc_report and radio measurement's link measurement report * frame, since the relevant part is identical in both frames. */ if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT && mgmt->u.action.category != WLAN_CATEGORY_RADIO_MEASUREMENT) return false; /* both spectrum mgmt and link measurement have same action code */ if (mgmt->u.action.u.tpc_report.action_code != WLAN_ACTION_SPCT_TPC_RPRT) return false; if (mgmt->u.action.u.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT || mgmt->u.action.u.tpc_report.tpc_elem_length != sizeof(struct ieee80211_tpc_report_ie)) return false; return true; } /** * ieee80211_is_timing_measurement - check if frame is timing measurement response * @skb: the SKB to check * Return: whether or not the frame is a valid timing measurement response */ static inline bool ieee80211_is_timing_measurement(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; if (skb->len < IEEE80211_MIN_ACTION_SIZE) return false; if (!ieee80211_is_action(mgmt->frame_control)) return false; if (mgmt->u.action.category == WLAN_CATEGORY_WNM_UNPROTECTED && mgmt->u.action.u.wnm_timing_msr.action_code == WLAN_UNPROTECTED_WNM_ACTION_TIMING_MEASUREMENT_RESPONSE && skb->len >= offsetofend(typeof(*mgmt), u.action.u.wnm_timing_msr)) return true; return false; } /** * ieee80211_is_ftm - check if frame is FTM response * @skb: the SKB to check * Return: whether or not the frame is a valid FTM response action frame */ static inline bool ieee80211_is_ftm(struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; if (!ieee80211_is_public_action((void *)mgmt, skb->len)) return false; if (mgmt->u.action.u.ftm.action_code == WLAN_PUB_ACTION_FTM_RESPONSE && skb->len >= offsetofend(typeof(*mgmt), u.action.u.ftm)) return true; return false; } struct element { u8 id; u8 datalen; u8 data[]; } __packed; /* element iteration helpers */ #define for_each_element(_elem, _data, _datalen) \ for (_elem = (const struct element *)(_data); \ (const u8 *)(_data) + (_datalen) - (const u8 *)_elem >= \ (int)sizeof(*_elem) && \ (const u8 *)(_data) + (_datalen) - (const u8 *)_elem >= \ (int)sizeof(*_elem) + _elem->datalen; \ _elem = (const struct element *)(_elem->data + _elem->datalen)) #define for_each_element_id(element, _id, data, datalen) \ for_each_element(element, data, datalen) \ if (element->id == (_id)) #define for_each_element_extid(element, extid, _data, _datalen) \ for_each_element(element, _data, _datalen) \ if (element->id == WLAN_EID_EXTENSION && \ element->datalen > 0 && \ element->data[0] == (extid)) #define for_each_subelement(sub, element) \ for_each_element(sub, (element)->data, (element)->datalen) #define for_each_subelement_id(sub, id, element) \ for_each_element_id(sub, id, (element)->data, (element)->datalen) #define for_each_subelement_extid(sub, extid, element) \ for_each_element_extid(sub, extid, (element)->data, (element)->datalen) /** * for_each_element_completed - determine if element parsing consumed all data * @element: element pointer after for_each_element() or friends * @data: same data pointer as passed to for_each_element() or friends * @datalen: same data length as passed to for_each_element() or friends * Return: %true if all elements were iterated, %false otherwise; see notes * * This function returns %true if all the data was parsed or considered * while walking the elements. Only use this if your for_each_element() * loop cannot be broken out of, otherwise it always returns %false. * * If some data was malformed, this returns %false since the last parsed * element will not fill the whole remaining data. */ static inline bool for_each_element_completed(const struct element *element, const void *data, size_t datalen) { return (const u8 *)element == (const u8 *)data + datalen; } /* * RSNX Capabilities: * bits 0-3: Field length (n-1) */ #define WLAN_RSNX_CAPA_PROTECTED_TWT BIT(4) #define WLAN_RSNX_CAPA_SAE_H2E BIT(5) /* * reduced neighbor report, based on Draft P802.11ax_D6.1, * section 9.4.2.170 and accepted contributions. */ #define IEEE80211_AP_INFO_TBTT_HDR_TYPE 0x03 #define IEEE80211_AP_INFO_TBTT_HDR_FILTERED 0x04 #define IEEE80211_AP_INFO_TBTT_HDR_COLOC 0x08 #define IEEE80211_AP_INFO_TBTT_HDR_COUNT 0xF0 #define IEEE80211_TBTT_INFO_TYPE_TBTT 0 #define IEEE80211_TBTT_INFO_TYPE_MLD 1 #define IEEE80211_RNR_TBTT_PARAMS_OCT_RECOMMENDED 0x01 #define IEEE80211_RNR_TBTT_PARAMS_SAME_SSID 0x02 #define IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID 0x04 #define IEEE80211_RNR_TBTT_PARAMS_TRANSMITTED_BSSID 0x08 #define IEEE80211_RNR_TBTT_PARAMS_COLOC_ESS 0x10 #define IEEE80211_RNR_TBTT_PARAMS_PROBE_ACTIVE 0x20 #define IEEE80211_RNR_TBTT_PARAMS_COLOC_AP 0x40 #define IEEE80211_RNR_TBTT_PARAMS_PSD_NO_LIMIT 127 #define IEEE80211_RNR_TBTT_PARAMS_PSD_RESERVED -128 struct ieee80211_neighbor_ap_info { u8 tbtt_info_hdr; u8 tbtt_info_len; u8 op_class; u8 channel; } __packed; enum ieee80211_range_params_max_total_ltf { IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_4 = 0, IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_8, IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_16, IEEE80211_RANGE_PARAMS_MAX_TOTAL_LTF_UNSPECIFIED, }; /* * reduced neighbor report, based on Draft P802.11be_D3.0, * section 9.4.2.170.2. */ struct ieee80211_rnr_mld_params { u8 mld_id; __le16 params; } __packed; #define IEEE80211_RNR_MLD_PARAMS_LINK_ID 0x000F #define IEEE80211_RNR_MLD_PARAMS_BSS_CHANGE_COUNT 0x0FF0 #define IEEE80211_RNR_MLD_PARAMS_UPDATES_INCLUDED 0x1000 #define IEEE80211_RNR_MLD_PARAMS_DISABLED_LINK 0x2000 /* Format of the TBTT information element if it has 7, 8 or 9 bytes */ struct ieee80211_tbtt_info_7_8_9 { u8 tbtt_offset; u8 bssid[ETH_ALEN]; /* The following element is optional, structure may not grow */ u8 bss_params; s8 psd_20; } __packed; /* Format of the TBTT information element if it has >= 11 bytes */ struct ieee80211_tbtt_info_ge_11 { u8 tbtt_offset; u8 bssid[ETH_ALEN]; __le32 short_ssid; /* The following elements are optional, structure may grow */ u8 bss_params; s8 psd_20; struct ieee80211_rnr_mld_params mld_params; } __packed; /* multi-link device */ #define IEEE80211_MLD_MAX_NUM_LINKS 15 #define IEEE80211_ML_CONTROL_TYPE 0x0007 #define IEEE80211_ML_CONTROL_TYPE_BASIC 0 #define IEEE80211_ML_CONTROL_TYPE_PREQ 1 #define IEEE80211_ML_CONTROL_TYPE_RECONF 2 #define IEEE80211_ML_CONTROL_TYPE_TDLS 3 #define IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS 4 #define IEEE80211_ML_CONTROL_PRESENCE_MASK 0xfff0 struct ieee80211_multi_link_elem { __le16 control; u8 variable[]; } __packed; #define IEEE80211_MLC_BASIC_PRES_LINK_ID 0x0010 #define IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT 0x0020 #define IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY 0x0040 #define IEEE80211_MLC_BASIC_PRES_EML_CAPA 0x0080 #define IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP 0x0100 #define IEEE80211_MLC_BASIC_PRES_MLD_ID 0x0200 #define IEEE80211_MED_SYNC_DELAY_DURATION 0x00ff #define IEEE80211_MED_SYNC_DELAY_SYNC_OFDM_ED_THRESH 0x0f00 #define IEEE80211_MED_SYNC_DELAY_SYNC_MAX_NUM_TXOPS 0xf000 /* * Described in P802.11be_D3.0 * dot11MSDTimerDuration should default to 5484 (i.e. 171.375) * dot11MSDOFDMEDthreshold defaults to -72 (i.e. 0) * dot11MSDTXOPMAX defaults to 1 */ #define IEEE80211_MED_SYNC_DELAY_DEFAULT 0x10ac #define IEEE80211_EML_CAP_EMLSR_SUPP 0x0001 #define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY 0x000e #define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_0US 0 #define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_32US 1 #define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_64US 2 #define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_128US 3 #define IEEE80211_EML_CAP_EMLSR_PADDING_DELAY_256US 4 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY 0x0070 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_0US 0 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_16US 1 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_32US 2 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_64US 3 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_128US 4 #define IEEE80211_EML_CAP_EMLSR_TRANSITION_DELAY_256US 5 #define IEEE80211_EML_CAP_EMLMR_SUPPORT 0x0080 #define IEEE80211_EML_CAP_EMLMR_DELAY 0x0700 #define IEEE80211_EML_CAP_EMLMR_DELAY_0US 0 #define IEEE80211_EML_CAP_EMLMR_DELAY_32US 1 #define IEEE80211_EML_CAP_EMLMR_DELAY_64US 2 #define IEEE80211_EML_CAP_EMLMR_DELAY_128US 3 #define IEEE80211_EML_CAP_EMLMR_DELAY_256US 4 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT 0x7800 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_0 0 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128US 1 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_256US 2 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_512US 3 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_1TU 4 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_2TU 5 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_4TU 6 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_8TU 7 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_16TU 8 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_32TU 9 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_64TU 10 #define IEEE80211_EML_CAP_TRANSITION_TIMEOUT_128TU 11 #define IEEE80211_MLD_CAP_OP_MAX_SIMUL_LINKS 0x000f #define IEEE80211_MLD_CAP_OP_SRS_SUPPORT 0x0010 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP 0x0060 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_NO_SUPP 0 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_SAME 1 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_RESERVED 2 #define IEEE80211_MLD_CAP_OP_TID_TO_LINK_MAP_NEG_SUPP_DIFF 3 #define IEEE80211_MLD_CAP_OP_FREQ_SEP_TYPE_IND 0x0f80 #define IEEE80211_MLD_CAP_OP_AAR_SUPPORT 0x1000 struct ieee80211_mle_basic_common_info { u8 len; u8 mld_mac_addr[ETH_ALEN]; u8 variable[]; } __packed; #define IEEE80211_MLC_PREQ_PRES_MLD_ID 0x0010 struct ieee80211_mle_preq_common_info { u8 len; u8 variable[]; } __packed; #define IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR 0x0010 /* no fixed fields in RECONF */ struct ieee80211_mle_tdls_common_info { u8 len; u8 ap_mld_mac_addr[ETH_ALEN]; } __packed; #define IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR 0x0010 /* no fixed fields in PRIO_ACCESS */ /** * ieee80211_mle_common_size - check multi-link element common size * @data: multi-link element, must already be checked for size using * ieee80211_mle_size_ok() * Return: the size of the multi-link element's "common" subfield */ static inline u8 ieee80211_mle_common_size(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); u8 common = 0; switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: case IEEE80211_ML_CONTROL_TYPE_PREQ: case IEEE80211_ML_CONTROL_TYPE_TDLS: case IEEE80211_ML_CONTROL_TYPE_RECONF: /* * The length is the first octet pointed by mle->variable so no * need to add anything */ break; case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: if (control & IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR) common += ETH_ALEN; return common; default: WARN_ON(1); return 0; } return sizeof(*mle) + common + mle->variable[0]; } /** * ieee80211_mle_get_link_id - returns the link ID * @data: the basic multi link element * Return: the link ID, or -1 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). */ static inline int ieee80211_mle_get_link_id(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* common points now at the beginning of ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_LINK_ID)) return -1; return *common; } /** * ieee80211_mle_get_bss_param_ch_cnt - returns the BSS parameter change count * @data: pointer to the basic multi link element * Return: the BSS Parameter Change Count field value, or -1 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). */ static inline int ieee80211_mle_get_bss_param_ch_cnt(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* common points now at the beginning of ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT)) return -1; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; return *common; } /** * ieee80211_mle_get_eml_med_sync_delay - returns the medium sync delay * @data: pointer to the multi-link element * Return: the medium synchronization delay field value from the multi-link * element, or the default value (%IEEE80211_MED_SYNC_DELAY_DEFAULT) * if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). */ static inline u16 ieee80211_mle_get_eml_med_sync_delay(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* common points now at the beginning of ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY)) return IEEE80211_MED_SYNC_DELAY_DEFAULT; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) common += 1; return get_unaligned_le16(common); } /** * ieee80211_mle_get_eml_cap - returns the EML capability * @data: pointer to the multi-link element * Return: the EML capability field value from the multi-link element, * or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). */ static inline u16 ieee80211_mle_get_eml_cap(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* common points now at the beginning of ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_EML_CAPA)) return 0; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) common += 2; return get_unaligned_le16(common); } /** * ieee80211_mle_get_mld_capa_op - returns the MLD capabilities and operations. * @data: pointer to the multi-link element * Return: the MLD capabilities and operations field value from the multi-link * element, or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). */ static inline u16 ieee80211_mle_get_mld_capa_op(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* * common points now at the beginning of * ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP)) return 0; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) common += 2; return get_unaligned_le16(common); } /** * ieee80211_mle_get_mld_id - returns the MLD ID * @data: pointer to the multi-link element * Return: The MLD ID in the given multi-link element, or 0 if not present * * The element is assumed to be of the correct type (BASIC) and big enough, * this must be checked using ieee80211_mle_type_ok(). */ static inline u8 ieee80211_mle_get_mld_id(const u8 *data) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control = le16_to_cpu(mle->control); const u8 *common = mle->variable; /* * common points now at the beginning of * ieee80211_mle_basic_common_info */ common += sizeof(struct ieee80211_mle_basic_common_info); if (!(control & IEEE80211_MLC_BASIC_PRES_MLD_ID)) return 0; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) common += 2; return *common; } /** * ieee80211_mle_size_ok - validate multi-link element size * @data: pointer to the element data * @len: length of the containing element * Return: whether or not the multi-link element size is OK */ static inline bool ieee80211_mle_size_ok(const u8 *data, size_t len) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u8 fixed = sizeof(*mle); u8 common = 0; bool check_common_len = false; u16 control; if (!data || len < fixed) return false; control = le16_to_cpu(mle->control); switch (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE)) { case IEEE80211_ML_CONTROL_TYPE_BASIC: common += sizeof(struct ieee80211_mle_basic_common_info); check_common_len = true; if (control & IEEE80211_MLC_BASIC_PRES_LINK_ID) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_BSS_PARAM_CH_CNT) common += 1; if (control & IEEE80211_MLC_BASIC_PRES_MED_SYNC_DELAY) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_EML_CAPA) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_MLD_CAPA_OP) common += 2; if (control & IEEE80211_MLC_BASIC_PRES_MLD_ID) common += 1; break; case IEEE80211_ML_CONTROL_TYPE_PREQ: common += sizeof(struct ieee80211_mle_preq_common_info); if (control & IEEE80211_MLC_PREQ_PRES_MLD_ID) common += 1; check_common_len = true; break; case IEEE80211_ML_CONTROL_TYPE_RECONF: if (control & IEEE80211_MLC_RECONF_PRES_MLD_MAC_ADDR) common += ETH_ALEN; break; case IEEE80211_ML_CONTROL_TYPE_TDLS: common += sizeof(struct ieee80211_mle_tdls_common_info); check_common_len = true; break; case IEEE80211_ML_CONTROL_TYPE_PRIO_ACCESS: if (control & IEEE80211_MLC_PRIO_ACCESS_PRES_AP_MLD_MAC_ADDR) common += ETH_ALEN; break; default: /* we don't know this type */ return true; } if (len < fixed + common) return false; if (!check_common_len) return true; /* if present, common length is the first octet there */ return mle->variable[0] >= common; } /** * ieee80211_mle_type_ok - validate multi-link element type and size * @data: pointer to the element data * @type: expected type of the element * @len: length of the containing element * Return: whether or not the multi-link element type matches and size is OK */ static inline bool ieee80211_mle_type_ok(const u8 *data, u8 type, size_t len) { const struct ieee80211_multi_link_elem *mle = (const void *)data; u16 control; if (!ieee80211_mle_size_ok(data, len)) return false; control = le16_to_cpu(mle->control); if (u16_get_bits(control, IEEE80211_ML_CONTROL_TYPE) == type) return true; return false; } enum ieee80211_mle_subelems { IEEE80211_MLE_SUBELEM_PER_STA_PROFILE = 0, IEEE80211_MLE_SUBELEM_FRAGMENT = 254, }; #define IEEE80211_MLE_STA_CONTROL_LINK_ID 0x000f #define IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE 0x0010 #define IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT 0x0020 #define IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT 0x0040 #define IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT 0x0080 #define IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT 0x0100 #define IEEE80211_MLE_STA_CONTROL_NSTR_LINK_PAIR_PRESENT 0x0200 #define IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE 0x0400 #define IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT 0x0800 struct ieee80211_mle_per_sta_profile { __le16 control; u8 sta_info_len; u8 variable[]; } __packed; /** * ieee80211_mle_basic_sta_prof_size_ok - validate basic multi-link element sta * profile size * @data: pointer to the sub element data * @len: length of the containing sub element * Return: %true if the STA profile is large enough, %false otherwise */ static inline bool ieee80211_mle_basic_sta_prof_size_ok(const u8 *data, size_t len) { const struct ieee80211_mle_per_sta_profile *prof = (const void *)data; u16 control; u8 fixed = sizeof(*prof); u8 info_len = 1; if (len < fixed) return false; control = le16_to_cpu(prof->control); if (control & IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT) info_len += 6; if (control & IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT) info_len += 2; if (control & IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT) info_len += 8; if (control & IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT) info_len += 2; if (control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE && control & IEEE80211_MLE_STA_CONTROL_NSTR_LINK_PAIR_PRESENT) { if (control & IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE) info_len += 2; else info_len += 1; } if (control & IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT) info_len += 1; return prof->sta_info_len >= info_len && fixed + prof->sta_info_len - 1 <= len; } /** * ieee80211_mle_basic_sta_prof_bss_param_ch_cnt - get per-STA profile BSS * parameter change count * @prof: the per-STA profile, having been checked with * ieee80211_mle_basic_sta_prof_size_ok() for the correct length * * Return: The BSS parameter change count value if present, 0 otherwise. */ static inline u8 ieee80211_mle_basic_sta_prof_bss_param_ch_cnt(const struct ieee80211_mle_per_sta_profile *prof) { u16 control = le16_to_cpu(prof->control); const u8 *pos = prof->variable; if (!(control & IEEE80211_MLE_STA_CONTROL_BSS_PARAM_CHANGE_CNT_PRESENT)) return 0; if (control & IEEE80211_MLE_STA_CONTROL_STA_MAC_ADDR_PRESENT) pos += 6; if (control & IEEE80211_MLE_STA_CONTROL_BEACON_INT_PRESENT) pos += 2; if (control & IEEE80211_MLE_STA_CONTROL_TSF_OFFS_PRESENT) pos += 8; if (control & IEEE80211_MLE_STA_CONTROL_DTIM_INFO_PRESENT) pos += 2; if (control & IEEE80211_MLE_STA_CONTROL_COMPLETE_PROFILE && control & IEEE80211_MLE_STA_CONTROL_NSTR_LINK_PAIR_PRESENT) { if (control & IEEE80211_MLE_STA_CONTROL_NSTR_BITMAP_SIZE) pos += 2; else pos += 1; } return *pos; } #define IEEE80211_MLE_STA_RECONF_CONTROL_LINK_ID 0x000f #define IEEE80211_MLE_STA_RECONF_CONTROL_COMPLETE_PROFILE 0x0010 #define IEEE80211_MLE_STA_RECONF_CONTROL_STA_MAC_ADDR_PRESENT 0x0020 #define IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT 0x0040 #define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_UPDATE_TYPE 0x0780 #define IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_PARAMS_PRESENT 0x0800 /** * ieee80211_mle_reconf_sta_prof_size_ok - validate reconfiguration multi-link * element sta profile size. * @data: pointer to the sub element data * @len: length of the containing sub element * Return: %true if the STA profile is large enough, %false otherwise */ static inline bool ieee80211_mle_reconf_sta_prof_size_ok(const u8 *data, size_t len) { const struct ieee80211_mle_per_sta_profile *prof = (const void *)data; u16 control; u8 fixed = sizeof(*prof); u8 info_len = 1; if (len < fixed) return false; control = le16_to_cpu(prof->control); if (control & IEEE80211_MLE_STA_RECONF_CONTROL_STA_MAC_ADDR_PRESENT) info_len += ETH_ALEN; if (control & IEEE80211_MLE_STA_RECONF_CONTROL_AP_REM_TIMER_PRESENT) info_len += 2; if (control & IEEE80211_MLE_STA_RECONF_CONTROL_OPERATION_PARAMS_PRESENT) info_len += 2; return prof->sta_info_len >= info_len && fixed + prof->sta_info_len - 1 <= len; } static inline bool ieee80211_tid_to_link_map_size_ok(const u8 *data, size_t len) { const struct ieee80211_ttlm_elem *t2l = (const void *)data; u8 control, fixed = sizeof(*t2l), elem_len = 0; if (len < fixed) return false; control = t2l->control; if (control & IEEE80211_TTLM_CONTROL_SWITCH_TIME_PRESENT) elem_len += 2; if (control & IEEE80211_TTLM_CONTROL_EXPECTED_DUR_PRESENT) elem_len += 3; if (!(control & IEEE80211_TTLM_CONTROL_DEF_LINK_MAP)) { u8 bm_size; elem_len += 1; if (len < fixed + elem_len) return false; if (control & IEEE80211_TTLM_CONTROL_LINK_MAP_SIZE) bm_size = 1; else bm_size = 2; elem_len += hweight8(t2l->optional[0]) * bm_size; } return len >= fixed + elem_len; } #define for_each_mle_subelement(_elem, _data, _len) \ if (ieee80211_mle_size_ok(_data, _len)) \ for_each_element(_elem, \ _data + ieee80211_mle_common_size(_data),\ _len - ieee80211_mle_common_size(_data)) #endif /* LINUX_IEEE80211_H */ |
| 16 5 4 1 1 860 866 65 64 65 65 26 39 39 37 39 39 39 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 | // SPDX-License-Identifier: GPL-2.0-only /* * lib/hexdump.c */ #include <linux/types.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/minmax.h> #include <linux/export.h> #include <asm/unaligned.h> const char hex_asc[] = "0123456789abcdef"; EXPORT_SYMBOL(hex_asc); const char hex_asc_upper[] = "0123456789ABCDEF"; EXPORT_SYMBOL(hex_asc_upper); /** * hex_to_bin - convert a hex digit to its real value * @ch: ascii character represents hex digit * * hex_to_bin() converts one hex digit to its actual value or -1 in case of bad * input. * * This function is used to load cryptographic keys, so it is coded in such a * way that there are no conditions or memory accesses that depend on data. * * Explanation of the logic: * (ch - '9' - 1) is negative if ch <= '9' * ('0' - 1 - ch) is negative if ch >= '0' * we "and" these two values, so the result is negative if ch is in the range * '0' ... '9' * we are only interested in the sign, so we do a shift ">> 8"; note that right * shift of a negative value is implementation-defined, so we cast the * value to (unsigned) before the shift --- we have 0xffffff if ch is in * the range '0' ... '9', 0 otherwise * we "and" this value with (ch - '0' + 1) --- we have a value 1 ... 10 if ch is * in the range '0' ... '9', 0 otherwise * we add this value to -1 --- we have a value 0 ... 9 if ch is in the range '0' * ... '9', -1 otherwise * the next line is similar to the previous one, but we need to decode both * uppercase and lowercase letters, so we use (ch & 0xdf), which converts * lowercase to uppercase */ int hex_to_bin(unsigned char ch) { unsigned char cu = ch & 0xdf; return -1 + ((ch - '0' + 1) & (unsigned)((ch - '9' - 1) & ('0' - 1 - ch)) >> 8) + ((cu - 'A' + 11) & (unsigned)((cu - 'F' - 1) & ('A' - 1 - cu)) >> 8); } EXPORT_SYMBOL(hex_to_bin); /** * hex2bin - convert an ascii hexadecimal string to its binary representation * @dst: binary result * @src: ascii hexadecimal string * @count: result length * * Return 0 on success, -EINVAL in case of bad input. */ int hex2bin(u8 *dst, const char *src, size_t count) { while (count--) { int hi, lo; hi = hex_to_bin(*src++); if (unlikely(hi < 0)) return -EINVAL; lo = hex_to_bin(*src++); if (unlikely(lo < 0)) return -EINVAL; *dst++ = (hi << 4) | lo; } return 0; } EXPORT_SYMBOL(hex2bin); /** * bin2hex - convert binary data to an ascii hexadecimal string * @dst: ascii hexadecimal result * @src: binary data * @count: binary data length */ char *bin2hex(char *dst, const void *src, size_t count) { const unsigned char *_src = src; while (count--) dst = hex_byte_pack(dst, *_src++); return dst; } EXPORT_SYMBOL(bin2hex); /** * hex_dump_to_buffer - convert a blob of data to "hex ASCII" in memory * @buf: data blob to dump * @len: number of bytes in the @buf * @rowsize: number of bytes to print per line; must be 16 or 32 * @groupsize: number of bytes to print at a time (1, 2, 4, 8; default = 1) * @linebuf: where to put the converted data * @linebuflen: total size of @linebuf, including space for terminating NUL * @ascii: include ASCII after the hex output * * hex_dump_to_buffer() works on one "line" of output at a time, i.e., * 16 or 32 bytes of input data converted to hex + ASCII output. * * Given a buffer of u8 data, hex_dump_to_buffer() converts the input data * to a hex + ASCII dump at the supplied memory location. * The converted output is always NUL-terminated. * * E.g.: * hex_dump_to_buffer(frame->data, frame->len, 16, 1, * linebuf, sizeof(linebuf), true); * * example output buffer: * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO * * Return: * The amount of bytes placed in the buffer without terminating NUL. If the * output was truncated, then the return value is the number of bytes * (excluding the terminating NUL) which would have been written to the final * string if enough space had been available. */ int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize, char *linebuf, size_t linebuflen, bool ascii) { const u8 *ptr = buf; int ngroups; u8 ch; int j, lx = 0; int ascii_column; int ret; if (rowsize != 16 && rowsize != 32) rowsize = 16; if (len > rowsize) /* limit to one line at a time */ len = rowsize; if (!is_power_of_2(groupsize) || groupsize > 8) groupsize = 1; if ((len % groupsize) != 0) /* no mixed size output */ groupsize = 1; ngroups = len / groupsize; ascii_column = rowsize * 2 + rowsize / groupsize + 1; if (!linebuflen) goto overflow1; if (!len) goto nil; if (groupsize == 8) { const u64 *ptr8 = buf; for (j = 0; j < ngroups; j++) { ret = snprintf(linebuf + lx, linebuflen - lx, "%s%16.16llx", j ? " " : "", get_unaligned(ptr8 + j)); if (ret >= linebuflen - lx) goto overflow1; lx += ret; } } else if (groupsize == 4) { const u32 *ptr4 = buf; for (j = 0; j < ngroups; j++) { ret = snprintf(linebuf + lx, linebuflen - lx, "%s%8.8x", j ? " " : "", get_unaligned(ptr4 + j)); if (ret >= linebuflen - lx) goto overflow1; lx += ret; } } else if (groupsize == 2) { const u16 *ptr2 = buf; for (j = 0; j < ngroups; j++) { ret = snprintf(linebuf + lx, linebuflen - lx, "%s%4.4x", j ? " " : "", get_unaligned(ptr2 + j)); if (ret >= linebuflen - lx) goto overflow1; lx += ret; } } else { for (j = 0; j < len; j++) { if (linebuflen < lx + 2) goto overflow2; ch = ptr[j]; linebuf[lx++] = hex_asc_hi(ch); if (linebuflen < lx + 2) goto overflow2; linebuf[lx++] = hex_asc_lo(ch); if (linebuflen < lx + 2) goto overflow2; linebuf[lx++] = ' '; } if (j) lx--; } if (!ascii) goto nil; while (lx < ascii_column) { if (linebuflen < lx + 2) goto overflow2; linebuf[lx++] = ' '; } for (j = 0; j < len; j++) { if (linebuflen < lx + 2) goto overflow2; ch = ptr[j]; linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.'; } nil: linebuf[lx] = '\0'; return lx; overflow2: linebuf[lx++] = '\0'; overflow1: return ascii ? ascii_column + len : (groupsize * 2 + 1) * ngroups - 1; } EXPORT_SYMBOL(hex_dump_to_buffer); #ifdef CONFIG_PRINTK /** * print_hex_dump - print a text hex dump to syslog for a binary blob of data * @level: kernel log level (e.g. KERN_DEBUG) * @prefix_str: string to prefix each line with; * caller supplies trailing spaces for alignment if desired * @prefix_type: controls whether prefix of an offset, address, or none * is printed (%DUMP_PREFIX_OFFSET, %DUMP_PREFIX_ADDRESS, %DUMP_PREFIX_NONE) * @rowsize: number of bytes to print per line; must be 16 or 32 * @groupsize: number of bytes to print at a time (1, 2, 4, 8; default = 1) * @buf: data blob to dump * @len: number of bytes in the @buf * @ascii: include ASCII after the hex output * * Given a buffer of u8 data, print_hex_dump() prints a hex + ASCII dump * to the kernel log at the specified kernel log level, with an optional * leading prefix. * * print_hex_dump() works on one "line" of output at a time, i.e., * 16 or 32 bytes of input data converted to hex + ASCII output. * print_hex_dump() iterates over the entire input @buf, breaking it into * "line size" chunks to format and print. * * E.g.: * print_hex_dump(KERN_DEBUG, "raw data: ", DUMP_PREFIX_ADDRESS, * 16, 1, frame->data, frame->len, true); * * Example output using %DUMP_PREFIX_OFFSET and 1-byte mode: * 0009ab42: 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO * Example output using %DUMP_PREFIX_ADDRESS and 4-byte mode: * ffffffff88089af0: 73727170 77767574 7b7a7978 7f7e7d7c pqrstuvwxyz{|}~. */ void print_hex_dump(const char *level, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii) { const u8 *ptr = buf; int i, linelen, remaining = len; unsigned char linebuf[32 * 3 + 2 + 32 + 1]; if (rowsize != 16 && rowsize != 32) rowsize = 16; for (i = 0; i < len; i += rowsize) { linelen = min(remaining, rowsize); remaining -= rowsize; hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, linebuf, sizeof(linebuf), ascii); switch (prefix_type) { case DUMP_PREFIX_ADDRESS: printk("%s%s%p: %s\n", level, prefix_str, ptr + i, linebuf); break; case DUMP_PREFIX_OFFSET: printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); break; default: printk("%s%s%s\n", level, prefix_str, linebuf); break; } } } EXPORT_SYMBOL(print_hex_dump); #endif /* defined(CONFIG_PRINTK) */ |
| 10 1 2 3 3 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 | // SPDX-License-Identifier: GPL-2.0-only #define pr_fmt(fmt) "IPsec: " fmt #include <crypto/hash.h> #include <crypto/utils.h> #include <linux/err.h> #include <linux/module.h> #include <linux/slab.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/ah.h> #include <linux/crypto.h> #include <linux/pfkeyv2.h> #include <linux/scatterlist.h> #include <net/icmp.h> #include <net/protocol.h> struct ah_skb_cb { struct xfrm_skb_cb xfrm; void *tmp; }; #define AH_SKB_CB(__skb) ((struct ah_skb_cb *)&((__skb)->cb[0])) static void *ah_alloc_tmp(struct crypto_ahash *ahash, int nfrags, unsigned int size) { unsigned int len; len = size + crypto_ahash_digestsize(ahash); len = ALIGN(len, crypto_tfm_ctx_alignment()); len += sizeof(struct ahash_request) + crypto_ahash_reqsize(ahash); len = ALIGN(len, __alignof__(struct scatterlist)); len += sizeof(struct scatterlist) * nfrags; return kmalloc(len, GFP_ATOMIC); } static inline u8 *ah_tmp_auth(void *tmp, unsigned int offset) { return tmp + offset; } static inline u8 *ah_tmp_icv(void *tmp, unsigned int offset) { return tmp + offset; } static inline struct ahash_request *ah_tmp_req(struct crypto_ahash *ahash, u8 *icv) { struct ahash_request *req; req = (void *)PTR_ALIGN(icv + crypto_ahash_digestsize(ahash), crypto_tfm_ctx_alignment()); ahash_request_set_tfm(req, ahash); return req; } static inline struct scatterlist *ah_req_sg(struct crypto_ahash *ahash, struct ahash_request *req) { return (void *)ALIGN((unsigned long)(req + 1) + crypto_ahash_reqsize(ahash), __alignof__(struct scatterlist)); } /* Clear mutable options and find final destination to substitute * into IP header for icv calculation. Options are already checked * for validity, so paranoia is not required. */ static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr) { unsigned char *optptr = (unsigned char *)(iph+1); int l = iph->ihl*4 - sizeof(struct iphdr); int optlen; while (l > 0) { switch (*optptr) { case IPOPT_END: return 0; case IPOPT_NOOP: l--; optptr++; continue; } optlen = optptr[1]; if (optlen<2 || optlen>l) return -EINVAL; switch (*optptr) { case IPOPT_SEC: case 0x85: /* Some "Extended Security" crap. */ case IPOPT_CIPSO: case IPOPT_RA: case 0x80|21: /* RFC1770 */ break; case IPOPT_LSRR: case IPOPT_SSRR: if (optlen < 6) return -EINVAL; memcpy(daddr, optptr+optlen-4, 4); fallthrough; default: memset(optptr, 0, optlen); } l -= optlen; optptr += optlen; } return 0; } static void ah_output_done(void *data, int err) { u8 *icv; struct iphdr *iph; struct sk_buff *skb = data; struct xfrm_state *x = skb_dst(skb)->xfrm; struct ah_data *ahp = x->data; struct iphdr *top_iph = ip_hdr(skb); struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); iph = AH_SKB_CB(skb)->tmp; icv = ah_tmp_icv(iph, ihl); memcpy(ah->auth_data, icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; top_iph->ttl = iph->ttl; top_iph->frag_off = iph->frag_off; if (top_iph->ihl != 5) { top_iph->daddr = iph->daddr; memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); } kfree(AH_SKB_CB(skb)->tmp); xfrm_output_resume(skb->sk, skb, err); } static int ah_output(struct xfrm_state *x, struct sk_buff *skb) { int err; int nfrags; int ihl; u8 *icv; struct sk_buff *trailer; struct crypto_ahash *ahash; struct ahash_request *req; struct scatterlist *sg; struct iphdr *iph, *top_iph; struct ip_auth_hdr *ah; struct ah_data *ahp; int seqhi_len = 0; __be32 *seqhi; int sglists = 0; struct scatterlist *seqhisg; ahp = x->data; ahash = ahp->ahash; if ((err = skb_cow_data(skb, 0, &trailer)) < 0) goto out; nfrags = err; skb_push(skb, -skb_network_offset(skb)); ah = ip_auth_hdr(skb); ihl = ip_hdrlen(skb); if (x->props.flags & XFRM_STATE_ESN) { sglists = 1; seqhi_len = sizeof(*seqhi); } err = -ENOMEM; iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl + seqhi_len); if (!iph) goto out; seqhi = (__be32 *)((char *)iph + ihl); icv = ah_tmp_icv(seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); seqhisg = sg + nfrags; memset(ah->auth_data, 0, ahp->icv_trunc_len); top_iph = ip_hdr(skb); iph->tos = top_iph->tos; iph->ttl = top_iph->ttl; iph->frag_off = top_iph->frag_off; if (top_iph->ihl != 5) { iph->daddr = top_iph->daddr; memcpy(iph+1, top_iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); err = ip_clear_mutable_options(top_iph, &top_iph->daddr); if (err) goto out_free; } ah->nexthdr = *skb_mac_header(skb); *skb_mac_header(skb) = IPPROTO_AH; top_iph->tos = 0; top_iph->tot_len = htons(skb->len); top_iph->frag_off = 0; top_iph->ttl = 0; top_iph->check = 0; if (x->props.flags & XFRM_STATE_ALIGN4) ah->hdrlen = (XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; else ah->hdrlen = (XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len) >> 2) - 2; ah->reserved = 0; ah->spi = x->id.spi; ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); sg_init_table(sg, nfrags + sglists); err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); if (unlikely(err < 0)) goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); sg_set_buf(seqhisg, seqhi, seqhi_len); } ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len); ahash_request_set_callback(req, 0, ah_output_done, skb); AH_SKB_CB(skb)->tmp = iph; err = crypto_ahash_digest(req); if (err) { if (err == -EINPROGRESS) goto out; if (err == -ENOSPC) err = NET_XMIT_DROP; goto out_free; } memcpy(ah->auth_data, icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; top_iph->ttl = iph->ttl; top_iph->frag_off = iph->frag_off; if (top_iph->ihl != 5) { top_iph->daddr = iph->daddr; memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr)); } out_free: kfree(iph); out: return err; } static void ah_input_done(void *data, int err) { u8 *auth_data; u8 *icv; struct iphdr *work_iph; struct sk_buff *skb = data; struct xfrm_state *x = xfrm_input_state(skb); struct ah_data *ahp = x->data; struct ip_auth_hdr *ah = ip_auth_hdr(skb); int ihl = ip_hdrlen(skb); int ah_hlen = (ah->hdrlen + 2) << 2; if (err) goto out; work_iph = AH_SKB_CB(skb)->tmp; auth_data = ah_tmp_auth(work_iph, ihl); icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) goto out; err = ah->nexthdr; skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); if (x->props.mode == XFRM_MODE_TUNNEL) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -ihl); out: kfree(AH_SKB_CB(skb)->tmp); xfrm_input_resume(skb, err); } static int ah_input(struct xfrm_state *x, struct sk_buff *skb) { int ah_hlen; int ihl; int nexthdr; int nfrags; u8 *auth_data; u8 *icv; struct sk_buff *trailer; struct crypto_ahash *ahash; struct ahash_request *req; struct scatterlist *sg; struct iphdr *iph, *work_iph; struct ip_auth_hdr *ah; struct ah_data *ahp; int err = -ENOMEM; int seqhi_len = 0; __be32 *seqhi; int sglists = 0; struct scatterlist *seqhisg; if (!pskb_may_pull(skb, sizeof(*ah))) goto out; ah = (struct ip_auth_hdr *)skb->data; ahp = x->data; ahash = ahp->ahash; nexthdr = ah->nexthdr; ah_hlen = (ah->hdrlen + 2) << 2; if (x->props.flags & XFRM_STATE_ALIGN4) { if (ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_full_len) && ah_hlen != XFRM_ALIGN4(sizeof(*ah) + ahp->icv_trunc_len)) goto out; } else { if (ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_full_len) && ah_hlen != XFRM_ALIGN8(sizeof(*ah) + ahp->icv_trunc_len)) goto out; } if (!pskb_may_pull(skb, ah_hlen)) goto out; /* We are going to _remove_ AH header to keep sockets happy, * so... Later this can change. */ if (skb_unclone(skb, GFP_ATOMIC)) goto out; skb->ip_summed = CHECKSUM_NONE; if ((err = skb_cow_data(skb, 0, &trailer)) < 0) goto out; nfrags = err; ah = (struct ip_auth_hdr *)skb->data; iph = ip_hdr(skb); ihl = ip_hdrlen(skb); if (x->props.flags & XFRM_STATE_ESN) { sglists = 1; seqhi_len = sizeof(*seqhi); } work_iph = ah_alloc_tmp(ahash, nfrags + sglists, ihl + ahp->icv_trunc_len + seqhi_len); if (!work_iph) { err = -ENOMEM; goto out; } seqhi = (__be32 *)((char *)work_iph + ihl); auth_data = ah_tmp_auth(seqhi, seqhi_len); icv = ah_tmp_icv(auth_data, ahp->icv_trunc_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); seqhisg = sg + nfrags; memcpy(work_iph, iph, ihl); memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); memset(ah->auth_data, 0, ahp->icv_trunc_len); iph->ttl = 0; iph->tos = 0; iph->frag_off = 0; iph->check = 0; if (ihl > sizeof(*iph)) { __be32 dummy; err = ip_clear_mutable_options(iph, &dummy); if (err) goto out_free; } skb_push(skb, ihl); sg_init_table(sg, nfrags + sglists); err = skb_to_sgvec_nomark(skb, sg, 0, skb->len); if (unlikely(err < 0)) goto out_free; if (x->props.flags & XFRM_STATE_ESN) { /* Attach seqhi sg right after packet payload */ *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; sg_set_buf(seqhisg, seqhi, seqhi_len); } ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len); ahash_request_set_callback(req, 0, ah_input_done, skb); AH_SKB_CB(skb)->tmp = work_iph; err = crypto_ahash_digest(req); if (err) { if (err == -EINPROGRESS) goto out; goto out_free; } err = crypto_memneq(icv, auth_data, ahp->icv_trunc_len) ? -EBADMSG : 0; if (err) goto out_free; skb->network_header += ah_hlen; memcpy(skb_network_header(skb), work_iph, ihl); __skb_pull(skb, ah_hlen + ihl); if (x->props.mode == XFRM_MODE_TUNNEL) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -ihl); err = nexthdr; out_free: kfree (work_iph); out: return err; } static int ah4_err(struct sk_buff *skb, u32 info) { struct net *net = dev_net(skb->dev); const struct iphdr *iph = (const struct iphdr *)skb->data; struct ip_auth_hdr *ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; switch (icmp_hdr(skb)->type) { case ICMP_DEST_UNREACH: if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return 0; break; case ICMP_REDIRECT: break; default: return 0; } x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return 0; if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, IPPROTO_AH); else ipv4_redirect(skb, net, 0, IPPROTO_AH); xfrm_state_put(x); return 0; } static int ah_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; struct crypto_ahash *ahash; if (!x->aalg) { NL_SET_ERR_MSG(extack, "AH requires a state with an AUTH algorithm"); goto error; } if (x->encap) { NL_SET_ERR_MSG(extack, "AH is not compatible with encapsulation"); goto error; } ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); if (!ahp) return -ENOMEM; ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0); if (IS_ERR(ahash)) { NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; } ahp->ahash = ahash; if (crypto_ahash_setkey(ahash, x->aalg->alg_key, (x->aalg->alg_key_len + 7) / 8)) { NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; } /* * Lookup the algorithm description maintained by xfrm_algo, * verify crypto transform properties, and store information * we need for AH processing. This lookup cannot fail here * after a successful crypto_alloc_ahash(). */ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_ahash_digestsize(ahash)) { NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; } ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; ahp->icv_trunc_len = x->aalg->alg_trunc_len/8; if (x->props.flags & XFRM_STATE_ALIGN4) x->props.header_len = XFRM_ALIGN4(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); else x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; return 0; error: if (ahp) { crypto_free_ahash(ahp->ahash); kfree(ahp); } return -EINVAL; } static void ah_destroy(struct xfrm_state *x) { struct ah_data *ahp = x->data; if (!ahp) return; crypto_free_ahash(ahp->ahash); kfree(ahp); } static int ah4_rcv_cb(struct sk_buff *skb, int err) { return 0; } static const struct xfrm_type ah_type = { .owner = THIS_MODULE, .proto = IPPROTO_AH, .flags = XFRM_TYPE_REPLAY_PROT, .init_state = ah_init_state, .destructor = ah_destroy, .input = ah_input, .output = ah_output }; static struct xfrm4_protocol ah4_protocol = { .handler = xfrm4_rcv, .input_handler = xfrm_input, .cb_handler = ah4_rcv_cb, .err_handler = ah4_err, .priority = 0, }; static int __init ah4_init(void) { if (xfrm_register_type(&ah_type, AF_INET) < 0) { pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (xfrm4_protocol_register(&ah4_protocol, IPPROTO_AH) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ah_type, AF_INET); return -EAGAIN; } return 0; } static void __exit ah4_fini(void) { if (xfrm4_protocol_deregister(&ah4_protocol, IPPROTO_AH) < 0) pr_info("%s: can't remove protocol\n", __func__); xfrm_unregister_type(&ah_type, AF_INET); } module_init(ah4_init); module_exit(ah4_fini); MODULE_DESCRIPTION("IPv4 AH transformation library"); MODULE_LICENSE("GPL"); MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_AH); |
| 222 188 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Definitions for the ICMP protocol. * * Version: @(#)icmp.h 1.0.3 04/28/93 * * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> */ #ifndef _LINUX_ICMP_H #define _LINUX_ICMP_H #include <linux/skbuff.h> #include <uapi/linux/icmp.h> #include <uapi/linux/errqueue.h> static inline struct icmphdr *icmp_hdr(const struct sk_buff *skb) { return (struct icmphdr *)skb_transport_header(skb); } static inline bool icmp_is_err(int type) { switch (type) { case ICMP_DEST_UNREACH: case ICMP_SOURCE_QUENCH: case ICMP_REDIRECT: case ICMP_TIME_EXCEEDED: case ICMP_PARAMETERPROB: return true; } return false; } void ip_icmp_error_rfc4884(const struct sk_buff *skb, struct sock_ee_data_rfc4884 *out, int thlen, int off); #endif /* _LINUX_ICMP_H */ |
| 128 127 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM snd_pcm #if !defined(_PCM_PARAMS_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) #define _PCM_PARAMS_TRACE_H #include <linux/tracepoint.h> #define HW_PARAM_ENTRY(param) {SNDRV_PCM_HW_PARAM_##param, #param} #define hw_param_labels \ HW_PARAM_ENTRY(ACCESS), \ HW_PARAM_ENTRY(FORMAT), \ HW_PARAM_ENTRY(SUBFORMAT), \ HW_PARAM_ENTRY(SAMPLE_BITS), \ HW_PARAM_ENTRY(FRAME_BITS), \ HW_PARAM_ENTRY(CHANNELS), \ HW_PARAM_ENTRY(RATE), \ HW_PARAM_ENTRY(PERIOD_TIME), \ HW_PARAM_ENTRY(PERIOD_SIZE), \ HW_PARAM_ENTRY(PERIOD_BYTES), \ HW_PARAM_ENTRY(PERIODS), \ HW_PARAM_ENTRY(BUFFER_TIME), \ HW_PARAM_ENTRY(BUFFER_SIZE), \ HW_PARAM_ENTRY(BUFFER_BYTES), \ HW_PARAM_ENTRY(TICK_TIME) TRACE_EVENT(hw_mask_param, TP_PROTO(struct snd_pcm_substream *substream, snd_pcm_hw_param_t type, int index, const struct snd_mask *prev, const struct snd_mask *curr), TP_ARGS(substream, type, index, prev, curr), TP_STRUCT__entry( __field(int, card) __field(int, device) __field(int, subdevice) __field(int, direction) __field(snd_pcm_hw_param_t, type) __field(int, index) __field(int, total) __array(__u32, prev_bits, 8) __array(__u32, curr_bits, 8) ), TP_fast_assign( __entry->card = substream->pcm->card->number; __entry->device = substream->pcm->device; __entry->subdevice = substream->number; __entry->direction = substream->stream; __entry->type = type; __entry->index = index; __entry->total = substream->runtime->hw_constraints.rules_num; memcpy(__entry->prev_bits, prev->bits, sizeof(__u32) * 8); memcpy(__entry->curr_bits, curr->bits, sizeof(__u32) * 8); ), TP_printk("pcmC%dD%d%s:%d %03d/%03d %s %08x%08x%08x%08x %08x%08x%08x%08x", __entry->card, __entry->device, __entry->direction ? "c" : "p", __entry->subdevice, __entry->index, __entry->total, __print_symbolic(__entry->type, hw_param_labels), __entry->prev_bits[3], __entry->prev_bits[2], __entry->prev_bits[1], __entry->prev_bits[0], __entry->curr_bits[3], __entry->curr_bits[2], __entry->curr_bits[1], __entry->curr_bits[0] ) ); TRACE_EVENT(hw_interval_param, TP_PROTO(struct snd_pcm_substream *substream, snd_pcm_hw_param_t type, int index, const struct snd_interval *prev, const struct snd_interval *curr), TP_ARGS(substream, type, index, prev, curr), TP_STRUCT__entry( __field(int, card) __field(int, device) __field(int, subdevice) __field(int, direction) __field(snd_pcm_hw_param_t, type) __field(int, index) __field(int, total) __field(unsigned int, prev_min) __field(unsigned int, prev_max) __field(unsigned int, prev_openmin) __field(unsigned int, prev_openmax) __field(unsigned int, prev_integer) __field(unsigned int, prev_empty) __field(unsigned int, curr_min) __field(unsigned int, curr_max) __field(unsigned int, curr_openmin) __field(unsigned int, curr_openmax) __field(unsigned int, curr_integer) __field(unsigned int, curr_empty) ), TP_fast_assign( __entry->card = substream->pcm->card->number; __entry->device = substream->pcm->device; __entry->subdevice = substream->number; __entry->direction = substream->stream; __entry->type = type; __entry->index = index; __entry->total = substream->runtime->hw_constraints.rules_num; __entry->prev_min = prev->min; __entry->prev_max = prev->max; __entry->prev_openmin = prev->openmin; __entry->prev_openmax = prev->openmax; __entry->prev_integer = prev->integer; __entry->prev_empty = prev->empty; __entry->curr_min = curr->min; __entry->curr_max = curr->max; __entry->curr_openmin = curr->openmin; __entry->curr_openmax = curr->openmax; __entry->curr_integer = curr->integer; __entry->curr_empty = curr->empty; ), TP_printk("pcmC%dD%d%s:%d %03d/%03d %s %d %d %s%u %u%s %d %d %s%u %u%s", __entry->card, __entry->device, __entry->direction ? "c" : "p", __entry->subdevice, __entry->index, __entry->total, __print_symbolic(__entry->type, hw_param_labels), __entry->prev_empty, __entry->prev_integer, __entry->prev_openmin ? "(" : "[", __entry->prev_min, __entry->prev_max, __entry->prev_openmax ? ")" : "]", __entry->curr_empty, __entry->curr_integer, __entry->curr_openmin ? "(" : "[", __entry->curr_min, __entry->curr_max, __entry->curr_openmax ? ")" : "]" ) ); #endif /* _PCM_PARAMS_TRACE_H */ /* This part must be outside protection */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH . #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE pcm_param_trace #include <trace/define_trace.h> |
| 58 58 90 90 2 86 90 8 2 33 32 10 1 33 33 8 8 1 32 33 15 1 14 14 2 13 8 4 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 | // SPDX-License-Identifier: GPL-2.0-or-later /* * dcache.c * * dentry cache handling code * * Copyright (C) 2002, 2004 Oracle. All rights reserved. */ #include <linux/fs.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/namei.h> #include <cluster/masklog.h> #include "ocfs2.h" #include "alloc.h" #include "dcache.h" #include "dlmglue.h" #include "file.h" #include "inode.h" #include "ocfs2_trace.h" void ocfs2_dentry_attach_gen(struct dentry *dentry) { unsigned long gen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen; BUG_ON(d_inode(dentry)); dentry->d_fsdata = (void *)gen; } static int ocfs2_dentry_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; int ret = 0; /* if all else fails, just return false */ struct ocfs2_super *osb; if (flags & LOOKUP_RCU) return -ECHILD; inode = d_inode(dentry); osb = OCFS2_SB(dentry->d_sb); trace_ocfs2_dentry_revalidate(dentry, dentry->d_name.len, dentry->d_name.name); /* For a negative dentry - * check the generation number of the parent and compare with the * one stored in the inode. */ if (inode == NULL) { unsigned long gen = (unsigned long) dentry->d_fsdata; unsigned long pgen; spin_lock(&dentry->d_lock); pgen = OCFS2_I(d_inode(dentry->d_parent))->ip_dir_lock_gen; spin_unlock(&dentry->d_lock); trace_ocfs2_dentry_revalidate_negative(dentry->d_name.len, dentry->d_name.name, pgen, gen); if (gen != pgen) goto bail; goto valid; } BUG_ON(!osb); if (inode == osb->root_inode || is_bad_inode(inode)) goto bail; spin_lock(&OCFS2_I(inode)->ip_lock); /* did we or someone else delete this inode? */ if (OCFS2_I(inode)->ip_flags & OCFS2_INODE_DELETED) { spin_unlock(&OCFS2_I(inode)->ip_lock); trace_ocfs2_dentry_revalidate_delete( (unsigned long long)OCFS2_I(inode)->ip_blkno); goto bail; } spin_unlock(&OCFS2_I(inode)->ip_lock); /* * We don't need a cluster lock to test this because once an * inode nlink hits zero, it never goes back. */ if (inode->i_nlink == 0) { trace_ocfs2_dentry_revalidate_orphaned( (unsigned long long)OCFS2_I(inode)->ip_blkno, S_ISDIR(inode->i_mode)); goto bail; } /* * If the last lookup failed to create dentry lock, let us * redo it. */ if (!dentry->d_fsdata) { trace_ocfs2_dentry_revalidate_nofsdata( (unsigned long long)OCFS2_I(inode)->ip_blkno); goto bail; } valid: ret = 1; bail: trace_ocfs2_dentry_revalidate_ret(ret); return ret; } static int ocfs2_match_dentry(struct dentry *dentry, u64 parent_blkno, int skip_unhashed) { struct inode *parent; /* * ocfs2_lookup() does a d_splice_alias() _before_ attaching * to the lock data, so we skip those here, otherwise * ocfs2_dentry_attach_lock() will get its original dentry * back. */ if (!dentry->d_fsdata) return 0; if (skip_unhashed && d_unhashed(dentry)) return 0; parent = d_inode(dentry->d_parent); /* Name is in a different directory. */ if (OCFS2_I(parent)->ip_blkno != parent_blkno) return 0; return 1; } /* * Walk the inode alias list, and find a dentry which has a given * parent. ocfs2_dentry_attach_lock() wants to find _any_ alias as it * is looking for a dentry_lock reference. The downconvert thread is * looking to unhash aliases, so we allow it to skip any that already * have that property. */ struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, int skip_unhashed) { struct dentry *dentry; spin_lock(&inode->i_lock); hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) { spin_lock(&dentry->d_lock); if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) { trace_ocfs2_find_local_alias(dentry->d_name.len, dentry->d_name.name); dget_dlock(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); return dentry; } spin_unlock(&dentry->d_lock); } spin_unlock(&inode->i_lock); return NULL; } DEFINE_SPINLOCK(dentry_attach_lock); /* * Attach this dentry to a cluster lock. * * Dentry locks cover all links in a given directory to a particular * inode. We do this so that ocfs2 can build a lock name which all * nodes in the cluster can agree on at all times. Shoving full names * in the cluster lock won't work due to size restrictions. Covering * links inside of a directory is a good compromise because it still * allows us to use the parent directory lock to synchronize * operations. * * Call this function with the parent dir semaphore and the parent dir * cluster lock held. * * The dir semaphore will protect us from having to worry about * concurrent processes on our node trying to attach a lock at the * same time. * * The dir cluster lock (held at either PR or EX mode) protects us * from unlink and rename on other nodes. * * A dput() can happen asynchronously due to pruning, so we cover * attaching and detaching the dentry lock with a * dentry_attach_lock. * * A node which has done lookup on a name retains a protected read * lock until final dput. If the user requests and unlink or rename, * the protected read is upgraded to an exclusive lock. Other nodes * who have seen the dentry will then be informed that they need to * downgrade their lock, which will involve d_delete on the * dentry. This happens in ocfs2_dentry_convert_worker(). */ int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, u64 parent_blkno) { int ret; struct dentry *alias; struct ocfs2_dentry_lock *dl = dentry->d_fsdata; trace_ocfs2_dentry_attach_lock(dentry->d_name.len, dentry->d_name.name, (unsigned long long)parent_blkno, dl); /* * Negative dentry. We ignore these for now. * * XXX: Could we can improve ocfs2_dentry_revalidate() by * tracking these? */ if (!inode) return 0; if (d_really_is_negative(dentry) && dentry->d_fsdata) { /* Converting a negative dentry to positive Clear dentry->d_fsdata */ dentry->d_fsdata = dl = NULL; } if (dl) { mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, " \"%pd\": old parent: %llu, new: %llu\n", dentry, (unsigned long long)parent_blkno, (unsigned long long)dl->dl_parent_blkno); return 0; } alias = ocfs2_find_local_alias(inode, parent_blkno, 0); if (alias) { /* * Great, an alias exists, which means we must have a * dentry lock already. We can just grab the lock off * the alias and add it to the list. * * We're depending here on the fact that this dentry * was found and exists in the dcache and so must have * a reference to the dentry_lock because we can't * race creates. Final dput() cannot happen on it * since we have it pinned, so our reference is safe. */ dl = alias->d_fsdata; mlog_bug_on_msg(!dl, "parent %llu, ino %llu\n", (unsigned long long)parent_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno); mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, " \"%pd\": old parent: %llu, new: %llu\n", dentry, (unsigned long long)parent_blkno, (unsigned long long)dl->dl_parent_blkno); trace_ocfs2_dentry_attach_lock_found(dl->dl_lockres.l_name, (unsigned long long)parent_blkno, (unsigned long long)OCFS2_I(inode)->ip_blkno); goto out_attach; } /* * There are no other aliases */ dl = kmalloc(sizeof(*dl), GFP_NOFS); if (!dl) { ret = -ENOMEM; mlog_errno(ret); return ret; } dl->dl_count = 0; /* * Does this have to happen below, for all attaches, in case * the struct inode gets blown away by the downconvert thread? */ dl->dl_inode = igrab(inode); dl->dl_parent_blkno = parent_blkno; ocfs2_dentry_lock_res_init(dl, parent_blkno, inode); out_attach: spin_lock(&dentry_attach_lock); if (unlikely(dentry->d_fsdata && !alias)) { /* d_fsdata is set by a racing thread which is doing * the same thing as this thread is doing. Leave the racing * thread going ahead and we return here. */ spin_unlock(&dentry_attach_lock); iput(dl->dl_inode); ocfs2_lock_res_free(&dl->dl_lockres); kfree(dl); return 0; } dentry->d_fsdata = dl; dl->dl_count++; spin_unlock(&dentry_attach_lock); /* * This actually gets us our PRMODE level lock. From now on, * we'll have a notification if one of these names is * destroyed on another node. */ ret = ocfs2_dentry_lock(dentry, 0); if (!ret) ocfs2_dentry_unlock(dentry, 0); else mlog_errno(ret); /* * In case of error, manually free the allocation and do the iput(). * We need to do this because error here means no d_instantiate(), * which means iput() will not be called during dput(dentry). */ if (ret < 0 && !alias) { ocfs2_lock_res_free(&dl->dl_lockres); BUG_ON(dl->dl_count != 1); spin_lock(&dentry_attach_lock); dentry->d_fsdata = NULL; spin_unlock(&dentry_attach_lock); kfree(dl); iput(inode); } dput(alias); return ret; } /* * ocfs2_dentry_iput() and friends. * * At this point, our particular dentry is detached from the inodes * alias list, so there's no way that the locking code can find it. * * The interesting stuff happens when we determine that our lock needs * to go away because this is the last subdir alias in the * system. This function needs to handle a couple things: * * 1) Synchronizing lock shutdown with the downconvert threads. This * is already handled for us via the lockres release drop function * called in ocfs2_release_dentry_lock() * * 2) A race may occur when we're doing our lock shutdown and * another process wants to create a new dentry lock. Right now we * let them race, which means that for a very short while, this * node might have two locks on a lock resource. This should be a * problem though because one of them is in the process of being * thrown out. */ static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl) { iput(dl->dl_inode); ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); ocfs2_lock_res_free(&dl->dl_lockres); kfree(dl); } void ocfs2_dentry_lock_put(struct ocfs2_super *osb, struct ocfs2_dentry_lock *dl) { int unlock = 0; BUG_ON(dl->dl_count == 0); spin_lock(&dentry_attach_lock); dl->dl_count--; unlock = !dl->dl_count; spin_unlock(&dentry_attach_lock); if (unlock) ocfs2_drop_dentry_lock(osb, dl); } static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) { struct ocfs2_dentry_lock *dl = dentry->d_fsdata; if (!dl) { /* * No dentry lock is ok if we're disconnected or * unhashed. */ if (!(dentry->d_flags & DCACHE_DISCONNECTED) && !d_unhashed(dentry)) { unsigned long long ino = 0ULL; if (inode) ino = (unsigned long long)OCFS2_I(inode)->ip_blkno; mlog(ML_ERROR, "Dentry is missing cluster lock. " "inode: %llu, d_flags: 0x%x, d_name: %pd\n", ino, dentry->d_flags, dentry); } goto out; } mlog_bug_on_msg(dl->dl_count == 0, "dentry: %pd, count: %u\n", dentry, dl->dl_count); ocfs2_dentry_lock_put(OCFS2_SB(dentry->d_sb), dl); out: iput(inode); } /* * d_move(), but keep the locks in sync. * * When we are done, "dentry" will have the parent dir and name of * "target", which will be thrown away. * * We manually update the lock of "dentry" if need be. * * "target" doesn't have it's dentry lock touched - we allow the later * dput() to handle this for us. * * This is called during ocfs2_rename(), while holding parent * directory locks. The dentries have already been deleted on other * nodes via ocfs2_remote_dentry_delete(). * * Normally, the VFS handles the d_move() for the file system, after * the ->rename() callback. OCFS2 wants to handle this internally, so * the new lock can be created atomically with respect to the cluster. */ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, struct inode *old_dir, struct inode *new_dir) { int ret; struct ocfs2_super *osb = OCFS2_SB(old_dir->i_sb); struct inode *inode = d_inode(dentry); /* * Move within the same directory, so the actual lock info won't * change. * * XXX: Is there any advantage to dropping the lock here? */ if (old_dir == new_dir) goto out_move; ocfs2_dentry_lock_put(osb, dentry->d_fsdata); dentry->d_fsdata = NULL; ret = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(new_dir)->ip_blkno); if (ret) mlog_errno(ret); out_move: d_move(dentry, target); } const struct dentry_operations ocfs2_dentry_ops = { .d_revalidate = ocfs2_dentry_revalidate, .d_iput = ocfs2_dentry_iput, }; |
| 1097 1097 181 6 6 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 | // SPDX-License-Identifier: GPL-2.0-or-later /* * vrf.c: device driver to encapsulate a VRF space * * Copyright (c) 2015 Cumulus Networks. All rights reserved. * Copyright (c) 2015 Shrijeet Mukherjee <shm@cumulusnetworks.com> * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com> * * Based on dummy, team and ipvlan drivers */ #include <linux/ethtool.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/ip.h> #include <linux/init.h> #include <linux/moduleparam.h> #include <linux/netfilter.h> #include <linux/rtnetlink.h> #include <net/rtnetlink.h> #include <linux/u64_stats_sync.h> #include <linux/hashtable.h> #include <linux/spinlock_types.h> #include <linux/inetdevice.h> #include <net/arp.h> #include <net/ip.h> #include <net/ip_fib.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> #include <net/route.h> #include <net/addrconf.h> #include <net/l3mdev.h> #include <net/fib_rules.h> #include <net/sch_generic.h> #include <net/netns/generic.h> #include <net/netfilter/nf_conntrack.h> #define DRV_NAME "vrf" #define DRV_VERSION "1.1" #define FIB_RULE_PREF 1000 /* default preference for FIB rules */ #define HT_MAP_BITS 4 #define HASH_INITVAL ((u32)0xcafef00d) struct vrf_map { DECLARE_HASHTABLE(ht, HT_MAP_BITS); spinlock_t vmap_lock; /* shared_tables: * count how many distinct tables do not comply with the strict mode * requirement. * shared_tables value must be 0 in order to enable the strict mode. * * example of the evolution of shared_tables: * | time * add vrf0 --> table 100 shared_tables = 0 | t0 * add vrf1 --> table 101 shared_tables = 0 | t1 * add vrf2 --> table 100 shared_tables = 1 | t2 * add vrf3 --> table 100 shared_tables = 1 | t3 * add vrf4 --> table 101 shared_tables = 2 v t4 * * shared_tables is a "step function" (or "staircase function") * and it is increased by one when the second vrf is associated to a * table. * * at t2, vrf0 and vrf2 are bound to table 100: shared_tables = 1. * * at t3, another dev (vrf3) is bound to the same table 100 but the * value of shared_tables is still 1. * This means that no matter how many new vrfs will register on the * table 100, the shared_tables will not increase (considering only * table 100). * * at t4, vrf4 is bound to table 101, and shared_tables = 2. * * Looking at the value of shared_tables we can immediately know if * the strict_mode can or cannot be enforced. Indeed, strict_mode * can be enforced iff shared_tables = 0. * * Conversely, shared_tables is decreased when a vrf is de-associated * from a table with exactly two associated vrfs. */ u32 shared_tables; bool strict_mode; }; struct vrf_map_elem { struct hlist_node hnode; struct list_head vrf_list; /* VRFs registered to this table */ u32 table_id; int users; int ifindex; }; static unsigned int vrf_net_id; /* per netns vrf data */ struct netns_vrf { /* protected by rtnl lock */ bool add_fib_rules; struct vrf_map vmap; struct ctl_table_header *ctl_hdr; }; struct net_vrf { struct rtable __rcu *rth; struct rt6_info __rcu *rt6; #if IS_ENABLED(CONFIG_IPV6) struct fib6_table *fib6_table; #endif u32 tb_id; struct list_head me_list; /* entry in vrf_map_elem */ int ifindex; }; static void vrf_rx_stats(struct net_device *dev, int len) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); u64_stats_inc(&dstats->rx_packets); u64_stats_add(&dstats->rx_bytes, len); u64_stats_update_end(&dstats->syncp); } static void vrf_tx_error(struct net_device *vrf_dev, struct sk_buff *skb) { vrf_dev->stats.tx_errors++; kfree_skb(skb); } static struct vrf_map *netns_vrf_map(struct net *net) { struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); return &nn_vrf->vmap; } static struct vrf_map *netns_vrf_map_by_dev(struct net_device *dev) { return netns_vrf_map(dev_net(dev)); } static int vrf_map_elem_get_vrf_ifindex(struct vrf_map_elem *me) { struct list_head *me_head = &me->vrf_list; struct net_vrf *vrf; if (list_empty(me_head)) return -ENODEV; vrf = list_first_entry(me_head, struct net_vrf, me_list); return vrf->ifindex; } static struct vrf_map_elem *vrf_map_elem_alloc(gfp_t flags) { struct vrf_map_elem *me; me = kmalloc(sizeof(*me), flags); if (!me) return NULL; return me; } static void vrf_map_elem_free(struct vrf_map_elem *me) { kfree(me); } static void vrf_map_elem_init(struct vrf_map_elem *me, int table_id, int ifindex, int users) { me->table_id = table_id; me->ifindex = ifindex; me->users = users; INIT_LIST_HEAD(&me->vrf_list); } static struct vrf_map_elem *vrf_map_lookup_elem(struct vrf_map *vmap, u32 table_id) { struct vrf_map_elem *me; u32 key; key = jhash_1word(table_id, HASH_INITVAL); hash_for_each_possible(vmap->ht, me, hnode, key) { if (me->table_id == table_id) return me; } return NULL; } static void vrf_map_add_elem(struct vrf_map *vmap, struct vrf_map_elem *me) { u32 table_id = me->table_id; u32 key; key = jhash_1word(table_id, HASH_INITVAL); hash_add(vmap->ht, &me->hnode, key); } static void vrf_map_del_elem(struct vrf_map_elem *me) { hash_del(&me->hnode); } static void vrf_map_lock(struct vrf_map *vmap) __acquires(&vmap->vmap_lock) { spin_lock(&vmap->vmap_lock); } static void vrf_map_unlock(struct vrf_map *vmap) __releases(&vmap->vmap_lock) { spin_unlock(&vmap->vmap_lock); } /* called with rtnl lock held */ static int vrf_map_register_dev(struct net_device *dev, struct netlink_ext_ack *extack) { struct vrf_map *vmap = netns_vrf_map_by_dev(dev); struct net_vrf *vrf = netdev_priv(dev); struct vrf_map_elem *new_me, *me; u32 table_id = vrf->tb_id; bool free_new_me = false; int users; int res; /* we pre-allocate elements used in the spin-locked section (so that we * keep the spinlock as short as possible). */ new_me = vrf_map_elem_alloc(GFP_KERNEL); if (!new_me) return -ENOMEM; vrf_map_elem_init(new_me, table_id, dev->ifindex, 0); vrf_map_lock(vmap); me = vrf_map_lookup_elem(vmap, table_id); if (!me) { me = new_me; vrf_map_add_elem(vmap, me); goto link_vrf; } /* we already have an entry in the vrf_map, so it means there is (at * least) a vrf registered on the specific table. */ free_new_me = true; if (vmap->strict_mode) { /* vrfs cannot share the same table */ NL_SET_ERR_MSG(extack, "Table is used by another VRF"); res = -EBUSY; goto unlock; } link_vrf: users = ++me->users; if (users == 2) ++vmap->shared_tables; list_add(&vrf->me_list, &me->vrf_list); res = 0; unlock: vrf_map_unlock(vmap); /* clean-up, if needed */ if (free_new_me) vrf_map_elem_free(new_me); return res; } /* called with rtnl lock held */ static void vrf_map_unregister_dev(struct net_device *dev) { struct vrf_map *vmap = netns_vrf_map_by_dev(dev); struct net_vrf *vrf = netdev_priv(dev); u32 table_id = vrf->tb_id; struct vrf_map_elem *me; int users; vrf_map_lock(vmap); me = vrf_map_lookup_elem(vmap, table_id); if (!me) goto unlock; list_del(&vrf->me_list); users = --me->users; if (users == 1) { --vmap->shared_tables; } else if (users == 0) { vrf_map_del_elem(me); /* no one will refer to this element anymore */ vrf_map_elem_free(me); } unlock: vrf_map_unlock(vmap); } /* return the vrf device index associated with the table_id */ static int vrf_ifindex_lookup_by_table_id(struct net *net, u32 table_id) { struct vrf_map *vmap = netns_vrf_map(net); struct vrf_map_elem *me; int ifindex; vrf_map_lock(vmap); if (!vmap->strict_mode) { ifindex = -EPERM; goto unlock; } me = vrf_map_lookup_elem(vmap, table_id); if (!me) { ifindex = -ENODEV; goto unlock; } ifindex = vrf_map_elem_get_vrf_ifindex(me); unlock: vrf_map_unlock(vmap); return ifindex; } /* by default VRF devices do not have a qdisc and are expected * to be created with only a single queue. */ static bool qdisc_tx_is_default(const struct net_device *dev) { struct netdev_queue *txq; struct Qdisc *qdisc; if (dev->num_tx_queues > 1) return false; txq = netdev_get_tx_queue(dev, 0); qdisc = rcu_access_pointer(txq->qdisc); return !qdisc->enqueue; } /* Local traffic destined to local address. Reinsert the packet to rx * path, similar to loopback handling. */ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, struct dst_entry *dst) { int len = skb->len; skb_orphan(skb); skb_dst_set(skb, dst); /* set pkt_type to avoid skb hitting packet taps twice - * once on Tx and again in Rx processing */ skb->pkt_type = PACKET_LOOPBACK; skb->protocol = eth_type_trans(skb, dev); if (likely(__netif_rx(skb) == NET_RX_SUCCESS)) { vrf_rx_stats(dev, len); } else { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); u64_stats_update_begin(&dstats->syncp); u64_stats_inc(&dstats->rx_drops); u64_stats_update_end(&dstats->syncp); } return NETDEV_TX_OK; } static void vrf_nf_set_untracked(struct sk_buff *skb) { if (skb_get_nfct(skb) == 0) nf_ct_set(skb, NULL, IP_CT_UNTRACKED); } static void vrf_nf_reset_ct(struct sk_buff *skb) { if (skb_get_nfct(skb) == IP_CT_UNTRACKED) nf_reset_ct(skb); } #if IS_ENABLED(CONFIG_IPV6) static int vrf_ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; vrf_nf_reset_ct(skb); err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); if (likely(err == 1)) err = dst_output(net, sk, skb); return err; } static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { const struct ipv6hdr *iph; struct net *net = dev_net(skb->dev); struct flowi6 fl6; int ret = NET_XMIT_DROP; struct dst_entry *dst; struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst; if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr))) goto err; iph = ipv6_hdr(skb); memset(&fl6, 0, sizeof(fl6)); /* needed to match OIF rule */ fl6.flowi6_l3mdev = dev->ifindex; fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = iph->nexthdr; dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst) || dst == dst_null) goto err; skb_dst_drop(skb); /* if dst.dev is the VRF device again this is locally originated traffic * destined to a local address. Short circuit to Rx path. */ if (dst->dev == dev) return vrf_local_xmit(skb, dev, dst); skb_dst_set(skb, dst); /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); ret = vrf_ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(ret))) dev->stats.tx_errors++; else ret = NET_XMIT_SUCCESS; return ret; err: vrf_tx_error(dev, skb); return NET_XMIT_DROP; } #else static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { vrf_tx_error(dev, skb); return NET_XMIT_DROP; } #endif /* based on ip_local_out; can't use it b/c the dst is switched pointing to us */ static int vrf_ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; vrf_nf_reset_ct(skb); err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); if (likely(err == 1)) err = dst_output(net, sk, skb); return err; } static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, struct net_device *vrf_dev) { struct iphdr *ip4h; int ret = NET_XMIT_DROP; struct flowi4 fl4; struct net *net = dev_net(vrf_dev); struct rtable *rt; if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr))) goto err; ip4h = ip_hdr(skb); memset(&fl4, 0, sizeof(fl4)); /* needed to match OIF rule */ fl4.flowi4_l3mdev = vrf_dev->ifindex; fl4.flowi4_iif = LOOPBACK_IFINDEX; fl4.flowi4_tos = RT_TOS(ip4h->tos); fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; fl4.flowi4_proto = ip4h->protocol; fl4.daddr = ip4h->daddr; fl4.saddr = ip4h->saddr; rt = ip_route_output_flow(net, &fl4, NULL); if (IS_ERR(rt)) goto err; skb_dst_drop(skb); /* if dst.dev is the VRF device again this is locally originated traffic * destined to a local address. Short circuit to Rx path. */ if (rt->dst.dev == vrf_dev) return vrf_local_xmit(skb, vrf_dev, &rt->dst); skb_dst_set(skb, &rt->dst); /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); if (!ip4h->saddr) { ip4h->saddr = inet_select_addr(skb_dst(skb)->dev, 0, RT_SCOPE_LINK); } memset(IPCB(skb), 0, sizeof(*IPCB(skb))); ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; else ret = NET_XMIT_SUCCESS; out: return ret; err: vrf_tx_error(vrf_dev, skb); goto out; } static netdev_tx_t is_ip_tx_frame(struct sk_buff *skb, struct net_device *dev) { switch (skb->protocol) { case htons(ETH_P_IP): return vrf_process_v4_outbound(skb, dev); case htons(ETH_P_IPV6): return vrf_process_v6_outbound(skb, dev); default: vrf_tx_error(dev, skb); return NET_XMIT_DROP; } } static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev) { struct pcpu_dstats *dstats = this_cpu_ptr(dev->dstats); int len = skb->len; netdev_tx_t ret = is_ip_tx_frame(skb, dev); u64_stats_update_begin(&dstats->syncp); if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) { u64_stats_inc(&dstats->tx_packets); u64_stats_add(&dstats->tx_bytes, len); } else { u64_stats_inc(&dstats->tx_drops); } u64_stats_update_end(&dstats->syncp); return ret; } static void vrf_finish_direct(struct sk_buff *skb) { struct net_device *vrf_dev = skb->dev; if (!list_empty(&vrf_dev->ptype_all) && likely(skb_headroom(skb) >= ETH_HLEN)) { struct ethhdr *eth = skb_push(skb, ETH_HLEN); ether_addr_copy(eth->h_source, vrf_dev->dev_addr); eth_zero_addr(eth->h_dest); eth->h_proto = skb->protocol; dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, ETH_HLEN); } vrf_nf_reset_ct(skb); } #if IS_ENABLED(CONFIG_IPV6) /* modelled after ip6_finish_output2 */ static int vrf_finish_output6(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; const struct in6_addr *nexthop; struct neighbour *neigh; int ret; vrf_nf_reset_ct(skb); skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; rcu_read_lock(); nexthop = rt6_nexthop(dst_rt6_info(dst), &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); if (!IS_ERR(neigh)) { sock_confirm_neigh(skb, neigh); ret = neigh_output(neigh, skb, false); rcu_read_unlock(); return ret; } rcu_read_unlock(); IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } /* modelled after ip6_output */ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) { return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb_dst(skb)->dev, vrf_finish_output6, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } /* set dst on skb to send packet to us via dev_xmit path. Allows * packet to go through device based features such as qdisc, netfilter * hooks and packet sockets with skb->dev set to vrf device. */ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); struct dst_entry *dst = NULL; struct rt6_info *rt6; rcu_read_lock(); rt6 = rcu_dereference(vrf->rt6); if (likely(rt6)) { dst = &rt6->dst; dst_hold(dst); } rcu_read_unlock(); if (unlikely(!dst)) { vrf_tx_error(vrf_dev, skb); return NULL; } skb_dst_drop(skb); skb_dst_set(skb, dst); return skb; } static int vrf_output6_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { vrf_finish_direct(skb); return vrf_ip6_local_out(net, sk, skb); } static int vrf_output6_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { int err = 1; skb->protocol = htons(ETH_P_IPV6); if (!(IPCB(skb)->flags & IPSKB_REROUTED)) err = nf_hook(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb->dev, vrf_output6_direct_finish); if (likely(err == 1)) vrf_finish_direct(skb); return err; } static int vrf_ip6_out_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; err = vrf_output6_direct(net, sk, skb); if (likely(err == 1)) err = vrf_ip6_local_out(net, sk, skb); return err; } static struct sk_buff *vrf_ip6_out_direct(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(vrf_dev); int err; skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip6_out_direct_finish); if (likely(err == 1)) err = vrf_output6_direct(net, sk, skb); if (likely(err == 1)) return skb; return NULL; } static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { /* don't divert link scope packets */ if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) return skb; vrf_nf_set_untracked(skb); if (qdisc_tx_is_default(vrf_dev) || IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) return vrf_ip6_out_direct(vrf_dev, sk, skb); return vrf_ip6_out_redirect(vrf_dev, skb); } /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { struct rt6_info *rt6 = rtnl_dereference(vrf->rt6); struct net *net = dev_net(dev); struct dst_entry *dst; RCU_INIT_POINTER(vrf->rt6, NULL); synchronize_rcu(); /* move dev in dst's to loopback so this VRF device can be deleted * - based on dst_ifdown */ if (rt6) { dst = &rt6->dst; netdev_ref_replace(dst->dev, net->loopback_dev, &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } } static int vrf_rt6_create(struct net_device *dev) { int flags = DST_NOPOLICY | DST_NOXFRM; struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); struct rt6_info *rt6; int rc = -ENOMEM; /* IPv6 can be CONFIG enabled and then disabled runtime */ if (!ipv6_mod_enabled()) return 0; vrf->fib6_table = fib6_new_table(net, vrf->tb_id); if (!vrf->fib6_table) goto out; /* create a dst for routing packets out a VRF device */ rt6 = ip6_dst_alloc(net, dev, flags); if (!rt6) goto out; rt6->dst.output = vrf_output6; rcu_assign_pointer(vrf->rt6, rt6); rc = 0; out: return rc; } #else static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { return skb; } static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { } static int vrf_rt6_create(struct net_device *dev) { return 0; } #endif /* modelled after ip_finish_output2 */ static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct rtable *rt = dst_rtable(dst); struct net_device *dev = dst->dev; unsigned int hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; vrf_nf_reset_ct(skb); /* Be paranoid, rather than too clever. */ if (unlikely(skb_headroom(skb) < hh_len && dev->header_ops)) { skb = skb_expand_head(skb, hh_len); if (!skb) { dev->stats.tx_errors++; return -ENOMEM; } } rcu_read_lock(); neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); if (!IS_ERR(neigh)) { int ret; sock_confirm_neigh(skb, neigh); /* if crossing protocols, can not use the cached header */ ret = neigh_output(neigh, skb, is_v6gw); rcu_read_unlock(); return ret; } rcu_read_unlock(); vrf_tx_error(skb->dev, skb); return -EINVAL; } static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len); skb->dev = dev; skb->protocol = htons(ETH_P_IP); return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, NULL, dev, vrf_finish_output, !(IPCB(skb)->flags & IPSKB_REROUTED)); } /* set dst on skb to send packet to us via dev_xmit path. Allows * packet to go through device based features such as qdisc, netfilter * hooks and packet sockets with skb->dev set to vrf device. */ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_vrf *vrf = netdev_priv(vrf_dev); struct dst_entry *dst = NULL; struct rtable *rth; rcu_read_lock(); rth = rcu_dereference(vrf->rth); if (likely(rth)) { dst = &rth->dst; dst_hold(dst); } rcu_read_unlock(); if (unlikely(!dst)) { vrf_tx_error(vrf_dev, skb); return NULL; } skb_dst_drop(skb); skb_dst_set(skb, dst); return skb; } static int vrf_output_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { vrf_finish_direct(skb); return vrf_ip_local_out(net, sk, skb); } static int vrf_output_direct(struct net *net, struct sock *sk, struct sk_buff *skb) { int err = 1; skb->protocol = htons(ETH_P_IP); if (!(IPCB(skb)->flags & IPSKB_REROUTED)) err = nf_hook(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb, NULL, skb->dev, vrf_output_direct_finish); if (likely(err == 1)) vrf_finish_direct(skb); return err; } static int vrf_ip_out_direct_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { int err; err = vrf_output_direct(net, sk, skb); if (likely(err == 1)) err = vrf_ip_local_out(net, sk, skb); return err; } static struct sk_buff *vrf_ip_out_direct(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(vrf_dev); int err; skb->dev = vrf_dev; err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, vrf_dev, vrf_ip_out_direct_finish); if (likely(err == 1)) err = vrf_output_direct(net, sk, skb); if (likely(err == 1)) return skb; return NULL; } static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb) { /* don't divert multicast or local broadcast */ if (ipv4_is_multicast(ip_hdr(skb)->daddr) || ipv4_is_lbcast(ip_hdr(skb)->daddr)) return skb; vrf_nf_set_untracked(skb); if (qdisc_tx_is_default(vrf_dev) || IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) return vrf_ip_out_direct(vrf_dev, sk, skb); return vrf_ip_out_redirect(vrf_dev, skb); } /* called with rcu lock held */ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, struct sock *sk, struct sk_buff *skb, u16 proto) { switch (proto) { case AF_INET: return vrf_ip_out(vrf_dev, sk, skb); case AF_INET6: return vrf_ip6_out(vrf_dev, sk, skb); } return skb; } /* holding rtnl */ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { struct rtable *rth = rtnl_dereference(vrf->rth); struct net *net = dev_net(dev); struct dst_entry *dst; RCU_INIT_POINTER(vrf->rth, NULL); synchronize_rcu(); /* move dev in dst's to loopback so this VRF device can be deleted * - based on dst_ifdown */ if (rth) { dst = &rth->dst; netdev_ref_replace(dst->dev, net->loopback_dev, &dst->dev_tracker, GFP_KERNEL); dst->dev = net->loopback_dev; dst_release(dst); } } static int vrf_rtable_create(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); struct rtable *rth; if (!fib_new_table(dev_net(dev), vrf->tb_id)) return -ENOMEM; /* create a dst for routing packets out through a VRF device */ rth = rt_dst_alloc(dev, 0, RTN_UNICAST, 1); if (!rth) return -ENOMEM; rth->dst.output = vrf_output; rcu_assign_pointer(vrf->rth, rth); return 0; } /**************************** device handling ********************/ /* cycle interface to flush neighbor cache and move routes across tables */ static void cycle_netdev(struct net_device *dev, struct netlink_ext_ack *extack) { unsigned int flags = dev->flags; int ret; if (!netif_running(dev)) return; ret = dev_change_flags(dev, flags & ~IFF_UP, extack); if (ret >= 0) ret = dev_change_flags(dev, flags, extack); if (ret < 0) { netdev_err(dev, "Failed to cycle device %s; route tables might be wrong!\n", dev->name); } } static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev, struct netlink_ext_ack *extack) { int ret; /* do not allow loopback device to be enslaved to a VRF. * The vrf device acts as the loopback for the vrf. */ if (port_dev == dev_net(dev)->loopback_dev) { NL_SET_ERR_MSG(extack, "Can not enslave loopback device to a VRF"); return -EOPNOTSUPP; } port_dev->priv_flags |= IFF_L3MDEV_SLAVE; ret = netdev_master_upper_dev_link(port_dev, dev, NULL, NULL, extack); if (ret < 0) goto err; cycle_netdev(port_dev, extack); return 0; err: port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; return ret; } static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev, struct netlink_ext_ack *extack) { if (netif_is_l3_master(port_dev)) { NL_SET_ERR_MSG(extack, "Can not enslave an L3 master device to a VRF"); return -EINVAL; } if (netif_is_l3_slave(port_dev)) return -EINVAL; return do_vrf_add_slave(dev, port_dev, extack); } /* inverse of do_vrf_add_slave */ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev) { netdev_upper_dev_unlink(port_dev, dev); port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE; cycle_netdev(port_dev, NULL); return 0; } static int vrf_del_slave(struct net_device *dev, struct net_device *port_dev) { return do_vrf_del_slave(dev, port_dev); } static void vrf_dev_uninit(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); vrf_rtable_release(dev, vrf); vrf_rt6_release(dev, vrf); } static int vrf_dev_init(struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); /* create the default dst which points back to us */ if (vrf_rtable_create(dev) != 0) goto out_nomem; if (vrf_rt6_create(dev) != 0) goto out_rth; dev->flags = IFF_MASTER | IFF_NOARP; /* similarly, oper state is irrelevant; set to up to avoid confusion */ dev->operstate = IF_OPER_UP; netdev_lockdep_set_classes(dev); return 0; out_rth: vrf_rtable_release(dev, vrf); out_nomem: return -ENOMEM; } static const struct net_device_ops vrf_netdev_ops = { .ndo_init = vrf_dev_init, .ndo_uninit = vrf_dev_uninit, .ndo_start_xmit = vrf_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_add_slave = vrf_add_slave, .ndo_del_slave = vrf_del_slave, }; static u32 vrf_fib_table(const struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); return vrf->tb_id; } static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { kfree_skb(skb); return 0; } static struct sk_buff *vrf_rcv_nfhook(u8 pf, unsigned int hook, struct sk_buff *skb, struct net_device *dev) { struct net *net = dev_net(dev); if (nf_hook(pf, hook, net, NULL, skb, dev, NULL, vrf_rcv_finish) != 1) skb = NULL; /* kfree_skb(skb) handled by nf code */ return skb; } static int vrf_prepare_mac_header(struct sk_buff *skb, struct net_device *vrf_dev, u16 proto) { struct ethhdr *eth; int err; /* in general, we do not know if there is enough space in the head of * the packet for hosting the mac header. */ err = skb_cow_head(skb, LL_RESERVED_SPACE(vrf_dev)); if (unlikely(err)) /* no space in the skb head */ return -ENOBUFS; __skb_push(skb, ETH_HLEN); eth = (struct ethhdr *)skb->data; skb_reset_mac_header(skb); skb_reset_mac_len(skb); /* we set the ethernet destination and the source addresses to the * address of the VRF device. */ ether_addr_copy(eth->h_dest, vrf_dev->dev_addr); ether_addr_copy(eth->h_source, vrf_dev->dev_addr); eth->h_proto = htons(proto); /* the destination address of the Ethernet frame corresponds to the * address set on the VRF interface; therefore, the packet is intended * to be processed locally. */ skb->protocol = eth->h_proto; skb->pkt_type = PACKET_HOST; skb_postpush_rcsum(skb, skb->data, ETH_HLEN); skb_pull_inline(skb, ETH_HLEN); return 0; } /* prepare and add the mac header to the packet if it was not set previously. * In this way, packet sniffers such as tcpdump can parse the packet correctly. * If the mac header was already set, the original mac header is left * untouched and the function returns immediately. */ static int vrf_add_mac_header_if_unset(struct sk_buff *skb, struct net_device *vrf_dev, u16 proto, struct net_device *orig_dev) { if (skb_mac_header_was_set(skb) && dev_has_header(orig_dev)) return 0; return vrf_prepare_mac_header(skb, vrf_dev, proto); } #if IS_ENABLED(CONFIG_IPV6) /* neighbor handling is done with actual device; do not want * to flip skb->dev for those ndisc packets. This really fails * for multiple next protocols (e.g., NEXTHDR_HOP). But it is * a start. */ static bool ipv6_ndisc_frame(const struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); bool rc = false; if (iph->nexthdr == NEXTHDR_ICMP) { const struct icmp6hdr *icmph; struct icmp6hdr _icmph; icmph = skb_header_pointer(skb, sizeof(*iph), sizeof(_icmph), &_icmph); if (!icmph) goto out; switch (icmph->icmp6_type) { case NDISC_ROUTER_SOLICITATION: case NDISC_ROUTER_ADVERTISEMENT: case NDISC_NEIGHBOUR_SOLICITATION: case NDISC_NEIGHBOUR_ADVERTISEMENT: case NDISC_REDIRECT: rc = true; break; } } out: return rc; } static struct rt6_info *vrf_ip6_route_lookup(struct net *net, const struct net_device *dev, struct flowi6 *fl6, int ifindex, const struct sk_buff *skb, int flags) { struct net_vrf *vrf = netdev_priv(dev); return ip6_pol_route(net, vrf->fib6_table, ifindex, fl6, skb, flags); } static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, int ifindex) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct flowi6 fl6 = { .flowi6_iif = ifindex, .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), }; struct net *net = dev_net(vrf_dev); struct rt6_info *rt6; rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb, RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE); if (unlikely(!rt6)) return; if (unlikely(&rt6->dst == &net->ipv6.ip6_null_entry->dst)) return; skb_dst_set(skb, &rt6->dst); } static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { int orig_iif = skb->skb_iif; bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); /* loopback, multicast & non-ND link-local traffic; do not push through * packet taps again. Reset pkt_type for upper layers to process skb. * For non-loopback strict packets, determine the dst using the original * ifindex. */ if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IP6CB(skb)->flags |= IP6SKB_L3SLAVE; if (skb->pkt_type == PACKET_LOOPBACK) skb->pkt_type = PACKET_HOST; else vrf_ip6_input_dst(skb, vrf_dev, orig_iif); goto out; } /* if packet is NDISC then keep the ingress interface */ if (!is_ndisc) { struct net_device *orig_dev = skb->dev; vrf_rx_stats(vrf_dev, skb->len); skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; if (!list_empty(&vrf_dev->ptype_all)) { int err; err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IPV6, orig_dev); if (likely(!err)) { skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, skb->mac_len); } } IP6CB(skb)->flags |= IP6SKB_L3SLAVE; } if (need_strict) vrf_ip6_input_dst(skb, vrf_dev, orig_iif); skb = vrf_rcv_nfhook(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, vrf_dev); out: return skb; } #else static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { return skb; } #endif static struct sk_buff *vrf_ip_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { struct net_device *orig_dev = skb->dev; skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IPCB(skb)->flags |= IPSKB_L3SLAVE; if (ipv4_is_multicast(ip_hdr(skb)->daddr)) goto out; /* loopback traffic; do not push through packet taps again. * Reset pkt_type for upper layers to process skb */ if (skb->pkt_type == PACKET_LOOPBACK) { skb->pkt_type = PACKET_HOST; goto out; } vrf_rx_stats(vrf_dev, skb->len); if (!list_empty(&vrf_dev->ptype_all)) { int err; err = vrf_add_mac_header_if_unset(skb, vrf_dev, ETH_P_IP, orig_dev); if (likely(!err)) { skb_push(skb, skb->mac_len); dev_queue_xmit_nit(skb, vrf_dev); skb_pull(skb, skb->mac_len); } } skb = vrf_rcv_nfhook(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, vrf_dev); out: return skb; } /* called with rcu lock held */ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, struct sk_buff *skb, u16 proto) { switch (proto) { case AF_INET: return vrf_ip_rcv(vrf_dev, skb); case AF_INET6: return vrf_ip6_rcv(vrf_dev, skb); } return skb; } #if IS_ENABLED(CONFIG_IPV6) /* send to link-local or multicast address via interface enslaved to * VRF device. Force lookup to VRF table without changing flow struct * Note: Caller to this function must hold rcu_read_lock() and no refcnt * is taken on the dst by this function. */ static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, struct flowi6 *fl6) { struct net *net = dev_net(dev); int flags = RT6_LOOKUP_F_IFACE | RT6_LOOKUP_F_DST_NOREF; struct dst_entry *dst = NULL; struct rt6_info *rt; /* VRF device does not have a link-local address and * sending packets to link-local or mcast addresses over * a VRF device does not make sense */ if (fl6->flowi6_oif == dev->ifindex) { dst = &net->ipv6.ip6_null_entry->dst; return dst; } if (!ipv6_addr_any(&fl6->saddr)) flags |= RT6_LOOKUP_F_HAS_SADDR; rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, NULL, flags); if (rt) dst = &rt->dst; return dst; } #endif static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, .l3mdev_l3_rcv = vrf_l3_rcv, .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) .l3mdev_link_scope_lookup = vrf_link_scope_lookup, #endif }; static void vrf_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { strscpy(info->driver, DRV_NAME, sizeof(info->driver)); strscpy(info->version, DRV_VERSION, sizeof(info->version)); } static const struct ethtool_ops vrf_ethtool_ops = { .get_drvinfo = vrf_get_drvinfo, }; static inline size_t vrf_fib_rule_nl_size(void) { size_t sz; sz = NLMSG_ALIGN(sizeof(struct fib_rule_hdr)); sz += nla_total_size(sizeof(u8)); /* FRA_L3MDEV */ sz += nla_total_size(sizeof(u32)); /* FRA_PRIORITY */ sz += nla_total_size(sizeof(u8)); /* FRA_PROTOCOL */ return sz; } static int vrf_fib_rule(const struct net_device *dev, __u8 family, bool add_it) { struct fib_rule_hdr *frh; struct nlmsghdr *nlh; struct sk_buff *skb; int err; if ((family == AF_INET6 || family == RTNL_FAMILY_IP6MR) && !ipv6_mod_enabled()) return 0; skb = nlmsg_new(vrf_fib_rule_nl_size(), GFP_KERNEL); if (!skb) return -ENOMEM; nlh = nlmsg_put(skb, 0, 0, 0, sizeof(*frh), 0); if (!nlh) goto nla_put_failure; /* rule only needs to appear once */ nlh->nlmsg_flags |= NLM_F_EXCL; frh = nlmsg_data(nlh); memset(frh, 0, sizeof(*frh)); frh->family = family; frh->action = FR_ACT_TO_TBL; if (nla_put_u8(skb, FRA_PROTOCOL, RTPROT_KERNEL)) goto nla_put_failure; if (nla_put_u8(skb, FRA_L3MDEV, 1)) goto nla_put_failure; if (nla_put_u32(skb, FRA_PRIORITY, FIB_RULE_PREF)) goto nla_put_failure; nlmsg_end(skb, nlh); /* fib_nl_{new,del}rule handling looks for net from skb->sk */ skb->sk = dev_net(dev)->rtnl; if (add_it) { err = fib_nl_newrule(skb, nlh, NULL); if (err == -EEXIST) err = 0; } else { err = fib_nl_delrule(skb, nlh, NULL); if (err == -ENOENT) err = 0; } nlmsg_free(skb); return err; nla_put_failure: nlmsg_free(skb); return -EMSGSIZE; } static int vrf_add_fib_rules(const struct net_device *dev) { int err; err = vrf_fib_rule(dev, AF_INET, true); if (err < 0) goto out_err; err = vrf_fib_rule(dev, AF_INET6, true); if (err < 0) goto ipv6_err; #if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES) err = vrf_fib_rule(dev, RTNL_FAMILY_IPMR, true); if (err < 0) goto ipmr_err; #endif #if IS_ENABLED(CONFIG_IPV6_MROUTE_MULTIPLE_TABLES) err = vrf_fib_rule(dev, RTNL_FAMILY_IP6MR, true); if (err < 0) goto ip6mr_err; #endif return 0; #if IS_ENABLED(CONFIG_IPV6_MROUTE_MULTIPLE_TABLES) ip6mr_err: vrf_fib_rule(dev, RTNL_FAMILY_IPMR, false); #endif #if IS_ENABLED(CONFIG_IP_MROUTE_MULTIPLE_TABLES) ipmr_err: vrf_fib_rule(dev, AF_INET6, false); #endif ipv6_err: vrf_fib_rule(dev, AF_INET, false); out_err: netdev_err(dev, "Failed to add FIB rules.\n"); return err; } static void vrf_setup(struct net_device *dev) { ether_setup(dev); /* Initialize the device structure. */ dev->netdev_ops = &vrf_netdev_ops; dev->l3mdev_ops = &vrf_l3mdev_ops; dev->ethtool_ops = &vrf_ethtool_ops; dev->needs_free_netdev = true; /* Fill in device structure with ethernet-generic values. */ eth_hw_addr_random(dev); /* don't acquire vrf device's netif_tx_lock when transmitting */ dev->features |= NETIF_F_LLTX; /* don't allow vrf devices to change network namespaces. */ dev->features |= NETIF_F_NETNS_LOCAL; /* does not make sense for a VLAN to be added to a vrf device */ dev->features |= NETIF_F_VLAN_CHALLENGED; /* enable offload features */ dev->features |= NETIF_F_GSO_SOFTWARE; dev->features |= NETIF_F_RXCSUM | NETIF_F_HW_CSUM | NETIF_F_SCTP_CRC; dev->features |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA; dev->hw_features = dev->features; dev->hw_enc_features = dev->features; /* default to no qdisc; user can add if desired */ dev->priv_flags |= IFF_NO_QUEUE; dev->priv_flags |= IFF_NO_RX_HANDLER; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; /* VRF devices do not care about MTU, but if the MTU is set * too low then the ipv4 and ipv6 protocols are disabled * which breaks networking. */ dev->min_mtu = IPV6_MIN_MTU; dev->max_mtu = IP6_MAX_MTU; dev->mtu = dev->max_mtu; dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; } static int vrf_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (tb[IFLA_ADDRESS]) { if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) { NL_SET_ERR_MSG(extack, "Invalid hardware address"); return -EINVAL; } if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) { NL_SET_ERR_MSG(extack, "Invalid hardware address"); return -EADDRNOTAVAIL; } } return 0; } static void vrf_dellink(struct net_device *dev, struct list_head *head) { struct net_device *port_dev; struct list_head *iter; netdev_for_each_lower_dev(dev, port_dev, iter) vrf_del_slave(dev, port_dev); vrf_map_unregister_dev(dev); unregister_netdevice_queue(dev, head); } static int vrf_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct net_vrf *vrf = netdev_priv(dev); struct netns_vrf *nn_vrf; bool *add_fib_rules; struct net *net; int err; if (!data || !data[IFLA_VRF_TABLE]) { NL_SET_ERR_MSG(extack, "VRF table id is missing"); return -EINVAL; } vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]); if (vrf->tb_id == RT_TABLE_UNSPEC) { NL_SET_ERR_MSG_ATTR(extack, data[IFLA_VRF_TABLE], "Invalid VRF table id"); return -EINVAL; } dev->priv_flags |= IFF_L3MDEV_MASTER; err = register_netdevice(dev); if (err) goto out; /* mapping between table_id and vrf; * note: such binding could not be done in the dev init function * because dev->ifindex id is not available yet. */ vrf->ifindex = dev->ifindex; err = vrf_map_register_dev(dev, extack); if (err) { unregister_netdevice(dev); goto out; } net = dev_net(dev); nn_vrf = net_generic(net, vrf_net_id); add_fib_rules = &nn_vrf->add_fib_rules; if (*add_fib_rules) { err = vrf_add_fib_rules(dev); if (err) { vrf_map_unregister_dev(dev); unregister_netdevice(dev); goto out; } *add_fib_rules = false; } out: return err; } static size_t vrf_nl_getsize(const struct net_device *dev) { return nla_total_size(sizeof(u32)); /* IFLA_VRF_TABLE */ } static int vrf_fillinfo(struct sk_buff *skb, const struct net_device *dev) { struct net_vrf *vrf = netdev_priv(dev); return nla_put_u32(skb, IFLA_VRF_TABLE, vrf->tb_id); } static size_t vrf_get_slave_size(const struct net_device *bond_dev, const struct net_device *slave_dev) { return nla_total_size(sizeof(u32)); /* IFLA_VRF_PORT_TABLE */ } static int vrf_fill_slave_info(struct sk_buff *skb, const struct net_device *vrf_dev, const struct net_device *slave_dev) { struct net_vrf *vrf = netdev_priv(vrf_dev); if (nla_put_u32(skb, IFLA_VRF_PORT_TABLE, vrf->tb_id)) return -EMSGSIZE; return 0; } static const struct nla_policy vrf_nl_policy[IFLA_VRF_MAX + 1] = { [IFLA_VRF_TABLE] = { .type = NLA_U32 }, }; static struct rtnl_link_ops vrf_link_ops __read_mostly = { .kind = DRV_NAME, .priv_size = sizeof(struct net_vrf), .get_size = vrf_nl_getsize, .policy = vrf_nl_policy, .validate = vrf_validate, .fill_info = vrf_fillinfo, .get_slave_size = vrf_get_slave_size, .fill_slave_info = vrf_fill_slave_info, .newlink = vrf_newlink, .dellink = vrf_dellink, .setup = vrf_setup, .maxtype = IFLA_VRF_MAX, }; static int vrf_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); /* only care about unregister events to drop slave references */ if (event == NETDEV_UNREGISTER) { struct net_device *vrf_dev; if (!netif_is_l3_slave(dev)) goto out; vrf_dev = netdev_master_upper_dev_get(dev); vrf_del_slave(vrf_dev, dev); } out: return NOTIFY_DONE; } static struct notifier_block vrf_notifier_block __read_mostly = { .notifier_call = vrf_device_event, }; static int vrf_map_init(struct vrf_map *vmap) { spin_lock_init(&vmap->vmap_lock); hash_init(vmap->ht); vmap->strict_mode = false; return 0; } #ifdef CONFIG_SYSCTL static bool vrf_strict_mode(struct vrf_map *vmap) { bool strict_mode; vrf_map_lock(vmap); strict_mode = vmap->strict_mode; vrf_map_unlock(vmap); return strict_mode; } static int vrf_strict_mode_change(struct vrf_map *vmap, bool new_mode) { bool *cur_mode; int res = 0; vrf_map_lock(vmap); cur_mode = &vmap->strict_mode; if (*cur_mode == new_mode) goto unlock; if (*cur_mode) { /* disable strict mode */ *cur_mode = false; } else { if (vmap->shared_tables) { /* we cannot allow strict_mode because there are some * vrfs that share one or more tables. */ res = -EBUSY; goto unlock; } /* no tables are shared among vrfs, so we can go back * to 1:1 association between a vrf with its table. */ *cur_mode = true; } unlock: vrf_map_unlock(vmap); return res; } static int vrf_shared_table_handler(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = (struct net *)table->extra1; struct vrf_map *vmap = netns_vrf_map(net); int proc_strict_mode = 0; struct ctl_table tmp = { .procname = table->procname, .data = &proc_strict_mode, .maxlen = sizeof(int), .mode = table->mode, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }; int ret; if (!write) proc_strict_mode = vrf_strict_mode(vmap); ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && ret == 0) ret = vrf_strict_mode_change(vmap, (bool)proc_strict_mode); return ret; } static const struct ctl_table vrf_table[] = { { .procname = "strict_mode", .data = NULL, .maxlen = sizeof(int), .mode = 0644, .proc_handler = vrf_shared_table_handler, /* set by the vrf_netns_init */ .extra1 = NULL, }, }; static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf) { struct ctl_table *table; table = kmemdup(vrf_table, sizeof(vrf_table), GFP_KERNEL); if (!table) return -ENOMEM; /* init the extra1 parameter with the reference to current netns */ table[0].extra1 = net; nn_vrf->ctl_hdr = register_net_sysctl_sz(net, "net/vrf", table, ARRAY_SIZE(vrf_table)); if (!nn_vrf->ctl_hdr) { kfree(table); return -ENOMEM; } return 0; } static void vrf_netns_exit_sysctl(struct net *net) { struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); const struct ctl_table *table; table = nn_vrf->ctl_hdr->ctl_table_arg; unregister_net_sysctl_table(nn_vrf->ctl_hdr); kfree(table); } #else static int vrf_netns_init_sysctl(struct net *net, struct netns_vrf *nn_vrf) { return 0; } static void vrf_netns_exit_sysctl(struct net *net) { } #endif /* Initialize per network namespace state */ static int __net_init vrf_netns_init(struct net *net) { struct netns_vrf *nn_vrf = net_generic(net, vrf_net_id); nn_vrf->add_fib_rules = true; vrf_map_init(&nn_vrf->vmap); return vrf_netns_init_sysctl(net, nn_vrf); } static void __net_exit vrf_netns_exit(struct net *net) { vrf_netns_exit_sysctl(net); } static struct pernet_operations vrf_net_ops __net_initdata = { .init = vrf_netns_init, .exit = vrf_netns_exit, .id = &vrf_net_id, .size = sizeof(struct netns_vrf), }; static int __init vrf_init_module(void) { int rc; register_netdevice_notifier(&vrf_notifier_block); rc = register_pernet_subsys(&vrf_net_ops); if (rc < 0) goto error; rc = l3mdev_table_lookup_register(L3MDEV_TYPE_VRF, vrf_ifindex_lookup_by_table_id); if (rc < 0) goto unreg_pernet; rc = rtnl_link_register(&vrf_link_ops); if (rc < 0) goto table_lookup_unreg; return 0; table_lookup_unreg: l3mdev_table_lookup_unregister(L3MDEV_TYPE_VRF, vrf_ifindex_lookup_by_table_id); unreg_pernet: unregister_pernet_subsys(&vrf_net_ops); error: unregister_netdevice_notifier(&vrf_notifier_block); return rc; } module_init(vrf_init_module); MODULE_AUTHOR("Shrijeet Mukherjee, David Ahern"); MODULE_DESCRIPTION("Device driver to instantiate VRF domains"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK(DRV_NAME); MODULE_VERSION(DRV_VERSION); |
| 252 251 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2014 Christoph Hellwig. */ #include "xfs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_trans.h" #include "xfs_bmap.h" #include "xfs_iomap.h" #include "xfs_pnfs.h" /* * Ensure that we do not have any outstanding pNFS layouts that can be used by * clients to directly read from or write to this inode. This must be called * before every operation that can remove blocks from the extent map. * Additionally we call it during the write operation, where aren't concerned * about exposing unallocated blocks but just want to provide basic * synchronization between a local writer and pNFS clients. mmap writes would * also benefit from this sort of synchronization, but due to the tricky locking * rules in the page fault path we don't bother. */ int xfs_break_leased_layouts( struct inode *inode, uint *iolock, bool *did_unlock) { struct xfs_inode *ip = XFS_I(inode); int error; while ((error = break_layout(inode, false)) == -EWOULDBLOCK) { xfs_iunlock(ip, *iolock); *did_unlock = true; error = break_layout(inode, true); *iolock &= ~XFS_IOLOCK_SHARED; *iolock |= XFS_IOLOCK_EXCL; xfs_ilock(ip, *iolock); } return error; } /* * Get a unique ID including its location so that the client can identify * the exported device. */ int xfs_fs_get_uuid( struct super_block *sb, u8 *buf, u32 *len, u64 *offset) { struct xfs_mount *mp = XFS_M(sb); xfs_notice_once(mp, "Using experimental pNFS feature, use at your own risk!"); if (*len < sizeof(uuid_t)) return -EINVAL; memcpy(buf, &mp->m_sb.sb_uuid, sizeof(uuid_t)); *len = sizeof(uuid_t); *offset = offsetof(struct xfs_dsb, sb_uuid); return 0; } /* * We cannot use file based VFS helpers such as file_modified() to update * inode state as we modify the data/metadata in the inode here. Hence we have * to open code the timestamp updates and SUID/SGID stripping. We also need * to set the inode prealloc flag to ensure that the extents we allocate are not * removed if the inode is reclaimed from memory before xfs_fs_block_commit() * is from the client to indicate that data has been written and the file size * can be extended. */ static int xfs_fs_map_update_inode( struct xfs_inode *ip) { struct xfs_trans *tp; int error; error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid, 0, 0, 0, &tp); if (error) return error; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); VFS_I(ip)->i_mode &= ~S_ISUID; if (VFS_I(ip)->i_mode & S_IXGRP) VFS_I(ip)->i_mode &= ~S_ISGID; xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); ip->i_diflags |= XFS_DIFLAG_PREALLOC; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); return xfs_trans_commit(tp); } /* * Get a layout for the pNFS client. */ int xfs_fs_map_blocks( struct inode *inode, loff_t offset, u64 length, struct iomap *iomap, bool write, u32 *device_generation) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; struct xfs_bmbt_irec imap; xfs_fileoff_t offset_fsb, end_fsb; loff_t limit; int bmapi_flags = XFS_BMAPI_ENTIRE; int nimaps = 1; uint lock_flags; int error = 0; u64 seq; if (xfs_is_shutdown(mp)) return -EIO; /* * We can't export inodes residing on the realtime device. The realtime * device doesn't have a UUID to identify it, so the client has no way * to find it. */ if (XFS_IS_REALTIME_INODE(ip)) return -ENXIO; /* * The pNFS block layout spec actually supports reflink like * functionality, but the Linux pNFS server doesn't implement it yet. */ if (xfs_is_reflink_inode(ip)) return -ENXIO; /* * Lock out any other I/O before we flush and invalidate the pagecache, * and then hand out a layout to the remote system. This is very * similar to direct I/O, except that the synchronization is much more * complicated. See the comment near xfs_break_leased_layouts * for a detailed explanation. */ xfs_ilock(ip, XFS_IOLOCK_EXCL); error = -EINVAL; limit = mp->m_super->s_maxbytes; if (!write) limit = max(limit, round_up(i_size_read(inode), inode->i_sb->s_blocksize)); if (offset > limit) goto out_unlock; if (offset > limit - length) length = limit - offset; error = filemap_write_and_wait(inode->i_mapping); if (error) goto out_unlock; error = invalidate_inode_pages2(inode->i_mapping); if (WARN_ON_ONCE(error)) goto out_unlock; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length); offset_fsb = XFS_B_TO_FSBT(mp, offset); lock_flags = xfs_ilock_data_map_shared(ip); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, &nimaps, bmapi_flags); seq = xfs_iomap_inode_sequence(ip, 0); ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK); if (!error && write && (!nimaps || imap.br_startblock == HOLESTARTBLOCK)) { if (offset + length > XFS_ISIZE(ip)) end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb); else if (nimaps && imap.br_startblock == HOLESTARTBLOCK) end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount); xfs_iunlock(ip, lock_flags); error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb, 0, &imap, &seq); if (error) goto out_unlock; /* * Ensure the next transaction is committed synchronously so * that the blocks allocated and handed out to the client are * guaranteed to be present even after a server crash. */ error = xfs_fs_map_update_inode(ip); if (!error) error = xfs_log_force_inode(ip); if (error) goto out_unlock; } else { xfs_iunlock(ip, lock_flags); } xfs_iunlock(ip, XFS_IOLOCK_EXCL); error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq); *device_generation = mp->m_generation; return error; out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } /* * Ensure the size update falls into a valid allocated block. */ static int xfs_pnfs_validate_isize( struct xfs_inode *ip, xfs_off_t isize) { struct xfs_bmbt_irec imap; int nimaps = 1; int error = 0; xfs_ilock(ip, XFS_ILOCK_SHARED); error = xfs_bmapi_read(ip, XFS_B_TO_FSBT(ip->i_mount, isize - 1), 1, &imap, &nimaps, 0); xfs_iunlock(ip, XFS_ILOCK_SHARED); if (error) return error; if (imap.br_startblock == HOLESTARTBLOCK || imap.br_startblock == DELAYSTARTBLOCK || imap.br_state == XFS_EXT_UNWRITTEN) return -EIO; return 0; } /* * Make sure the blocks described by maps are stable on disk. This includes * converting any unwritten extents, flushing the disk cache and updating the * time stamps. * * Note that we rely on the caller to always send us a timestamp update so that * we always commit a transaction here. If that stops being true we will have * to manually flush the cache here similar to what the fsync code path does * for datasyncs on files that have no dirty metadata. */ int xfs_fs_commit_blocks( struct inode *inode, struct iomap *maps, int nr_maps, struct iattr *iattr) { struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; struct xfs_trans *tp; bool update_isize = false; int error, i; loff_t size; ASSERT(iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)); xfs_ilock(ip, XFS_IOLOCK_EXCL); size = i_size_read(inode); if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size > size) { update_isize = true; size = iattr->ia_size; } for (i = 0; i < nr_maps; i++) { u64 start, length, end; start = maps[i].offset; if (start > size) continue; end = start + maps[i].length; if (end > size) end = size; length = end - start; if (!length) continue; /* * Make sure reads through the pagecache see the new data. */ error = invalidate_inode_pages2_range(inode->i_mapping, start >> PAGE_SHIFT, (end - 1) >> PAGE_SHIFT); WARN_ON_ONCE(error); error = xfs_iomap_write_unwritten(ip, start, length, false); if (error) goto out_drop_iolock; } if (update_isize) { error = xfs_pnfs_validate_isize(ip, size); if (error) goto out_drop_iolock; } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); if (error) goto out_drop_iolock; xfs_ilock(ip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID))); setattr_copy(&nop_mnt_idmap, inode, iattr); if (update_isize) { i_size_write(inode, iattr->ia_size); ip->i_disk_size = iattr->ia_size; } xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); out_drop_iolock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); return error; } |
| 2 2 2 1 1 1 4 3 4 1 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 | /* * linux/drivers/video/fb_sys_read.c - Generic file operations where * framebuffer is in system RAM * * Copyright (C) 2007 Antonino Daplas <adaplas@pol.net> * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive * for more details. * */ #include <linux/fb.h> #include <linux/module.h> #include <linux/uaccess.h> ssize_t fb_sys_read(struct fb_info *info, char __user *buf, size_t count, loff_t *ppos) { unsigned long p = *ppos; void *src; int err = 0; unsigned long total_size, c; ssize_t ret; if (!(info->flags & FBINFO_VIRTFB)) fb_warn_once(info, "Framebuffer is not in virtual address space."); if (!info->screen_buffer) return -ENODEV; total_size = info->screen_size; if (total_size == 0) total_size = info->fix.smem_len; if (p >= total_size) return 0; if (count >= total_size) count = total_size; if (count + p > total_size) count = total_size - p; src = info->screen_buffer + p; if (info->fbops->fb_sync) info->fbops->fb_sync(info); c = copy_to_user(buf, src, count); if (c) err = -EFAULT; ret = count - c; *ppos += ret; return ret ? ret : err; } EXPORT_SYMBOL_GPL(fb_sys_read); ssize_t fb_sys_write(struct fb_info *info, const char __user *buf, size_t count, loff_t *ppos) { unsigned long p = *ppos; void *dst; int err = 0; unsigned long total_size, c; size_t ret; if (!(info->flags & FBINFO_VIRTFB)) fb_warn_once(info, "Framebuffer is not in virtual address space."); if (!info->screen_buffer) return -ENODEV; total_size = info->screen_size; if (total_size == 0) total_size = info->fix.smem_len; if (p > total_size) return -EFBIG; if (count > total_size) { err = -EFBIG; count = total_size; } if (count + p > total_size) { if (!err) err = -ENOSPC; count = total_size - p; } dst = info->screen_buffer + p; if (info->fbops->fb_sync) info->fbops->fb_sync(info); c = copy_from_user(dst, buf, count); if (c) err = -EFAULT; ret = count - c; *ppos += ret; return ret ? ret : err; } EXPORT_SYMBOL_GPL(fb_sys_write); MODULE_AUTHOR("Antonino Daplas <adaplas@pol.net>"); MODULE_DESCRIPTION("Generic file read (fb in system RAM)"); MODULE_LICENSE("GPL"); |
| 1 1 12 1 1 2 2 2 1 12 9 9 9 2 15 1 1 1 12 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 | // SPDX-License-Identifier: GPL-2.0-or-later /* * SR-IPv6 implementation * * Authors: * David Lebrun <david.lebrun@uclouvain.be> * eBPF support: Mathieu Xhonneux <m.xhonneux@gmail.com> */ #include <linux/filter.h> #include <linux/types.h> #include <linux/skbuff.h> #include <linux/net.h> #include <linux/module.h> #include <net/ip.h> #include <net/lwtunnel.h> #include <net/netevent.h> #include <net/netns/generic.h> #include <net/ip6_fib.h> #include <net/route.h> #include <net/seg6.h> #include <linux/seg6.h> #include <linux/seg6_local.h> #include <net/addrconf.h> #include <net/ip6_route.h> #include <net/dst_cache.h> #include <net/ip_tunnels.h> #ifdef CONFIG_IPV6_SEG6_HMAC #include <net/seg6_hmac.h> #endif #include <net/seg6_local.h> #include <linux/etherdevice.h> #include <linux/bpf.h> #include <linux/netfilter.h> #define SEG6_F_ATTR(i) BIT(i) struct seg6_local_lwt; /* callbacks used for customizing the creation and destruction of a behavior */ struct seg6_local_lwtunnel_ops { int (*build_state)(struct seg6_local_lwt *slwt, const void *cfg, struct netlink_ext_ack *extack); void (*destroy_state)(struct seg6_local_lwt *slwt); }; struct seg6_action_desc { int action; unsigned long attrs; /* The optattrs field is used for specifying all the optional * attributes supported by a specific behavior. * It means that if one of these attributes is not provided in the * netlink message during the behavior creation, no errors will be * returned to the userspace. * * Each attribute can be only of two types (mutually exclusive): * 1) required or 2) optional. * Every user MUST obey to this rule! If you set an attribute as * required the same attribute CANNOT be set as optional and vice * versa. */ unsigned long optattrs; int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt); int static_headroom; struct seg6_local_lwtunnel_ops slwt_ops; }; struct bpf_lwt_prog { struct bpf_prog *prog; char *name; }; /* default length values (expressed in bits) for both Locator-Block and * Locator-Node Function. * * Both SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS *must* be: * i) greater than 0; * ii) evenly divisible by 8. In other terms, the lengths of the * Locator-Block and Locator-Node Function must be byte-aligned (we can * relax this constraint in the future if really needed). * * Moreover, a third condition must hold: * iii) SEG6_LOCAL_LCBLOCK_DBITS + SEG6_LOCAL_LCNODE_FN_DBITS <= 128. * * The correctness of SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS * values are checked during the kernel compilation. If the compilation stops, * check the value of these parameters to see if they meet conditions (i), (ii) * and (iii). */ #define SEG6_LOCAL_LCBLOCK_DBITS 32 #define SEG6_LOCAL_LCNODE_FN_DBITS 16 /* The following next_csid_chk_{cntr,lcblock,lcblock_fn}_bits macros can be * used directly to check whether the lengths (in bits) of Locator-Block and * Locator-Node Function are valid according to (i), (ii), (iii). */ #define next_csid_chk_cntr_bits(blen, flen) \ ((blen) + (flen) > 128) #define next_csid_chk_lcblock_bits(blen) \ ({ \ typeof(blen) __tmp = blen; \ (!__tmp || __tmp > 120 || (__tmp & 0x07)); \ }) #define next_csid_chk_lcnode_fn_bits(flen) \ next_csid_chk_lcblock_bits(flen) /* flag indicating that flavors are set up for a given End* behavior */ #define SEG6_F_LOCAL_FLAVORS SEG6_F_ATTR(SEG6_LOCAL_FLAVORS) #define SEG6_F_LOCAL_FLV_OP(flvname) BIT(SEG6_LOCAL_FLV_OP_##flvname) #define SEG6_F_LOCAL_FLV_NEXT_CSID SEG6_F_LOCAL_FLV_OP(NEXT_CSID) #define SEG6_F_LOCAL_FLV_PSP SEG6_F_LOCAL_FLV_OP(PSP) /* Supported RFC8986 Flavor operations are reported in this bitmask */ #define SEG6_LOCAL_FLV8986_SUPP_OPS SEG6_F_LOCAL_FLV_PSP #define SEG6_LOCAL_END_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_NEXT_CSID | \ SEG6_LOCAL_FLV8986_SUPP_OPS) #define SEG6_LOCAL_END_X_FLV_SUPP_OPS SEG6_F_LOCAL_FLV_NEXT_CSID struct seg6_flavors_info { /* Flavor operations */ __u32 flv_ops; /* Locator-Block length, expressed in bits */ __u8 lcblock_bits; /* Locator-Node Function length, expressed in bits*/ __u8 lcnode_func_bits; }; enum seg6_end_dt_mode { DT_INVALID_MODE = -EINVAL, DT_LEGACY_MODE = 0, DT_VRF_MODE = 1, }; struct seg6_end_dt_info { enum seg6_end_dt_mode mode; struct net *net; /* VRF device associated to the routing table used by the SRv6 * End.DT4/DT6 behavior for routing IPv4/IPv6 packets. */ int vrf_ifindex; int vrf_table; /* tunneled packet family (IPv4 or IPv6). * Protocol and header length are inferred from family. */ u16 family; }; struct pcpu_seg6_local_counters { u64_stats_t packets; u64_stats_t bytes; u64_stats_t errors; struct u64_stats_sync syncp; }; /* This struct groups all the SRv6 Behavior counters supported so far. * * put_nla_counters() makes use of this data structure to collect all counter * values after the per-CPU counter evaluation has been performed. * Finally, each counter value (in seg6_local_counters) is stored in the * corresponding netlink attribute and sent to user space. * * NB: we don't want to expose this structure to user space! */ struct seg6_local_counters { __u64 packets; __u64 bytes; __u64 errors; }; #define seg6_local_alloc_pcpu_counters(__gfp) \ __netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters, \ ((__gfp) | __GFP_ZERO)) #define SEG6_F_LOCAL_COUNTERS SEG6_F_ATTR(SEG6_LOCAL_COUNTERS) struct seg6_local_lwt { int action; struct ipv6_sr_hdr *srh; int table; struct in_addr nh4; struct in6_addr nh6; int iif; int oif; struct bpf_lwt_prog bpf; #ifdef CONFIG_NET_L3_MASTER_DEV struct seg6_end_dt_info dt_info; #endif struct seg6_flavors_info flv_info; struct pcpu_seg6_local_counters __percpu *pcpu_counters; int headroom; struct seg6_action_desc *desc; /* unlike the required attrs, we have to track the optional attributes * that have been effectively parsed. */ unsigned long parsed_optattrs; }; static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt) { return (struct seg6_local_lwt *)lwt->data; } static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb) { struct ipv6_sr_hdr *srh; srh = seg6_get_srh(skb, IP6_FH_F_SKIP_RH); if (!srh) return NULL; #ifdef CONFIG_IPV6_SEG6_HMAC if (!seg6_hmac_validate_skb(skb)) return NULL; #endif return srh; } static bool decap_and_validate(struct sk_buff *skb, int proto) { struct ipv6_sr_hdr *srh; unsigned int off = 0; srh = seg6_get_srh(skb, 0); if (srh && srh->segments_left > 0) return false; #ifdef CONFIG_IPV6_SEG6_HMAC if (srh && !seg6_hmac_validate_skb(skb)) return false; #endif if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0) return false; if (!pskb_pull(skb, off)) return false; skb_postpull_rcsum(skb, skb_network_header(skb), off); skb_reset_network_header(skb); skb_reset_transport_header(skb); if (iptunnel_pull_offloads(skb)) return false; return true; } static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr) { struct in6_addr *addr; srh->segments_left--; addr = srh->segments + srh->segments_left; *daddr = *addr; } static int seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, u32 tbl_id, bool local_delivery) { struct net *net = dev_net(skb->dev); struct ipv6hdr *hdr = ipv6_hdr(skb); int flags = RT6_LOOKUP_F_HAS_SADDR; struct dst_entry *dst = NULL; struct rt6_info *rt; struct flowi6 fl6; int dev_flags = 0; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_iif = skb->dev->ifindex; fl6.daddr = nhaddr ? *nhaddr : hdr->daddr; fl6.saddr = hdr->saddr; fl6.flowlabel = ip6_flowinfo(hdr); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = hdr->nexthdr; if (nhaddr) fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; if (!tbl_id) { dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags); } else { struct fib6_table *table; table = fib6_get_table(net, tbl_id); if (!table) goto out; rt = ip6_pol_route(net, table, 0, &fl6, skb, flags); dst = &rt->dst; } /* we want to discard traffic destined for local packet processing, * if @local_delivery is set to false. */ if (!local_delivery) dev_flags |= IFF_LOOPBACK; if (dst && (dst->dev->flags & dev_flags) && !dst->error) { dst_release(dst); dst = NULL; } out: if (!dst) { rt = net->ipv6.ip6_blk_hole_entry; dst = &rt->dst; dst_hold(dst); } skb_dst_drop(skb); skb_dst_set(skb, dst); return dst->error; } int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, u32 tbl_id) { return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false); } static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo) { return finfo->lcblock_bits >> 3; } static __u8 seg6_flv_lcnode_func_octects(const struct seg6_flavors_info *finfo) { return finfo->lcnode_func_bits >> 3; } static bool seg6_next_csid_is_arg_zero(const struct in6_addr *addr, const struct seg6_flavors_info *finfo) { __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo); __u8 blk_octects = seg6_flv_lcblock_octects(finfo); __u8 arg_octects; int i; arg_octects = 16 - blk_octects - fnc_octects; for (i = 0; i < arg_octects; ++i) { if (addr->s6_addr[blk_octects + fnc_octects + i] != 0x00) return false; } return true; } /* assume that DA.Argument length > 0 */ static void seg6_next_csid_advance_arg(struct in6_addr *addr, const struct seg6_flavors_info *finfo) { __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo); __u8 blk_octects = seg6_flv_lcblock_octects(finfo); /* advance DA.Argument */ memmove(&addr->s6_addr[blk_octects], &addr->s6_addr[blk_octects + fnc_octects], 16 - blk_octects - fnc_octects); memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects); } static int input_action_end_finish(struct sk_buff *skb, struct seg6_local_lwt *slwt) { seg6_lookup_nexthop(skb, NULL, 0); return dst_input(skb); } static int input_action_end_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) { struct ipv6_sr_hdr *srh; srh = get_and_validate_srh(skb); if (!srh) goto drop; advance_nextseg(srh, &ipv6_hdr(skb)->daddr); return input_action_end_finish(skb, slwt); drop: kfree_skb(skb); return -EINVAL; } static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) { const struct seg6_flavors_info *finfo = &slwt->flv_info; struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; if (seg6_next_csid_is_arg_zero(daddr, finfo)) return input_action_end_core(skb, slwt); /* update DA */ seg6_next_csid_advance_arg(daddr, finfo); return input_action_end_finish(skb, slwt); } static int input_action_end_x_finish(struct sk_buff *skb, struct seg6_local_lwt *slwt) { seg6_lookup_nexthop(skb, &slwt->nh6, 0); return dst_input(skb); } static int input_action_end_x_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) { struct ipv6_sr_hdr *srh; srh = get_and_validate_srh(skb); if (!srh) goto drop; advance_nextseg(srh, &ipv6_hdr(skb)->daddr); return input_action_end_x_finish(skb, slwt); drop: kfree_skb(skb); return -EINVAL; } static int end_x_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) { const struct seg6_flavors_info *finfo = &slwt->flv_info; struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; if (seg6_next_csid_is_arg_zero(daddr, finfo)) return input_action_end_x_core(skb, slwt); /* update DA */ seg6_next_csid_advance_arg(daddr, finfo); return input_action_end_x_finish(skb, slwt); } static bool seg6_next_csid_enabled(__u32 fops) { return fops & SEG6_F_LOCAL_FLV_NEXT_CSID; } /* Processing of SRv6 End, End.X, and End.T behaviors can be extended through * the flavors framework. These behaviors must report the subset of (flavor) * operations they currently implement. In this way, if a user specifies a * flavor combination that is not supported by a given End* behavior, the * kernel refuses to instantiate the tunnel reporting the error. */ static int seg6_flv_supp_ops_by_action(int action, __u32 *fops) { switch (action) { case SEG6_LOCAL_ACTION_END: *fops = SEG6_LOCAL_END_FLV_SUPP_OPS; break; case SEG6_LOCAL_ACTION_END_X: *fops = SEG6_LOCAL_END_X_FLV_SUPP_OPS; break; default: return -EOPNOTSUPP; } return 0; } /* We describe the packet state in relation to the absence/presence of the SRH * and the Segment Left (SL) field. * For our purposes, it is not necessary to record the exact value of the SL * when the SID List consists of two or more segments. */ enum seg6_local_pktinfo { /* the order really matters! */ SEG6_LOCAL_PKTINFO_NOHDR = 0, SEG6_LOCAL_PKTINFO_SL_ZERO, SEG6_LOCAL_PKTINFO_SL_ONE, SEG6_LOCAL_PKTINFO_SL_MORE, __SEG6_LOCAL_PKTINFO_MAX, }; #define SEG6_LOCAL_PKTINFO_MAX (__SEG6_LOCAL_PKTINFO_MAX - 1) static enum seg6_local_pktinfo seg6_get_srh_pktinfo(struct ipv6_sr_hdr *srh) { __u8 sgl; if (!srh) return SEG6_LOCAL_PKTINFO_NOHDR; sgl = srh->segments_left; if (sgl < 2) return SEG6_LOCAL_PKTINFO_SL_ZERO + sgl; return SEG6_LOCAL_PKTINFO_SL_MORE; } enum seg6_local_flv_action { SEG6_LOCAL_FLV_ACT_UNSPEC = 0, SEG6_LOCAL_FLV_ACT_END, SEG6_LOCAL_FLV_ACT_PSP, SEG6_LOCAL_FLV_ACT_USP, SEG6_LOCAL_FLV_ACT_USD, __SEG6_LOCAL_FLV_ACT_MAX }; #define SEG6_LOCAL_FLV_ACT_MAX (__SEG6_LOCAL_FLV_ACT_MAX - 1) /* The action table for RFC8986 flavors (see the flv8986_act_tbl below) * contains the actions (i.e. processing operations) to be applied on packets * when flavors are configured for an End* behavior. * By combining the pkinfo data and from the flavors mask, the macro * computes the index used to access the elements (actions) stored in the * action table. The index is structured as follows: * * index * _______________/\________________ * / \ * +----------------+----------------+ * | pf | afm | * +----------------+----------------+ * ph-1 ... p1 p0 fk-1 ... f1 f0 * MSB LSB * * where: * - 'afm' (adjusted flavor mask) is the mask containing a combination of the * RFC8986 flavors currently supported. 'afm' corresponds to the @fm * argument of the macro whose value is righ-shifted by 1 bit. By doing so, * we discard the SEG6_LOCAL_FLV_OP_UNSPEC flag (bit 0 in @fm) which is * never used here; * - 'pf' encodes the packet info (pktinfo) regarding the presence/absence of * the SRH, SL = 0, etc. 'pf' is set with the value of @pf provided as * argument to the macro. */ #define flv8986_act_tbl_idx(pf, fm) \ ((((pf) << bits_per(SEG6_LOCAL_FLV8986_SUPP_OPS)) | \ ((fm) & SEG6_LOCAL_FLV8986_SUPP_OPS)) >> SEG6_LOCAL_FLV_OP_PSP) /* We compute the size of the action table by considering the RFC8986 flavors * actually supported by the kernel. In this way, the size is automatically * adjusted when new flavors are supported. */ #define FLV8986_ACT_TBL_SIZE \ roundup_pow_of_two(flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_MAX, \ SEG6_LOCAL_FLV8986_SUPP_OPS)) /* tbl_cfg(act, pf, fm) macro is used to easily configure the action * table; it accepts 3 arguments: * i) @act, the suffix from SEG6_LOCAL_FLV_ACT_{act} representing * the action that should be applied on the packet; * ii) @pf, the suffix from SEG6_LOCAL_PKTINFO_{pf} reporting the packet * info about the lack/presence of SRH, SRH with SL = 0, etc; * iii) @fm, the mask of flavors. */ #define tbl_cfg(act, pf, fm) \ [flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_##pf, \ (fm))] = SEG6_LOCAL_FLV_ACT_##act /* shorthand for improving readability */ #define F_PSP SEG6_F_LOCAL_FLV_PSP /* The table contains, for each combination of the pktinfo data and * flavors, the action that should be taken on a packet (e.g. * "standard" Endpoint processing, Penultimate Segment Pop, etc). * * By default, table entries not explicitly configured are initialized with the * SEG6_LOCAL_FLV_ACT_UNSPEC action, which generally has the effect of * discarding the processed packet. */ static const u8 flv8986_act_tbl[FLV8986_ACT_TBL_SIZE] = { /* PSP variant for packet where SRH with SL = 1 */ tbl_cfg(PSP, SL_ONE, F_PSP), /* End for packet where the SRH with SL > 1*/ tbl_cfg(END, SL_MORE, F_PSP), }; #undef F_PSP #undef tbl_cfg /* For each flavor defined in RFC8986 (or a combination of them) an action is * performed on the packet. The specific action depends on: * - info extracted from the packet (i.e. pktinfo data) regarding the * lack/presence of the SRH, and if the SRH is available, on the value of * Segment Left field; * - the mask of flavors configured for the specific SRv6 End* behavior. * * The function combines both the pkinfo and the flavors mask to evaluate the * corresponding action to be taken on the packet. */ static enum seg6_local_flv_action seg6_local_flv8986_act_lookup(enum seg6_local_pktinfo pinfo, __u32 flvmask) { unsigned long index; /* check if the provided mask of flavors is supported */ if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS)) return SEG6_LOCAL_FLV_ACT_UNSPEC; index = flv8986_act_tbl_idx(pinfo, flvmask); if (unlikely(index >= FLV8986_ACT_TBL_SIZE)) return SEG6_LOCAL_FLV_ACT_UNSPEC; return flv8986_act_tbl[index]; } /* skb->data must be aligned with skb->network_header */ static bool seg6_pop_srh(struct sk_buff *skb, int srhoff) { struct ipv6_sr_hdr *srh; struct ipv6hdr *iph; __u8 srh_nexthdr; int thoff = -1; int srhlen; int nhlen; if (unlikely(srhoff < sizeof(*iph) || !pskb_may_pull(skb, srhoff + sizeof(*srh)))) return false; srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srhlen = ipv6_optlen(srh); /* we are about to mangle the pkt, let's check if we can write on it */ if (unlikely(skb_ensure_writable(skb, srhoff + srhlen))) return false; /* skb_ensure_writable() may change skb pointers; evaluate srh again */ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); srh_nexthdr = srh->nexthdr; if (unlikely(!skb_transport_header_was_set(skb))) goto pull; nhlen = skb_network_header_len(skb); /* we have to deal with the transport header: it could be set before * the SRH, after the SRH, or within it (which is considered wrong, * however). */ if (likely(nhlen <= srhoff)) thoff = nhlen; else if (nhlen >= srhoff + srhlen) /* transport_header is set after the SRH */ thoff = nhlen - srhlen; else /* transport_header falls inside the SRH; hence, we can't * restore the transport_header pointer properly after * SRH removing operation. */ return false; pull: /* we need to pop the SRH: * 1) first of all, we pull out everything from IPv6 header up to SRH * (included) evaluating also the rcsum; * 2) we overwrite (and then remove) the SRH by properly moving the * IPv6 along with any extension header that precedes the SRH; * 3) At the end, we push back the pulled headers (except for SRH, * obviously). */ skb_pull_rcsum(skb, srhoff + srhlen); memmove(skb_network_header(skb) + srhlen, skb_network_header(skb), srhoff); skb_push(skb, srhoff); skb_reset_network_header(skb); skb_mac_header_rebuild(skb); if (likely(thoff >= 0)) skb_set_transport_header(skb, thoff); iph = ipv6_hdr(skb); if (iph->nexthdr == NEXTHDR_ROUTING) { iph->nexthdr = srh_nexthdr; } else { /* we must look for the extension header (EXTH, for short) that * immediately precedes the SRH we have just removed. * Then, we update the value of the EXTH nexthdr with the one * contained in the SRH nexthdr. */ unsigned int off = sizeof(*iph); struct ipv6_opt_hdr *hp, _hdr; __u8 nexthdr = iph->nexthdr; for (;;) { if (unlikely(!ipv6_ext_hdr(nexthdr) || nexthdr == NEXTHDR_NONE)) return false; hp = skb_header_pointer(skb, off, sizeof(_hdr), &_hdr); if (unlikely(!hp)) return false; if (hp->nexthdr == NEXTHDR_ROUTING) { hp->nexthdr = srh_nexthdr; break; } switch (nexthdr) { case NEXTHDR_FRAGMENT: fallthrough; case NEXTHDR_AUTH: /* we expect SRH before FRAG and AUTH */ return false; default: off += ipv6_optlen(hp); break; } nexthdr = hp->nexthdr; } } iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_postpush_rcsum(skb, iph, srhoff); return true; } /* process the packet on the basis of the RFC8986 flavors set for the given * SRv6 End behavior instance. */ static int end_flv8986_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) { const struct seg6_flavors_info *finfo = &slwt->flv_info; enum seg6_local_flv_action action; enum seg6_local_pktinfo pinfo; struct ipv6_sr_hdr *srh; __u32 flvmask; int srhoff; srh = seg6_get_srh(skb, 0); srhoff = srh ? ((unsigned char *)srh - skb->data) : 0; pinfo = seg6_get_srh_pktinfo(srh); #ifdef CONFIG_IPV6_SEG6_HMAC if (srh && !seg6_hmac_validate_skb(skb)) goto drop; #endif flvmask = finfo->flv_ops; if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS)) { pr_warn_once("seg6local: invalid RFC8986 flavors\n"); goto drop; } /* retrieve the action triggered by the combination of pktinfo data and * the flavors mask. */ action = seg6_local_flv8986_act_lookup(pinfo, flvmask); switch (action) { case SEG6_LOCAL_FLV_ACT_END: /* process the packet as the "standard" End behavior */ advance_nextseg(srh, &ipv6_hdr(skb)->daddr); break; case SEG6_LOCAL_FLV_ACT_PSP: advance_nextseg(srh, &ipv6_hdr(skb)->daddr); if (unlikely(!seg6_pop_srh(skb, srhoff))) goto drop; break; case SEG6_LOCAL_FLV_ACT_UNSPEC: fallthrough; default: /* by default, we drop the packet since we could not find a * suitable action. */ goto drop; } return input_action_end_finish(skb, slwt); drop: kfree_skb(skb); return -EINVAL; } /* regular endpoint function */ static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) { const struct seg6_flavors_info *finfo = &slwt->flv_info; __u32 fops = finfo->flv_ops; if (!fops) return input_action_end_core(skb, slwt); /* check for the presence of NEXT-C-SID since it applies first */ if (seg6_next_csid_enabled(fops)) return end_next_csid_core(skb, slwt); /* the specific processing function to be performed on the packet * depends on the combination of flavors defined in RFC8986 and some * information extracted from the packet, e.g. presence/absence of SRH, * Segment Left = 0, etc. */ return end_flv8986_core(skb, slwt); } /* regular endpoint, and forward to specified nexthop */ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt) { const struct seg6_flavors_info *finfo = &slwt->flv_info; __u32 fops = finfo->flv_ops; /* check for the presence of NEXT-C-SID since it applies first */ if (seg6_next_csid_enabled(fops)) return end_x_next_csid_core(skb, slwt); return input_action_end_x_core(skb, slwt); } static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt) { struct ipv6_sr_hdr *srh; srh = get_and_validate_srh(skb); if (!srh) goto drop; advance_nextseg(srh, &ipv6_hdr(skb)->daddr); seg6_lookup_nexthop(skb, NULL, slwt->table); return dst_input(skb); drop: kfree_skb(skb); return -EINVAL; } /* decapsulate and forward inner L2 frame on specified interface */ static int input_action_end_dx2(struct sk_buff *skb, struct seg6_local_lwt *slwt) { struct net *net = dev_net(skb->dev); struct net_device *odev; struct ethhdr *eth; if (!decap_and_validate(skb, IPPROTO_ETHERNET)) goto drop; if (!pskb_may_pull(skb, ETH_HLEN)) goto drop; skb_reset_mac_header(skb); eth = (struct ethhdr *)skb->data; /* To determine the frame's protocol, we assume it is 802.3. This avoids * a call to eth_type_trans(), which is not really relevant for our * use case. */ if (!eth_proto_is_802_3(eth->h_proto)) goto drop; odev = dev_get_by_index_rcu(net, slwt->oif); if (!odev) goto drop; /* As we accept Ethernet frames, make sure the egress device is of * the correct type. */ if (odev->type != ARPHRD_ETHER) goto drop; if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev)) goto drop; skb_orphan(skb); if (skb_warn_if_lro(skb)) goto drop; skb_forward_csum(skb); if (skb->len - ETH_HLEN > odev->mtu) goto drop; skb->dev = odev; skb->protocol = eth->h_proto; return dev_queue_xmit(skb); drop: kfree_skb(skb); return -EINVAL; } static int input_action_end_dx6_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct in6_addr *nhaddr = NULL; struct seg6_local_lwt *slwt; slwt = seg6_local_lwtunnel(orig_dst->lwtstate); /* The inner packet is not associated to any local interface, * so we do not call netif_rx(). * * If slwt->nh6 is set to ::, then lookup the nexthop for the * inner packet's DA. Otherwise, use the specified nexthop. */ if (!ipv6_addr_any(&slwt->nh6)) nhaddr = &slwt->nh6; seg6_lookup_nexthop(skb, nhaddr, 0); return dst_input(skb); } /* decapsulate and forward to specified nexthop */ static int input_action_end_dx6(struct sk_buff *skb, struct seg6_local_lwt *slwt) { /* this function accepts IPv6 encapsulated packets, with either * an SRH with SL=0, or no SRH. */ if (!decap_and_validate(skb, IPPROTO_IPV6)) goto drop; if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; skb_set_transport_header(skb, sizeof(struct ipv6hdr)); nf_reset_ct(skb); if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, input_action_end_dx6_finish); return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb); drop: kfree_skb(skb); return -EINVAL; } static int input_action_end_dx4_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { struct dst_entry *orig_dst = skb_dst(skb); struct seg6_local_lwt *slwt; struct iphdr *iph; __be32 nhaddr; int err; slwt = seg6_local_lwtunnel(orig_dst->lwtstate); iph = ip_hdr(skb); nhaddr = slwt->nh4.s_addr ?: iph->daddr; skb_dst_drop(skb); err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); if (err) { kfree_skb(skb); return -EINVAL; } return dst_input(skb); } static int input_action_end_dx4(struct sk_buff *skb, struct seg6_local_lwt *slwt) { if (!decap_and_validate(skb, IPPROTO_IPIP)) goto drop; if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; skb->protocol = htons(ETH_P_IP); skb_set_transport_header(skb, sizeof(struct iphdr)); nf_reset_ct(skb); if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled)) return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, dev_net(skb->dev), NULL, skb, skb->dev, NULL, input_action_end_dx4_finish); return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb); drop: kfree_skb(skb); return -EINVAL; } #ifdef CONFIG_NET_L3_MASTER_DEV static struct net *fib6_config_get_net(const struct fib6_config *fib6_cfg) { const struct nl_info *nli = &fib6_cfg->fc_nlinfo; return nli->nl_net; } static int __seg6_end_dt_vrf_build(struct seg6_local_lwt *slwt, const void *cfg, u16 family, struct netlink_ext_ack *extack) { struct seg6_end_dt_info *info = &slwt->dt_info; int vrf_ifindex; struct net *net; net = fib6_config_get_net(cfg); /* note that vrf_table was already set by parse_nla_vrftable() */ vrf_ifindex = l3mdev_ifindex_lookup_by_table_id(L3MDEV_TYPE_VRF, net, info->vrf_table); if (vrf_ifindex < 0) { if (vrf_ifindex == -EPERM) { NL_SET_ERR_MSG(extack, "Strict mode for VRF is disabled"); } else if (vrf_ifindex == -ENODEV) { NL_SET_ERR_MSG(extack, "Table has no associated VRF device"); } else { pr_debug("seg6local: SRv6 End.DT* creation error=%d\n", vrf_ifindex); } return vrf_ifindex; } info->net = net; info->vrf_ifindex = vrf_ifindex; info->family = family; info->mode = DT_VRF_MODE; return 0; } /* The SRv6 End.DT4/DT6 behavior extracts the inner (IPv4/IPv6) packet and * routes the IPv4/IPv6 packet by looking at the configured routing table. * * In the SRv6 End.DT4/DT6 use case, we can receive traf |