Total coverage: 234728 (14%)of 1777101
99 99 18 18 18 18 18 22 22 1 2 19 7 21 2 18 18 4 4 18 18 22 18 18 8 18 18 77 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * This is a replacement of the old ipt_recent module, which carried the * following copyright notice: * * Author: Stephen Frost <sfrost@snowman.net> * Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/init.h> #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/string.h> #include <linux/ctype.h> #include <linux/list.h> #include <linux/random.h> #include <linux/jhash.h> #include <linux/bitops.h> #include <linux/skbuff.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_recent.h> MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_recent"); MODULE_ALIAS("ip6t_recent"); static unsigned int ip_list_tot __read_mostly = 100; static unsigned int ip_list_hash_size __read_mostly; static unsigned int ip_list_perms __read_mostly = 0644; static unsigned int ip_list_uid __read_mostly; static unsigned int ip_list_gid __read_mostly; module_param(ip_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); module_param(ip_list_uid, uint, 0644); module_param(ip_list_gid, uint, 0644); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); MODULE_PARM_DESC(ip_list_uid, "default owner of /proc/net/xt_recent/* files"); MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* files"); /* retained for backwards compatibility */ static unsigned int ip_pkt_list_tot __read_mostly; module_param(ip_pkt_list_tot, uint, 0400); MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 65535)"); #define XT_RECENT_MAX_NSTAMPS 65536 struct recent_entry { struct list_head list; struct list_head lru_list; union nf_inet_addr addr; u_int16_t family; u_int8_t ttl; u_int16_t index; u_int16_t nstamps; unsigned long stamps[]; }; struct recent_table { struct list_head list; char name[XT_RECENT_NAME_LEN]; union nf_inet_addr mask; unsigned int refcnt; unsigned int entries; u_int16_t nstamps_max_mask; struct list_head lru_list; struct list_head iphash[]; }; struct recent_net { struct list_head tables; #ifdef CONFIG_PROC_FS struct proc_dir_entry *xt_recent; #endif }; static unsigned int recent_net_id __read_mostly; static inline struct recent_net *recent_pernet(struct net *net) { return net_generic(net, recent_net_id); } static DEFINE_SPINLOCK(recent_lock); static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS static const struct proc_ops recent_mt_proc_ops; #endif static u_int32_t hash_rnd __read_mostly; static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr) { return jhash_1word((__force u32)addr->ip, hash_rnd) & (ip_list_hash_size - 1); } static inline unsigned int recent_entry_hash6(const union nf_inet_addr *addr) { return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) & (ip_list_hash_size - 1); } static struct recent_entry * recent_entry_lookup(const struct recent_table *table, const union nf_inet_addr *addrp, u_int16_t family, u_int8_t ttl) { struct recent_entry *e; unsigned int h; if (family == NFPROTO_IPV4) h = recent_entry_hash4(addrp); else h = recent_entry_hash6(addrp); list_for_each_entry(e, &table->iphash[h], list) if (e->family == family && memcmp(&e->addr, addrp, sizeof(e->addr)) == 0 && (ttl == e->ttl || ttl == 0 || e->ttl == 0)) return e; return NULL; } static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) { list_del(&e->list); list_del(&e->lru_list); kfree(e); t->entries--; } /* * Drop entries with timestamps older then 'time'. */ static void recent_entry_reap(struct recent_table *t, unsigned long time, struct recent_entry *working, bool update) { struct recent_entry *e; /* * The head of the LRU list is always the oldest entry. */ e = list_entry(t->lru_list.next, struct recent_entry, lru_list); /* * Do not reap the entry which are going to be updated. */ if (e == working && update) return; /* * The last time stamp is the most recent. */ if (time_after(time, e->stamps[e->index-1])) recent_entry_remove(t, e); } static struct recent_entry * recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, u_int16_t family, u_int8_t ttl) { struct recent_entry *e; unsigned int nstamps_max = t->nstamps_max_mask; if (t->entries >= ip_list_tot) { e = list_entry(t->lru_list.next, struct recent_entry, lru_list); recent_entry_remove(t, e); } nstamps_max += 1; e = kmalloc(struct_size(e, stamps, nstamps_max), GFP_ATOMIC); if (e == NULL) return NULL; memcpy(&e->addr, addr, sizeof(e->addr)); e->ttl = ttl; e->stamps[0] = jiffies; e->nstamps = 1; e->index = 1; e->family = family; if (family == NFPROTO_IPV4) list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]); else list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]); list_add_tail(&e->lru_list, &t->lru_list); t->entries++; return e; } static void recent_entry_update(struct recent_table *t, struct recent_entry *e) { e->index &= t->nstamps_max_mask; e->stamps[e->index++] = jiffies; if (e->index > e->nstamps) e->nstamps = e->index; list_move_tail(&e->lru_list, &t->lru_list); } static struct recent_table *recent_table_lookup(struct recent_net *recent_net, const char *name) { struct recent_table *t; list_for_each_entry(t, &recent_net->tables, list) if (!strcmp(t->name, name)) return t; return NULL; } static void recent_table_flush(struct recent_table *t) { struct recent_entry *e, *next; unsigned int i; for (i = 0; i < ip_list_hash_size; i++) list_for_each_entry_safe(e, next, &t->iphash[i], list) recent_entry_remove(t, e); } static bool recent_mt(const struct sk_buff *skb, struct xt_action_param *par) { struct net *net = xt_net(par); struct recent_net *recent_net = recent_pernet(net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; struct recent_entry *e; union nf_inet_addr addr = {}, addr_mask; u_int8_t ttl; bool ret = info->invert; if (xt_family(par) == NFPROTO_IPV4) { const struct iphdr *iph = ip_hdr(skb); if (info->side == XT_RECENT_DEST) addr.ip = iph->daddr; else addr.ip = iph->saddr; ttl = iph->ttl; } else { const struct ipv6hdr *iph = ipv6_hdr(skb); if (info->side == XT_RECENT_DEST) memcpy(&addr.in6, &iph->daddr, sizeof(addr.in6)); else memcpy(&addr.in6, &iph->saddr, sizeof(addr.in6)); ttl = iph->hop_limit; } /* use TTL as seen before forwarding */ if (xt_out(par) != NULL && (!skb->sk || !net_eq(net, sock_net(skb->sk)))) ttl++; spin_lock_bh(&recent_lock); t = recent_table_lookup(recent_net, info->name); nf_inet_addr_mask(&addr, &addr_mask, &t->mask); e = recent_entry_lookup(t, &addr_mask, xt_family(par), (info->check_set & XT_RECENT_TTL) ? ttl : 0); if (e == NULL) { if (!(info->check_set & XT_RECENT_SET)) goto out; e = recent_entry_init(t, &addr_mask, xt_family(par), ttl); if (e == NULL) par->hotdrop = true; ret = !ret; goto out; } if (info->check_set & XT_RECENT_SET) ret = !ret; else if (info->check_set & XT_RECENT_REMOVE) { recent_entry_remove(t, e); ret = !ret; } else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) { unsigned long time = jiffies - info->seconds * HZ; unsigned int i, hits = 0; for (i = 0; i < e->nstamps; i++) { if (info->seconds && time_after(time, e->stamps[i])) continue; if (!info->hit_count || ++hits >= info->hit_count) { ret = !ret; break; } } /* info->seconds must be non-zero */ if (info->check_set & XT_RECENT_REAP) recent_entry_reap(t, time, e, info->check_set & XT_RECENT_UPDATE && ret); } if (info->check_set & XT_RECENT_SET || (info->check_set & XT_RECENT_UPDATE && ret)) { recent_entry_update(t, e); e->ttl = ttl; } out: spin_unlock_bh(&recent_lock); return ret; } static void recent_table_free(void *addr) { kvfree(addr); } static int recent_mt_check(const struct xt_mtchk_param *par, const struct xt_recent_mtinfo_v1 *info) { struct recent_net *recent_net = recent_pernet(par->net); struct recent_table *t; #ifdef CONFIG_PROC_FS struct proc_dir_entry *pde; kuid_t uid; kgid_t gid; #endif unsigned int nstamp_mask; unsigned int i; int ret = -EINVAL; net_get_random_once(&hash_rnd, sizeof(hash_rnd)); if (info->check_set & ~XT_RECENT_VALID_FLAGS) { pr_info_ratelimited("Unsupported userspace flags (%08x)\n", info->check_set); return -EINVAL; } if (hweight8(info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE | XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) return -EINVAL; if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && (info->seconds || info->hit_count || (info->check_set & XT_RECENT_MODIFIERS))) return -EINVAL; if ((info->check_set & XT_RECENT_REAP) && !info->seconds) return -EINVAL; if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) { pr_info_ratelimited("hitcount (%u) is larger than allowed maximum (%u)\n", info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); return -EINVAL; } ret = xt_check_proc_name(info->name, sizeof(info->name)); if (ret) return ret; if (ip_pkt_list_tot && info->hit_count < ip_pkt_list_tot) nstamp_mask = roundup_pow_of_two(ip_pkt_list_tot) - 1; else if (info->hit_count) nstamp_mask = roundup_pow_of_two(info->hit_count) - 1; else nstamp_mask = 32 - 1; mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (t != NULL) { if (nstamp_mask > t->nstamps_max_mask) { spin_lock_bh(&recent_lock); recent_table_flush(t); t->nstamps_max_mask = nstamp_mask; spin_unlock_bh(&recent_lock); } t->refcnt++; ret = 0; goto out; } t = kvzalloc(struct_size(t, iphash, ip_list_hash_size), GFP_KERNEL); if (t == NULL) { ret = -ENOMEM; goto out; } t->refcnt = 1; t->nstamps_max_mask = nstamp_mask; memcpy(&t->mask, &info->mask, sizeof(t->mask)); strcpy(t->name, info->name); INIT_LIST_HEAD(&t->lru_list); for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS uid = make_kuid(&init_user_ns, ip_list_uid); gid = make_kgid(&init_user_ns, ip_list_gid); if (!uid_valid(uid) || !gid_valid(gid)) { recent_table_free(t); ret = -EINVAL; goto out; } pde = proc_create_data(t->name, ip_list_perms, recent_net->xt_recent, &recent_mt_proc_ops, t); if (pde == NULL) { recent_table_free(t); ret = -ENOMEM; goto out; } proc_set_user(pde, uid, gid); #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &recent_net->tables); spin_unlock_bh(&recent_lock); ret = 0; out: mutex_unlock(&recent_mutex); return ret; } static int recent_mt_check_v0(const struct xt_mtchk_param *par) { const struct xt_recent_mtinfo_v0 *info_v0 = par->matchinfo; struct xt_recent_mtinfo_v1 info_v1; /* Copy revision 0 structure to revision 1 */ memcpy(&info_v1, info_v0, sizeof(struct xt_recent_mtinfo)); /* Set default mask to ensure backward compatible behaviour */ memset(info_v1.mask.all, 0xFF, sizeof(info_v1.mask.all)); return recent_mt_check(par, &info_v1); } static int recent_mt_check_v1(const struct xt_mtchk_param *par) { return recent_mt_check(par, par->matchinfo); } static void recent_mt_destroy(const struct xt_mtdtor_param *par) { struct recent_net *recent_net = recent_pernet(par->net); const struct xt_recent_mtinfo_v1 *info = par->matchinfo; struct recent_table *t; mutex_lock(&recent_mutex); t = recent_table_lookup(recent_net, info->name); if (--t->refcnt == 0) { spin_lock_bh(&recent_lock); list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS if (recent_net->xt_recent != NULL) remove_proc_entry(t->name, recent_net->xt_recent); #endif recent_table_flush(t); recent_table_free(t); } mutex_unlock(&recent_mutex); } #ifdef CONFIG_PROC_FS struct recent_iter_state { const struct recent_table *table; unsigned int bucket; }; static void *recent_seq_start(struct seq_file *seq, loff_t *pos) __acquires(recent_lock) { struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; struct recent_entry *e; loff_t p = *pos; spin_lock_bh(&recent_lock); for (st->bucket = 0; st->bucket < ip_list_hash_size; st->bucket++) list_for_each_entry(e, &t->iphash[st->bucket], list) if (p-- == 0) return e; return NULL; } static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; const struct recent_entry *e = v; const struct list_head *head = e->list.next; (*pos)++; while (head == &t->iphash[st->bucket]) { if (++st->bucket >= ip_list_hash_size) return NULL; head = t->iphash[st->bucket].next; } return list_entry(head, struct recent_entry, list); } static void recent_seq_stop(struct seq_file *s, void *v) __releases(recent_lock) { spin_unlock_bh(&recent_lock); } static int recent_seq_show(struct seq_file *seq, void *v) { const struct recent_entry *e = v; struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; unsigned int i; i = (e->index - 1) & t->nstamps_max_mask; if (e->family == NFPROTO_IPV4) seq_printf(seq, "src=%pI4 ttl: %u last_seen: %lu oldest_pkt: %u", &e->addr.ip, e->ttl, e->stamps[i], e->index); else seq_printf(seq, "src=%pI6 ttl: %u last_seen: %lu oldest_pkt: %u", &e->addr.in6, e->ttl, e->stamps[i], e->index); for (i = 0; i < e->nstamps; i++) seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); seq_putc(seq, '\n'); return 0; } static const struct seq_operations recent_seq_ops = { .start = recent_seq_start, .next = recent_seq_next, .stop = recent_seq_stop, .show = recent_seq_show, }; static int recent_seq_open(struct inode *inode, struct file *file) { struct recent_iter_state *st; st = __seq_open_private(file, &recent_seq_ops, sizeof(*st)); if (st == NULL) return -ENOMEM; st->table = pde_data(inode); return 0; } static ssize_t recent_mt_proc_write(struct file *file, const char __user *input, size_t size, loff_t *loff) { struct recent_table *t = pde_data(file_inode(file)); struct recent_entry *e; char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:255.255.255.255")]; const char *c = buf; union nf_inet_addr addr = {}; u_int16_t family; bool add, succ; if (size == 0) return 0; if (size > sizeof(buf)) size = sizeof(buf); if (copy_from_user(buf, input, size) != 0) return -EFAULT; /* Strict protocol! */ if (*loff != 0) return -ESPIPE; switch (*c) { case '/': /* flush table */ spin_lock_bh(&recent_lock); recent_table_flush(t); spin_unlock_bh(&recent_lock); return size; case '-': /* remove address */ add = false; break; case '+': /* add address */ add = true; break; default: pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n"); return -EINVAL; } ++c; --size; if (strnchr(c, size, ':') != NULL) { family = NFPROTO_IPV6; succ = in6_pton(c, size, (void *)&addr, '\n', NULL); } else { family = NFPROTO_IPV4; succ = in4_pton(c, size, (void *)&addr, '\n', NULL); } if (!succ) return -EINVAL; spin_lock_bh(&recent_lock); e = recent_entry_lookup(t, &addr, family, 0); if (e == NULL) { if (add) recent_entry_init(t, &addr, family, 0); } else { if (add) recent_entry_update(t, e); else recent_entry_remove(t, e); } spin_unlock_bh(&recent_lock); /* Note we removed one above */ *loff += size + 1; return size + 1; } static const struct proc_ops recent_mt_proc_ops = { .proc_open = recent_seq_open, .proc_read = seq_read, .proc_write = recent_mt_proc_write, .proc_release = seq_release_private, .proc_lseek = seq_lseek, }; static int __net_init recent_proc_net_init(struct net *net) { struct recent_net *recent_net = recent_pernet(net); recent_net->xt_recent = proc_mkdir("xt_recent", net->proc_net); if (!recent_net->xt_recent) return -ENOMEM; return 0; } static void __net_exit recent_proc_net_exit(struct net *net) { struct recent_net *recent_net = recent_pernet(net); struct recent_table *t; /* recent_net_exit() is called before recent_mt_destroy(). Make sure * that the parent xt_recent proc entry is empty before trying to * remove it. */ spin_lock_bh(&recent_lock); list_for_each_entry(t, &recent_net->tables, list) remove_proc_entry(t->name, recent_net->xt_recent); recent_net->xt_recent = NULL; spin_unlock_bh(&recent_lock); remove_proc_entry("xt_recent", net->proc_net); } #else static inline int recent_proc_net_init(struct net *net) { return 0; } static inline void recent_proc_net_exit(struct net *net) { } #endif /* CONFIG_PROC_FS */ static int __net_init recent_net_init(struct net *net) { struct recent_net *recent_net = recent_pernet(net); INIT_LIST_HEAD(&recent_net->tables); return recent_proc_net_init(net); } static void __net_exit recent_net_exit(struct net *net) { recent_proc_net_exit(net); } static struct pernet_operations recent_net_ops = { .init = recent_net_init, .exit = recent_net_exit, .id = &recent_net_id, .size = sizeof(struct recent_net), }; static struct xt_match recent_mt_reg[] __read_mostly = { { .name = "recent", .revision = 0, .family = NFPROTO_IPV4, .match = recent_mt, .matchsize = sizeof(struct xt_recent_mtinfo), .checkentry = recent_mt_check_v0, .destroy = recent_mt_destroy, .me = THIS_MODULE, }, { .name = "recent", .revision = 0, .family = NFPROTO_IPV6, .match = recent_mt, .matchsize = sizeof(struct xt_recent_mtinfo), .checkentry = recent_mt_check_v0, .destroy = recent_mt_destroy, .me = THIS_MODULE, }, { .name = "recent", .revision = 1, .family = NFPROTO_IPV4, .match = recent_mt, .matchsize = sizeof(struct xt_recent_mtinfo_v1), .checkentry = recent_mt_check_v1, .destroy = recent_mt_destroy, .me = THIS_MODULE, }, { .name = "recent", .revision = 1, .family = NFPROTO_IPV6, .match = recent_mt, .matchsize = sizeof(struct xt_recent_mtinfo_v1), .checkentry = recent_mt_check_v1, .destroy = recent_mt_destroy, .me = THIS_MODULE, } }; static int __init recent_mt_init(void) { int err; BUILD_BUG_ON_NOT_POWER_OF_2(XT_RECENT_MAX_NSTAMPS); if (!ip_list_tot || ip_pkt_list_tot >= XT_RECENT_MAX_NSTAMPS) return -EINVAL; ip_list_hash_size = 1 << fls(ip_list_tot); err = register_pernet_subsys(&recent_net_ops); if (err) return err; err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); if (err) unregister_pernet_subsys(&recent_net_ops); return err; } static void __exit recent_mt_exit(void) { xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); unregister_pernet_subsys(&recent_net_ops); } module_init(recent_mt_init); module_exit(recent_mt_exit);
18 18 18 18 18 18 14 12 146 146 146 139 146 139 76 1 1 74 12 153 32 125 18 18 18 18 18 18 18 18 18 18 18 18 9 9 18 18 19 16 16 16 3 16 16 16 16 16 1 16 16 16 16 16 314 313 1 294 301 18 4 3 4 6 6 64 130 92 194 26 13 207 207 207 52 156 111 24 22 26 24 208 18 19 170 3 19 17 207 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 /* SPDX-License-Identifier: GPL-2.0-only */ /* Copyright (C) 2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ #ifndef _IP_SET_HASH_GEN_H #define _IP_SET_HASH_GEN_H #include <linux/rcupdate.h> #include <linux/rcupdate_wait.h> #include <linux/jhash.h> #include <linux/types.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/ipset/ip_set.h> #define __ipset_dereference(p) \ rcu_dereference_protected(p, 1) #define ipset_dereference_nfnl(p) \ rcu_dereference_protected(p, \ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) #define ipset_dereference_set(p, set) \ rcu_dereference_protected(p, \ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET) || \ lockdep_is_held(&(set)->lock)) #define ipset_dereference_bh_nfnl(p) \ rcu_dereference_bh_check(p, \ lockdep_nfnl_is_held(NFNL_SUBSYS_IPSET)) /* Hashing which uses arrays to resolve clashing. The hash table is resized * (doubled) when searching becomes too long. * Internally jhash is used with the assumption that the size of the * stored data is a multiple of sizeof(u32). * * Readers and resizing * * Resizing can be triggered by userspace command only, and those * are serialized by the nfnl mutex. During resizing the set is * read-locked, so the only possible concurrent operations are * the kernel side readers. Those must be protected by proper RCU locking. */ /* Number of elements to store in an initial array block */ #define AHASH_INIT_SIZE 2 /* Max number of elements to store in an array block */ #define AHASH_MAX_SIZE (6 * AHASH_INIT_SIZE) /* Max muber of elements in the array block when tuned */ #define AHASH_MAX_TUNED 64 #define AHASH_MAX(h) ((h)->bucketsize) /* A hash bucket */ struct hbucket { struct rcu_head rcu; /* for call_rcu */ /* Which positions are used in the array */ DECLARE_BITMAP(used, AHASH_MAX_TUNED); u8 size; /* size of the array */ u8 pos; /* position of the first free entry */ unsigned char value[] /* the array of the values */ __aligned(__alignof__(u64)); }; /* Region size for locking == 2^HTABLE_REGION_BITS */ #define HTABLE_REGION_BITS 10 #define ahash_numof_locks(htable_bits) \ ((htable_bits) < HTABLE_REGION_BITS ? 1 \ : jhash_size((htable_bits) - HTABLE_REGION_BITS)) #define ahash_sizeof_regions(htable_bits) \ (ahash_numof_locks(htable_bits) * sizeof(struct ip_set_region)) #define ahash_region(n, htable_bits) \ ((n) % ahash_numof_locks(htable_bits)) #define ahash_bucket_start(h, htable_bits) \ ((htable_bits) < HTABLE_REGION_BITS ? 0 \ : (h) * jhash_size(HTABLE_REGION_BITS)) #define ahash_bucket_end(h, htable_bits) \ ((htable_bits) < HTABLE_REGION_BITS ? jhash_size(htable_bits) \ : ((h) + 1) * jhash_size(HTABLE_REGION_BITS)) struct htable_gc { struct delayed_work dwork; struct ip_set *set; /* Set the gc belongs to */ u32 region; /* Last gc run position */ }; /* The hash table: the table size stored here in order to make resizing easy */ struct htable { atomic_t ref; /* References for resizing */ atomic_t uref; /* References for dumping and gc */ u8 htable_bits; /* size of hash table == 2^htable_bits */ u32 maxelem; /* Maxelem per region */ struct ip_set_region *hregion; /* Region locks and ext sizes */ struct hbucket __rcu *bucket[]; /* hashtable buckets */ }; #define hbucket(h, i) ((h)->bucket[i]) #define ext_size(n, dsize) \ (sizeof(struct hbucket) + (n) * (dsize)) #ifndef IPSET_NET_COUNT #define IPSET_NET_COUNT 1 #endif /* Book-keeping of the prefixes added to the set */ struct net_prefixes { u32 nets[IPSET_NET_COUNT]; /* number of elements for this cidr */ u8 cidr[IPSET_NET_COUNT]; /* the cidr value */ }; /* Compute the hash table size */ static size_t htable_size(u8 hbits) { size_t hsize; /* We must fit both into u32 in jhash and INT_MAX in kvmalloc_node() */ if (hbits > 31) return 0; hsize = jhash_size(hbits); if ((INT_MAX - sizeof(struct htable)) / sizeof(struct hbucket *) < hsize) return 0; return hsize * sizeof(struct hbucket *) + sizeof(struct htable); } #ifdef IP_SET_HASH_WITH_NETS #if IPSET_NET_COUNT > 1 #define __CIDR(cidr, i) (cidr[i]) #else #define __CIDR(cidr, i) (cidr) #endif /* cidr + 1 is stored in net_prefixes to support /0 */ #define NCIDR_PUT(cidr) ((cidr) + 1) #define NCIDR_GET(cidr) ((cidr) - 1) #ifdef IP_SET_HASH_WITH_NETS_PACKED /* When cidr is packed with nomatch, cidr - 1 is stored in the data entry */ #define DCIDR_PUT(cidr) ((cidr) - 1) #define DCIDR_GET(cidr, i) (__CIDR(cidr, i) + 1) #else #define DCIDR_PUT(cidr) (cidr) #define DCIDR_GET(cidr, i) __CIDR(cidr, i) #endif #define INIT_CIDR(cidr, host_mask) \ DCIDR_PUT(((cidr) ? NCIDR_GET(cidr) : host_mask)) #ifdef IP_SET_HASH_WITH_NET0 /* cidr from 0 to HOST_MASK value and c = cidr + 1 */ #define NLEN (HOST_MASK + 1) #define CIDR_POS(c) ((c) - 1) #else /* cidr from 1 to HOST_MASK value and c = cidr + 1 */ #define NLEN HOST_MASK #define CIDR_POS(c) ((c) - 2) #endif #else #define NLEN 0 #endif /* IP_SET_HASH_WITH_NETS */ #define SET_ELEM_EXPIRED(set, d) \ (SET_WITH_TIMEOUT(set) && \ ip_set_timeout_expired(ext_timeout(d, set))) #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) static const union nf_inet_addr onesmask = { .all[0] = 0xffffffff, .all[1] = 0xffffffff, .all[2] = 0xffffffff, .all[3] = 0xffffffff }; static const union nf_inet_addr zeromask = {}; #endif #endif /* _IP_SET_HASH_GEN_H */ #ifndef MTYPE #error "MTYPE is not defined!" #endif #ifndef HTYPE #error "HTYPE is not defined!" #endif #ifndef HOST_MASK #error "HOST_MASK is not defined!" #endif /* Family dependent templates */ #undef ahash_data #undef mtype_data_equal #undef mtype_do_data_match #undef mtype_data_set_flags #undef mtype_data_reset_elem #undef mtype_data_reset_flags #undef mtype_data_netmask #undef mtype_data_list #undef mtype_data_next #undef mtype_elem #undef mtype_ahash_destroy #undef mtype_ext_cleanup #undef mtype_add_cidr #undef mtype_del_cidr #undef mtype_ahash_memsize #undef mtype_flush #undef mtype_destroy #undef mtype_same_set #undef mtype_kadt #undef mtype_uadt #undef mtype_add #undef mtype_del #undef mtype_test_cidrs #undef mtype_test #undef mtype_uref #undef mtype_resize #undef mtype_ext_size #undef mtype_resize_ad #undef mtype_head #undef mtype_list #undef mtype_gc_do #undef mtype_gc #undef mtype_gc_init #undef mtype_cancel_gc #undef mtype_variant #undef mtype_data_match #undef htype #undef HKEY #define mtype_data_equal IPSET_TOKEN(MTYPE, _data_equal) #ifdef IP_SET_HASH_WITH_NETS #define mtype_do_data_match IPSET_TOKEN(MTYPE, _do_data_match) #else #define mtype_do_data_match(d) 1 #endif #define mtype_data_set_flags IPSET_TOKEN(MTYPE, _data_set_flags) #define mtype_data_reset_elem IPSET_TOKEN(MTYPE, _data_reset_elem) #define mtype_data_reset_flags IPSET_TOKEN(MTYPE, _data_reset_flags) #define mtype_data_netmask IPSET_TOKEN(MTYPE, _data_netmask) #define mtype_data_list IPSET_TOKEN(MTYPE, _data_list) #define mtype_data_next IPSET_TOKEN(MTYPE, _data_next) #define mtype_elem IPSET_TOKEN(MTYPE, _elem) #define mtype_ahash_destroy IPSET_TOKEN(MTYPE, _ahash_destroy) #define mtype_ext_cleanup IPSET_TOKEN(MTYPE, _ext_cleanup) #define mtype_add_cidr IPSET_TOKEN(MTYPE, _add_cidr) #define mtype_del_cidr IPSET_TOKEN(MTYPE, _del_cidr) #define mtype_ahash_memsize IPSET_TOKEN(MTYPE, _ahash_memsize) #define mtype_flush IPSET_TOKEN(MTYPE, _flush) #define mtype_destroy IPSET_TOKEN(MTYPE, _destroy) #define mtype_same_set IPSET_TOKEN(MTYPE, _same_set) #define mtype_kadt IPSET_TOKEN(MTYPE, _kadt) #define mtype_uadt IPSET_TOKEN(MTYPE, _uadt) #define mtype_add IPSET_TOKEN(MTYPE, _add) #define mtype_del IPSET_TOKEN(MTYPE, _del) #define mtype_test_cidrs IPSET_TOKEN(MTYPE, _test_cidrs) #define mtype_test IPSET_TOKEN(MTYPE, _test) #define mtype_uref IPSET_TOKEN(MTYPE, _uref) #define mtype_resize IPSET_TOKEN(MTYPE, _resize) #define mtype_ext_size IPSET_TOKEN(MTYPE, _ext_size) #define mtype_resize_ad IPSET_TOKEN(MTYPE, _resize_ad) #define mtype_head IPSET_TOKEN(MTYPE, _head) #define mtype_list IPSET_TOKEN(MTYPE, _list) #define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do) #define mtype_gc IPSET_TOKEN(MTYPE, _gc) #define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init) #define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc) #define mtype_variant IPSET_TOKEN(MTYPE, _variant) #define mtype_data_match IPSET_TOKEN(MTYPE, _data_match) #ifndef HKEY_DATALEN #define HKEY_DATALEN sizeof(struct mtype_elem) #endif #define htype MTYPE #define HKEY(data, initval, htable_bits) \ ({ \ const u32 *__k = (const u32 *)data; \ u32 __l = HKEY_DATALEN / sizeof(u32); \ \ BUILD_BUG_ON(HKEY_DATALEN % sizeof(u32) != 0); \ \ jhash2(__k, __l, initval) & jhash_mask(htable_bits); \ }) /* The generic hash structure */ struct htype { struct htable __rcu *table; /* the hash table */ struct htable_gc gc; /* gc workqueue */ u32 maxelem; /* max elements in the hash */ u32 initval; /* random jhash init value */ #ifdef IP_SET_HASH_WITH_MARKMASK u32 markmask; /* markmask value for mark mask to store */ #endif u8 bucketsize; /* max elements in an array block */ #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) u8 netmask; /* netmask value for subnets to store */ union nf_inet_addr bitmask; /* stores bitmask */ #endif struct list_head ad; /* Resize add|del backlist */ struct mtype_elem next; /* temporary storage for uadd */ #ifdef IP_SET_HASH_WITH_NETS struct net_prefixes nets[NLEN]; /* book-keeping of prefixes */ #endif }; /* ADD|DEL entries saved during resize */ struct mtype_resize_ad { struct list_head list; enum ipset_adt ad; /* ADD|DEL element */ struct mtype_elem d; /* Element value */ struct ip_set_ext ext; /* Extensions for ADD */ struct ip_set_ext mext; /* Target extensions for ADD */ u32 flags; /* Flags for ADD */ }; #ifdef IP_SET_HASH_WITH_NETS /* Network cidr size book keeping when the hash stores different * sized networks. cidr == real cidr + 1 to support /0. */ static void mtype_add_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) { int i, j; spin_lock_bh(&set->lock); /* Add in increasing prefix order, so larger cidr first */ for (i = 0, j = -1; i < NLEN && h->nets[i].cidr[n]; i++) { if (j != -1) { continue; } else if (h->nets[i].cidr[n] < cidr) { j = i; } else if (h->nets[i].cidr[n] == cidr) { h->nets[CIDR_POS(cidr)].nets[n]++; goto unlock; } } if (j != -1) { for (; i > j; i--) h->nets[i].cidr[n] = h->nets[i - 1].cidr[n]; } h->nets[i].cidr[n] = cidr; h->nets[CIDR_POS(cidr)].nets[n] = 1; unlock: spin_unlock_bh(&set->lock); } static void mtype_del_cidr(struct ip_set *set, struct htype *h, u8 cidr, u8 n) { u8 i, j, net_end = NLEN - 1; spin_lock_bh(&set->lock); for (i = 0; i < NLEN; i++) { if (h->nets[i].cidr[n] != cidr) continue; h->nets[CIDR_POS(cidr)].nets[n]--; if (h->nets[CIDR_POS(cidr)].nets[n] > 0) goto unlock; for (j = i; j < net_end && h->nets[j].cidr[n]; j++) h->nets[j].cidr[n] = h->nets[j + 1].cidr[n]; h->nets[j].cidr[n] = 0; goto unlock; } unlock: spin_unlock_bh(&set->lock); } #endif /* Calculate the actual memory size of the set data */ static size_t mtype_ahash_memsize(const struct htype *h, const struct htable *t) { return sizeof(*h) + sizeof(*t) + ahash_sizeof_regions(t->htable_bits); } /* Get the ith element from the array block n */ #define ahash_data(n, i, dsize) \ ((struct mtype_elem *)((n)->value + ((i) * (dsize)))) static void mtype_ext_cleanup(struct ip_set *set, struct hbucket *n) { int i; for (i = 0; i < n->pos; i++) if (test_bit(i, n->used)) ip_set_ext_destroy(set, ahash_data(n, i, set->dsize)); } /* Flush a hash type of set: destroy all elements */ static void mtype_flush(struct ip_set *set) { struct htype *h = set->data; struct htable *t; struct hbucket *n; u32 r, i; t = ipset_dereference_nfnl(h->table); for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { spin_lock_bh(&t->hregion[r].lock); for (i = ahash_bucket_start(r, t->htable_bits); i < ahash_bucket_end(r, t->htable_bits); i++) { n = __ipset_dereference(hbucket(t, i)); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY) mtype_ext_cleanup(set, n); /* FIXME: use slab cache */ rcu_assign_pointer(hbucket(t, i), NULL); kfree_rcu(n, rcu); } t->hregion[r].ext_size = 0; t->hregion[r].elements = 0; spin_unlock_bh(&t->hregion[r].lock); } #ifdef IP_SET_HASH_WITH_NETS memset(h->nets, 0, sizeof(h->nets)); #endif } /* Destroy the hashtable part of the set */ static void mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy) { struct hbucket *n; u32 i; for (i = 0; i < jhash_size(t->htable_bits); i++) { n = (__force struct hbucket *)hbucket(t, i); if (!n) continue; if (set->extensions & IPSET_EXT_DESTROY && ext_destroy) mtype_ext_cleanup(set, n); /* FIXME: use slab cache */ kfree(n); } ip_set_free(t->hregion); ip_set_free(t); } /* Destroy a hash type of set */ static void mtype_destroy(struct ip_set *set) { struct htype *h = set->data; struct list_head *l, *lt; mtype_ahash_destroy(set, (__force struct htable *)h->table, true); list_for_each_safe(l, lt, &h->ad) { list_del(l); kfree(l); } kfree(h); set->data = NULL; } static bool mtype_same_set(const struct ip_set *a, const struct ip_set *b) { const struct htype *x = a->data; const struct htype *y = b->data; /* Resizing changes htable_bits, so we ignore it */ return x->maxelem == y->maxelem && a->timeout == b->timeout && #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) nf_inet_addr_cmp(&x->bitmask, &y->bitmask) && #endif #ifdef IP_SET_HASH_WITH_MARKMASK x->markmask == y->markmask && #endif a->extensions == b->extensions; } static void mtype_gc_do(struct ip_set *set, struct htype *h, struct htable *t, u32 r) { struct hbucket *n, *tmp; struct mtype_elem *data; u32 i, j, d; size_t dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 k; #endif u8 htable_bits = t->htable_bits; spin_lock_bh(&t->hregion[r].lock); for (i = ahash_bucket_start(r, htable_bits); i < ahash_bucket_end(r, htable_bits); i++) { n = __ipset_dereference(hbucket(t, i)); if (!n) continue; for (j = 0, d = 0; j < n->pos; j++) { if (!test_bit(j, n->used)) { d++; continue; } data = ahash_data(n, j, dsize); if (!ip_set_timeout_expired(ext_timeout(data, set))) continue; pr_debug("expired %u/%u\n", i, j); clear_bit(j, n->used); smp_mb__after_atomic(); #ifdef IP_SET_HASH_WITH_NETS for (k = 0; k < IPSET_NET_COUNT; k++) mtype_del_cidr(set, h, NCIDR_PUT(DCIDR_GET(data->cidr, k)), k); #endif t->hregion[r].elements--; ip_set_ext_destroy(set, data); d++; } if (d >= AHASH_INIT_SIZE) { if (d >= n->size) { t->hregion[r].ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, i), NULL); kfree_rcu(n, rcu); continue; } tmp = kzalloc(sizeof(*tmp) + (n->size - AHASH_INIT_SIZE) * dsize, GFP_ATOMIC); if (!tmp) /* Still try to delete expired elements. */ continue; tmp->size = n->size - AHASH_INIT_SIZE; for (j = 0, d = 0; j < n->pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, dsize); memcpy(tmp->value + d * dsize, data, dsize); set_bit(d, tmp->used); d++; } tmp->pos = d; t->hregion[r].ext_size -= ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, i), tmp); kfree_rcu(n, rcu); } } spin_unlock_bh(&t->hregion[r].lock); } static void mtype_gc(struct work_struct *work) { struct htable_gc *gc; struct ip_set *set; struct htype *h; struct htable *t; u32 r, numof_locks; unsigned int next_run; gc = container_of(work, struct htable_gc, dwork.work); set = gc->set; h = set->data; spin_lock_bh(&set->lock); t = ipset_dereference_set(h->table, set); atomic_inc(&t->uref); numof_locks = ahash_numof_locks(t->htable_bits); r = gc->region++; if (r >= numof_locks) { r = gc->region = 0; } next_run = (IPSET_GC_PERIOD(set->timeout) * HZ) / numof_locks; if (next_run < HZ/10) next_run = HZ/10; spin_unlock_bh(&set->lock); mtype_gc_do(set, h, t, r); if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { pr_debug("Table destroy after resize by expire: %p\n", t); mtype_ahash_destroy(set, t, false); } queue_delayed_work(system_power_efficient_wq, &gc->dwork, next_run); } static void mtype_gc_init(struct htable_gc *gc) { INIT_DEFERRABLE_WORK(&gc->dwork, mtype_gc); queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ); } static void mtype_cancel_gc(struct ip_set *set) { struct htype *h = set->data; if (SET_WITH_TIMEOUT(set)) cancel_delayed_work_sync(&h->gc.dwork); } static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); static int mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags); /* Resize a hash: create a new hash table with doubling the hashsize * and inserting the elements to it. Repeat until we succeed or * fail due to memory pressures. */ static int mtype_resize(struct ip_set *set, bool retried) { struct htype *h = set->data; struct htable *t, *orig; u8 htable_bits; size_t hsize, dsize = set->dsize; #ifdef IP_SET_HASH_WITH_NETS u8 flags; struct mtype_elem *tmp; #endif struct mtype_elem *data; struct mtype_elem *d; struct hbucket *n, *m; struct list_head *l, *lt; struct mtype_resize_ad *x; u32 i, j, r, nr, key; int ret; #ifdef IP_SET_HASH_WITH_NETS tmp = kmalloc(dsize, GFP_KERNEL); if (!tmp) return -ENOMEM; #endif orig = ipset_dereference_bh_nfnl(h->table); htable_bits = orig->htable_bits; retry: ret = 0; htable_bits++; if (!htable_bits) goto hbwarn; hsize = htable_size(htable_bits); if (!hsize) goto hbwarn; t = ip_set_alloc(hsize); if (!t) { ret = -ENOMEM; goto out; } t->hregion = ip_set_alloc(ahash_sizeof_regions(htable_bits)); if (!t->hregion) { ip_set_free(t); ret = -ENOMEM; goto out; } t->htable_bits = htable_bits; t->maxelem = h->maxelem / ahash_numof_locks(htable_bits); for (i = 0; i < ahash_numof_locks(htable_bits); i++) spin_lock_init(&t->hregion[i].lock); /* There can't be another parallel resizing, * but dumping, gc, kernel side add/del are possible */ orig = ipset_dereference_bh_nfnl(h->table); atomic_set(&orig->ref, 1); atomic_inc(&orig->uref); pr_debug("attempt to resize set %s from %u to %u, t %p\n", set->name, orig->htable_bits, htable_bits, orig); for (r = 0; r < ahash_numof_locks(orig->htable_bits); r++) { /* Expire may replace a hbucket with another one */ rcu_read_lock_bh(); for (i = ahash_bucket_start(r, orig->htable_bits); i < ahash_bucket_end(r, orig->htable_bits); i++) { n = __ipset_dereference(hbucket(orig, i)); if (!n) continue; for (j = 0; j < n->pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, dsize); if (SET_ELEM_EXPIRED(set, data)) continue; #ifdef IP_SET_HASH_WITH_NETS /* We have readers running parallel with us, * so the live data cannot be modified. */ flags = 0; memcpy(tmp, data, dsize); data = tmp; mtype_data_reset_flags(data, &flags); #endif key = HKEY(data, h->initval, htable_bits); m = __ipset_dereference(hbucket(t, key)); nr = ahash_region(key, htable_bits); if (!m) { m = kzalloc(sizeof(*m) + AHASH_INIT_SIZE * dsize, GFP_ATOMIC); if (!m) { ret = -ENOMEM; goto cleanup; } m->size = AHASH_INIT_SIZE; t->hregion[nr].ext_size += ext_size(AHASH_INIT_SIZE, dsize); RCU_INIT_POINTER(hbucket(t, key), m); } else if (m->pos >= m->size) { struct hbucket *ht; if (m->size >= AHASH_MAX(h)) { ret = -EAGAIN; } else { ht = kzalloc(sizeof(*ht) + (m->size + AHASH_INIT_SIZE) * dsize, GFP_ATOMIC); if (!ht) ret = -ENOMEM; } if (ret < 0) goto cleanup; memcpy(ht, m, sizeof(struct hbucket) + m->size * dsize); ht->size = m->size + AHASH_INIT_SIZE; t->hregion[nr].ext_size += ext_size(AHASH_INIT_SIZE, dsize); kfree(m); m = ht; RCU_INIT_POINTER(hbucket(t, key), ht); } d = ahash_data(m, m->pos, dsize); memcpy(d, data, dsize); set_bit(m->pos++, m->used); t->hregion[nr].elements++; #ifdef IP_SET_HASH_WITH_NETS mtype_data_reset_flags(d, &flags); #endif } } rcu_read_unlock_bh(); } /* There can't be any other writer. */ rcu_assign_pointer(h->table, t); /* Give time to other readers of the set */ synchronize_rcu(); pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name, orig->htable_bits, orig, t->htable_bits, t); /* Add/delete elements processed by the SET target during resize. * Kernel-side add cannot trigger a resize and userspace actions * are serialized by the mutex. */ list_for_each_safe(l, lt, &h->ad) { x = list_entry(l, struct mtype_resize_ad, list); if (x->ad == IPSET_ADD) { mtype_add(set, &x->d, &x->ext, &x->mext, x->flags); } else { mtype_del(set, &x->d, NULL, NULL, 0); } list_del(l); kfree(l); } /* If there's nobody else using the table, destroy it */ if (atomic_dec_and_test(&orig->uref)) { pr_debug("Table destroy by resize %p\n", orig); mtype_ahash_destroy(set, orig, false); } out: #ifdef IP_SET_HASH_WITH_NETS kfree(tmp); #endif return ret; cleanup: rcu_read_unlock_bh(); atomic_set(&orig->ref, 0); atomic_dec(&orig->uref); mtype_ahash_destroy(set, t, false); if (ret == -EAGAIN) goto retry; goto out; hbwarn: /* In case we have plenty of memory :-) */ pr_warn("Cannot increase the hashsize of set %s further\n", set->name); ret = -IPSET_ERR_HASH_FULL; goto out; } /* Get the current number of elements and ext_size in the set */ static void mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size) { struct htype *h = set->data; const struct htable *t; u32 i, j, r; struct hbucket *n; struct mtype_elem *data; t = rcu_dereference_bh(h->table); for (r = 0; r < ahash_numof_locks(t->htable_bits); r++) { for (i = ahash_bucket_start(r, t->htable_bits); i < ahash_bucket_end(r, t->htable_bits); i++) { n = rcu_dereference_bh(hbucket(t, i)); if (!n) continue; for (j = 0; j < n->pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, set->dsize); if (!SET_ELEM_EXPIRED(set, data)) (*elements)++; } } *ext_size += t->hregion[r].ext_size; } } /* Add an element to a hash and update the internal counters when succeeded, * otherwise report the proper error code. */ static int mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; struct htable *t; const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n, *old = ERR_PTR(-ENOENT); int i, j = -1, ret; bool flag_exist = flags & IPSET_FLAG_EXIST; bool deleted = false, forceadd = false, reuse = false; u32 r, key, multi = 0, elements, maxelem; rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); key = HKEY(value, h->initval, t->htable_bits); r = ahash_region(key, t->htable_bits); atomic_inc(&t->uref); elements = t->hregion[r].elements; maxelem = t->maxelem; if (elements >= maxelem) { u32 e; if (SET_WITH_TIMEOUT(set)) { rcu_read_unlock_bh(); mtype_gc_do(set, h, t, r); rcu_read_lock_bh(); } maxelem = h->maxelem; elements = 0; for (e = 0; e < ahash_numof_locks(t->htable_bits); e++) elements += t->hregion[e].elements; if (elements >= maxelem && SET_WITH_FORCEADD(set)) forceadd = true; } rcu_read_unlock_bh(); spin_lock_bh(&t->hregion[r].lock); n = rcu_dereference_bh(hbucket(t, key)); if (!n) { if (forceadd || elements >= maxelem) goto set_full; old = NULL; n = kzalloc(sizeof(*n) + AHASH_INIT_SIZE * set->dsize, GFP_ATOMIC); if (!n) { ret = -ENOMEM; goto unlock; } n->size = AHASH_INIT_SIZE; t->hregion[r].ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); goto copy_elem; } for (i = 0; i < n->pos; i++) { if (!test_bit(i, n->used)) { /* Reuse first deleted entry */ if (j == -1) { deleted = reuse = true; j = i; } continue; } data = ahash_data(n, i, set->dsize); if (mtype_data_equal(data, d, &multi)) { if (flag_exist || SET_ELEM_EXPIRED(set, data)) { /* Just the extensions could be overwritten */ j = i; goto overwrite_extensions; } ret = -IPSET_ERR_EXIST; goto unlock; } /* Reuse first timed out entry */ if (SET_ELEM_EXPIRED(set, data) && j == -1) { j = i; reuse = true; } } if (reuse || forceadd) { if (j == -1) j = 0; data = ahash_data(n, j, set->dsize); if (!deleted) { #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) mtype_del_cidr(set, h, NCIDR_PUT(DCIDR_GET(data->cidr, i)), i); #endif ip_set_ext_destroy(set, data); t->hregion[r].elements--; } goto copy_data; } if (elements >= maxelem) goto set_full; /* Create a new slot */ if (n->pos >= n->size) { #ifdef IP_SET_HASH_WITH_MULTI if (h->bucketsize >= AHASH_MAX_TUNED) goto set_full; else if (h->bucketsize <= multi) h->bucketsize += AHASH_INIT_SIZE; #endif if (n->size >= AHASH_MAX(h)) { /* Trigger rehashing */ mtype_data_next(&h->next, d); ret = -EAGAIN; goto resize; } old = n; n = kzalloc(sizeof(*n) + (old->size + AHASH_INIT_SIZE) * set->dsize, GFP_ATOMIC); if (!n) { ret = -ENOMEM; goto unlock; } memcpy(n, old, sizeof(struct hbucket) + old->size * set->dsize); n->size = old->size + AHASH_INIT_SIZE; t->hregion[r].ext_size += ext_size(AHASH_INIT_SIZE, set->dsize); } copy_elem: j = n->pos++; data = ahash_data(n, j, set->dsize); copy_data: t->hregion[r].elements++; #ifdef IP_SET_HASH_WITH_NETS for (i = 0; i < IPSET_NET_COUNT; i++) mtype_add_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, i)), i); #endif memcpy(data, d, sizeof(struct mtype_elem)); overwrite_extensions: #ifdef IP_SET_HASH_WITH_NETS mtype_data_set_flags(data, flags); #endif if (SET_WITH_COUNTER(set)) ip_set_init_counter(ext_counter(data, set), ext); if (SET_WITH_COMMENT(set)) ip_set_init_comment(set, ext_comment(data, set), ext); if (SET_WITH_SKBINFO(set)) ip_set_init_skbinfo(ext_skbinfo(data, set), ext); /* Must come last for the case when timed out entry is reused */ if (SET_WITH_TIMEOUT(set)) ip_set_timeout_set(ext_timeout(data, set), ext->timeout); smp_mb__before_atomic(); set_bit(j, n->used); if (old != ERR_PTR(-ENOENT)) { rcu_assign_pointer(hbucket(t, key), n); if (old) kfree_rcu(old, rcu); } ret = 0; resize: spin_unlock_bh(&t->hregion[r].lock); if (atomic_read(&t->ref) && ext->target) { /* Resize is in process and kernel side add, save values */ struct mtype_resize_ad *x; x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC); if (!x) /* Don't bother */ goto out; x->ad = IPSET_ADD; memcpy(&x->d, value, sizeof(struct mtype_elem)); memcpy(&x->ext, ext, sizeof(struct ip_set_ext)); memcpy(&x->mext, mext, sizeof(struct ip_set_ext)); x->flags = flags; spin_lock_bh(&set->lock); list_add_tail(&x->list, &h->ad); spin_unlock_bh(&set->lock); } goto out; set_full: if (net_ratelimit()) pr_warn("Set %s is full, maxelem %u reached\n", set->name, maxelem); ret = -IPSET_ERR_HASH_FULL; unlock: spin_unlock_bh(&t->hregion[r].lock); out: if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { pr_debug("Table destroy after resize by add: %p\n", t); mtype_ahash_destroy(set, t, false); } return ret; } /* Delete an element from the hash and free up space if possible. */ static int mtype_del(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; struct htable *t; const struct mtype_elem *d = value; struct mtype_elem *data; struct hbucket *n; struct mtype_resize_ad *x = NULL; int i, j, k, r, ret = -IPSET_ERR_EXIST; u32 key, multi = 0; size_t dsize = set->dsize; /* Userspace add and resize is excluded by the mutex. * Kernespace add does not trigger resize. */ rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); key = HKEY(value, h->initval, t->htable_bits); r = ahash_region(key, t->htable_bits); atomic_inc(&t->uref); rcu_read_unlock_bh(); spin_lock_bh(&t->hregion[r].lock); n = rcu_dereference_bh(hbucket(t, key)); if (!n) goto out; for (i = 0, k = 0; i < n->pos; i++) { if (!test_bit(i, n->used)) { k++; continue; } data = ahash_data(n, i, dsize); if (!mtype_data_equal(data, d, &multi)) continue; if (SET_ELEM_EXPIRED(set, data)) goto out; ret = 0; clear_bit(i, n->used); smp_mb__after_atomic(); if (i + 1 == n->pos) n->pos--; t->hregion[r].elements--; #ifdef IP_SET_HASH_WITH_NETS for (j = 0; j < IPSET_NET_COUNT; j++) mtype_del_cidr(set, h, NCIDR_PUT(DCIDR_GET(d->cidr, j)), j); #endif ip_set_ext_destroy(set, data); if (atomic_read(&t->ref) && ext->target) { /* Resize is in process and kernel side del, * save values */ x = kzalloc(sizeof(struct mtype_resize_ad), GFP_ATOMIC); if (x) { x->ad = IPSET_DEL; memcpy(&x->d, value, sizeof(struct mtype_elem)); x->flags = flags; } } for (; i < n->pos; i++) { if (!test_bit(i, n->used)) k++; } if (n->pos == 0 && k == 0) { t->hregion[r].ext_size -= ext_size(n->size, dsize); rcu_assign_pointer(hbucket(t, key), NULL); kfree_rcu(n, rcu); } else if (k >= AHASH_INIT_SIZE) { struct hbucket *tmp = kzalloc(sizeof(*tmp) + (n->size - AHASH_INIT_SIZE) * dsize, GFP_ATOMIC); if (!tmp) goto out; tmp->size = n->size - AHASH_INIT_SIZE; for (j = 0, k = 0; j < n->pos; j++) { if (!test_bit(j, n->used)) continue; data = ahash_data(n, j, dsize); memcpy(tmp->value + k * dsize, data, dsize); set_bit(k, tmp->used); k++; } tmp->pos = k; t->hregion[r].ext_size -= ext_size(AHASH_INIT_SIZE, dsize); rcu_assign_pointer(hbucket(t, key), tmp); kfree_rcu(n, rcu); } goto out; } out: spin_unlock_bh(&t->hregion[r].lock); if (x) { spin_lock_bh(&set->lock); list_add(&x->list, &h->ad); spin_unlock_bh(&set->lock); } if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { pr_debug("Table destroy after resize by del: %p\n", t); mtype_ahash_destroy(set, t, false); } return ret; } static int mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext, struct ip_set_ext *mext, struct ip_set *set, u32 flags) { if (!ip_set_match_extensions(set, ext, mext, flags, data)) return 0; /* nomatch entries return -ENOTEMPTY */ return mtype_do_data_match(data); } #ifdef IP_SET_HASH_WITH_NETS /* Special test function which takes into account the different network * sizes added to the set */ static int mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; struct htable *t = rcu_dereference_bh(h->table); struct hbucket *n; struct mtype_elem *data; #if IPSET_NET_COUNT == 2 struct mtype_elem orig = *d; int ret, i, j = 0, k; #else int ret, i, j = 0; #endif u32 key, multi = 0; pr_debug("test by nets\n"); for (; j < NLEN && h->nets[j].cidr[0] && !multi; j++) { #if IPSET_NET_COUNT == 2 mtype_data_reset_elem(d, &orig); mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0]), false); for (k = 0; k < NLEN && h->nets[k].cidr[1] && !multi; k++) { mtype_data_netmask(d, NCIDR_GET(h->nets[k].cidr[1]), true); #else mtype_data_netmask(d, NCIDR_GET(h->nets[j].cidr[0])); #endif key = HKEY(d, h->initval, t->htable_bits); n = rcu_dereference_bh(hbucket(t, key)); if (!n) continue; for (i = 0; i < n->pos; i++) { if (!test_bit(i, n->used)) continue; data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; ret = mtype_data_match(data, ext, mext, set, flags); if (ret != 0) return ret; #ifdef IP_SET_HASH_WITH_MULTI /* No match, reset multiple match flag */ multi = 0; #endif } #if IPSET_NET_COUNT == 2 } #endif } return 0; } #endif /* Test whether the element is added to the set */ static int mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext, struct ip_set_ext *mext, u32 flags) { struct htype *h = set->data; struct htable *t; struct mtype_elem *d = value; struct hbucket *n; struct mtype_elem *data; int i, ret = 0; u32 key, multi = 0; rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); #ifdef IP_SET_HASH_WITH_NETS /* If we test an IP address and not a network address, * try all possible network sizes */ for (i = 0; i < IPSET_NET_COUNT; i++) if (DCIDR_GET(d->cidr, i) != HOST_MASK) break; if (i == IPSET_NET_COUNT) { ret = mtype_test_cidrs(set, d, ext, mext, flags); goto out; } #endif key = HKEY(d, h->initval, t->htable_bits); n = rcu_dereference_bh(hbucket(t, key)); if (!n) { ret = 0; goto out; } for (i = 0; i < n->pos; i++) { if (!test_bit(i, n->used)) continue; data = ahash_data(n, i, set->dsize); if (!mtype_data_equal(data, d, &multi)) continue; ret = mtype_data_match(data, ext, mext, set, flags); if (ret != 0) goto out; } out: rcu_read_unlock_bh(); return ret; } /* Reply a HEADER request: fill out the header part of the set */ static int mtype_head(struct ip_set *set, struct sk_buff *skb) { struct htype *h = set->data; const struct htable *t; struct nlattr *nested; size_t memsize; u32 elements = 0; size_t ext_size = 0; u8 htable_bits; rcu_read_lock_bh(); t = rcu_dereference_bh(h->table); mtype_ext_size(set, &elements, &ext_size); memsize = mtype_ahash_memsize(h, t) + ext_size + set->ext_size; htable_bits = t->htable_bits; rcu_read_unlock_bh(); nested = nla_nest_start(skb, IPSET_ATTR_DATA); if (!nested) goto nla_put_failure; if (nla_put_net32(skb, IPSET_ATTR_HASHSIZE, htonl(jhash_size(htable_bits))) || nla_put_net32(skb, IPSET_ATTR_MAXELEM, htonl(h->maxelem))) goto nla_put_failure; #ifdef IP_SET_HASH_WITH_BITMASK /* if netmask is set to anything other than HOST_MASK we know that the user supplied netmask * and not bitmask. These two are mutually exclusive. */ if (h->netmask == HOST_MASK && !nf_inet_addr_cmp(&onesmask, &h->bitmask)) { if (set->family == NFPROTO_IPV4) { if (nla_put_ipaddr4(skb, IPSET_ATTR_BITMASK, h->bitmask.ip)) goto nla_put_failure; } else if (set->family == NFPROTO_IPV6) { if (nla_put_ipaddr6(skb, IPSET_ATTR_BITMASK, &h->bitmask.in6)) goto nla_put_failure; } } #endif #ifdef IP_SET_HASH_WITH_NETMASK if (h->netmask != HOST_MASK && nla_put_u8(skb, IPSET_ATTR_NETMASK, h->netmask)) goto nla_put_failure; #endif #ifdef IP_SET_HASH_WITH_MARKMASK if (nla_put_u32(skb, IPSET_ATTR_MARKMASK, h->markmask)) goto nla_put_failure; #endif if (set->flags & IPSET_CREATE_FLAG_BUCKETSIZE) { if (nla_put_u8(skb, IPSET_ATTR_BUCKETSIZE, h->bucketsize) || nla_put_net32(skb, IPSET_ATTR_INITVAL, htonl(h->initval))) goto nla_put_failure; } if (nla_put_net32(skb, IPSET_ATTR_REFERENCES, htonl(set->ref)) || nla_put_net32(skb, IPSET_ATTR_MEMSIZE, htonl(memsize)) || nla_put_net32(skb, IPSET_ATTR_ELEMENTS, htonl(elements))) goto nla_put_failure; if (unlikely(ip_set_put_flags(skb, set))) goto nla_put_failure; nla_nest_end(skb, nested); return 0; nla_put_failure: return -EMSGSIZE; } /* Make possible to run dumping parallel with resizing */ static void mtype_uref(struct ip_set *set, struct netlink_callback *cb, bool start) { struct htype *h = set->data; struct htable *t; if (start) { rcu_read_lock_bh(); t = ipset_dereference_bh_nfnl(h->table); atomic_inc(&t->uref); cb->args[IPSET_CB_PRIVATE] = (unsigned long)t; rcu_read_unlock_bh(); } else if (cb->args[IPSET_CB_PRIVATE]) { t = (struct htable *)cb->args[IPSET_CB_PRIVATE]; if (atomic_dec_and_test(&t->uref) && atomic_read(&t->ref)) { pr_debug("Table destroy after resize " " by dump: %p\n", t); mtype_ahash_destroy(set, t, false); } cb->args[IPSET_CB_PRIVATE] = 0; } } /* Reply a LIST/SAVE request: dump the elements of the specified set */ static int mtype_list(const struct ip_set *set, struct sk_buff *skb, struct netlink_callback *cb) { const struct htable *t; struct nlattr *atd, *nested; const struct hbucket *n; const struct mtype_elem *e; u32 first = cb->args[IPSET_CB_ARG0]; /* We assume that one hash bucket fills into one page */ void *incomplete; int i, ret = 0; atd = nla_nest_start(skb, IPSET_ATTR_ADT); if (!atd) return -EMSGSIZE; pr_debug("list hash set %s\n", set->name); t = (const struct htable *)cb->args[IPSET_CB_PRIVATE]; /* Expire may replace a hbucket with another one */ rcu_read_lock(); for (; cb->args[IPSET_CB_ARG0] < jhash_size(t->htable_bits); cb->args[IPSET_CB_ARG0]++) { cond_resched_rcu(); incomplete = skb_tail_pointer(skb); n = rcu_dereference(hbucket(t, cb->args[IPSET_CB_ARG0])); pr_debug("cb->arg bucket: %lu, t %p n %p\n", cb->args[IPSET_CB_ARG0], t, n); if (!n) continue; for (i = 0; i < n->pos; i++) { if (!test_bit(i, n->used)) continue; e = ahash_data(n, i, set->dsize); if (SET_ELEM_EXPIRED(set, e)) continue; pr_debug("list hash %lu hbucket %p i %u, data %p\n", cb->args[IPSET_CB_ARG0], n, i, e); nested = nla_nest_start(skb, IPSET_ATTR_DATA); if (!nested) { if (cb->args[IPSET_CB_ARG0] == first) { nla_nest_cancel(skb, atd); ret = -EMSGSIZE; goto out; } goto nla_put_failure; } if (mtype_data_list(skb, e)) goto nla_put_failure; if (ip_set_put_extensions(skb, set, e, true)) goto nla_put_failure; nla_nest_end(skb, nested); } } nla_nest_end(skb, atd); /* Set listing finished */ cb->args[IPSET_CB_ARG0] = 0; goto out; nla_put_failure: nlmsg_trim(skb, incomplete); if (unlikely(first == cb->args[IPSET_CB_ARG0])) { pr_warn("Can't list set %s: one bucket does not fit into a message. Please report it!\n", set->name); cb->args[IPSET_CB_ARG0] = 0; ret = -EMSGSIZE; } else { nla_nest_end(skb, atd); } out: rcu_read_unlock(); return ret; } static int IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt); static int IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried); static const struct ip_set_type_variant mtype_variant = { .kadt = mtype_kadt, .uadt = mtype_uadt, .adt = { [IPSET_ADD] = mtype_add, [IPSET_DEL] = mtype_del, [IPSET_TEST] = mtype_test, }, .destroy = mtype_destroy, .flush = mtype_flush, .head = mtype_head, .list = mtype_list, .uref = mtype_uref, .resize = mtype_resize, .same_set = mtype_same_set, .cancel_gc = mtype_cancel_gc, .region_lock = true, }; #ifdef IP_SET_EMIT_CREATE static int IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, struct nlattr *tb[], u32 flags) { u32 hashsize = IPSET_DEFAULT_HASHSIZE, maxelem = IPSET_DEFAULT_MAXELEM; #ifdef IP_SET_HASH_WITH_MARKMASK u32 markmask; #endif u8 hbits; #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) int ret __attribute__((unused)) = 0; u8 netmask = set->family == NFPROTO_IPV4 ? 32 : 128; union nf_inet_addr bitmask = onesmask; #endif size_t hsize; struct htype *h; struct htable *t; u32 i; pr_debug("Create set %s with family %s\n", set->name, set->family == NFPROTO_IPV4 ? "inet" : "inet6"); #ifdef IP_SET_PROTO_UNDEF if (set->family != NFPROTO_UNSPEC) return -IPSET_ERR_INVALID_FAMILY; #else if (!(set->family == NFPROTO_IPV4 || set->family == NFPROTO_IPV6)) return -IPSET_ERR_INVALID_FAMILY; #endif if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_HASHSIZE) || !ip_set_optattr_netorder(tb, IPSET_ATTR_MAXELEM) || !ip_set_optattr_netorder(tb, IPSET_ATTR_TIMEOUT) || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; #ifdef IP_SET_HASH_WITH_MARKMASK /* Separated condition in order to avoid directive in argument list */ if (unlikely(!ip_set_optattr_netorder(tb, IPSET_ATTR_MARKMASK))) return -IPSET_ERR_PROTOCOL; markmask = 0xffffffff; if (tb[IPSET_ATTR_MARKMASK]) { markmask = ntohl(nla_get_be32(tb[IPSET_ATTR_MARKMASK])); if (markmask == 0) return -IPSET_ERR_INVALID_MARKMASK; } #endif #ifdef IP_SET_HASH_WITH_NETMASK if (tb[IPSET_ATTR_NETMASK]) { netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]); if ((set->family == NFPROTO_IPV4 && netmask > 32) || (set->family == NFPROTO_IPV6 && netmask > 128) || netmask == 0) return -IPSET_ERR_INVALID_NETMASK; /* we convert netmask to bitmask and store it */ if (set->family == NFPROTO_IPV4) bitmask.ip = ip_set_netmask(netmask); else ip6_netmask(&bitmask, netmask); } #endif #ifdef IP_SET_HASH_WITH_BITMASK if (tb[IPSET_ATTR_BITMASK]) { /* bitmask and netmask do the same thing, allow only one of these options */ if (tb[IPSET_ATTR_NETMASK]) return -IPSET_ERR_BITMASK_NETMASK_EXCL; if (set->family == NFPROTO_IPV4) { ret = ip_set_get_ipaddr4(tb[IPSET_ATTR_BITMASK], &bitmask.ip); if (ret || !bitmask.ip) return -IPSET_ERR_INVALID_NETMASK; } else if (set->family == NFPROTO_IPV6) { ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_BITMASK], &bitmask); if (ret || ipv6_addr_any(&bitmask.in6)) return -IPSET_ERR_INVALID_NETMASK; } if (nf_inet_addr_cmp(&bitmask, &zeromask)) return -IPSET_ERR_INVALID_NETMASK; } #endif if (tb[IPSET_ATTR_HASHSIZE]) { hashsize = ip_set_get_h32(tb[IPSET_ATTR_HASHSIZE]); if (hashsize < IPSET_MIMINAL_HASHSIZE) hashsize = IPSET_MIMINAL_HASHSIZE; } if (tb[IPSET_ATTR_MAXELEM]) maxelem = ip_set_get_h32(tb[IPSET_ATTR_MAXELEM]); hsize = sizeof(*h); h = kzalloc(hsize, GFP_KERNEL); if (!h) return -ENOMEM; /* Compute htable_bits from the user input parameter hashsize. * Assume that hashsize == 2^htable_bits, * otherwise round up to the first 2^n value. */ hbits = fls(hashsize - 1); hsize = htable_size(hbits); if (hsize == 0) { kfree(h); return -ENOMEM; } t = ip_set_alloc(hsize); if (!t) { kfree(h); return -ENOMEM; } t->hregion = ip_set_alloc(ahash_sizeof_regions(hbits)); if (!t->hregion) { ip_set_free(t); kfree(h); return -ENOMEM; } h->gc.set = set; for (i = 0; i < ahash_numof_locks(hbits); i++) spin_lock_init(&t->hregion[i].lock); h->maxelem = maxelem; #if defined(IP_SET_HASH_WITH_NETMASK) || defined(IP_SET_HASH_WITH_BITMASK) h->bitmask = bitmask; h->netmask = netmask; #endif #ifdef IP_SET_HASH_WITH_MARKMASK h->markmask = markmask; #endif if (tb[IPSET_ATTR_INITVAL]) h->initval = ntohl(nla_get_be32(tb[IPSET_ATTR_INITVAL])); else get_random_bytes(&h->initval, sizeof(h->initval)); h->bucketsize = AHASH_MAX_SIZE; if (tb[IPSET_ATTR_BUCKETSIZE]) { h->bucketsize = nla_get_u8(tb[IPSET_ATTR_BUCKETSIZE]); if (h->bucketsize < AHASH_INIT_SIZE) h->bucketsize = AHASH_INIT_SIZE; else if (h->bucketsize > AHASH_MAX_SIZE) h->bucketsize = AHASH_MAX_SIZE; else if (h->bucketsize % 2) h->bucketsize += 1; } t->htable_bits = hbits; t->maxelem = h->maxelem / ahash_numof_locks(hbits); RCU_INIT_POINTER(h->table, t); INIT_LIST_HEAD(&h->ad); set->data = h; #ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) { #endif set->variant = &IPSET_TOKEN(HTYPE, 4_variant); set->dsize = ip_set_elem_len(set, tb, sizeof(struct IPSET_TOKEN(HTYPE, 4_elem)), __alignof__(struct IPSET_TOKEN(HTYPE, 4_elem))); #ifndef IP_SET_PROTO_UNDEF } else { set->variant = &IPSET_TOKEN(HTYPE, 6_variant); set->dsize = ip_set_elem_len(set, tb, sizeof(struct IPSET_TOKEN(HTYPE, 6_elem)), __alignof__(struct IPSET_TOKEN(HTYPE, 6_elem))); } #endif set->timeout = IPSET_NO_TIMEOUT; if (tb[IPSET_ATTR_TIMEOUT]) { set->timeout = ip_set_timeout_uget(tb[IPSET_ATTR_TIMEOUT]); #ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) #endif IPSET_TOKEN(HTYPE, 4_gc_init)(&h->gc); #ifndef IP_SET_PROTO_UNDEF else IPSET_TOKEN(HTYPE, 6_gc_init)(&h->gc); #endif } pr_debug("create %s hashsize %u (%u) maxelem %u: %p(%p)\n", set->name, jhash_size(t->htable_bits), t->htable_bits, h->maxelem, set->data, t); return 0; } #endif /* IP_SET_EMIT_CREATE */ #undef HKEY_DATALEN
1 2 1 1 1 1 2 12 4 1 9 3 2 18 19 1 18 18 11 10 6 2 2 3 3 5 1 2 2 1 2 2 1 1 1 1 1 1 1 1 5 1 1 4 97 97 5 1 3 1 690 11 690 690 690 690 72 1 73 15 49 4 45 7 40 1 2 12 3 1 1 1 3 2 2 4 4 15 14 2 2 3 3 2 2 4 1 3 3 15 1 7 6 7 12 7 5 12 12 14 9 3 1 1 7 8 3 1 2 9 1 2 1 8 8 5 1 5 9 9 1 9 1 4 3 2 4 1 3 9 1 1 1 6 7 3 1 2 1 95 2 94 2 2 2 743 744 744 744 672 3 713 8 7 6 672 672 162 7 7 1 6 7 311 668 668 3 665 668 654 58 17 56 56 13 1 5 6 1 1 16 13 13 3 3 3 13 29 29 17 17 4 17 13 9 29 6 1 5 3 6 1 5 1 5 3 3 4 4 7 4 2 1 1 1 1 1 3 4 1 1 2 2 2 3 2 603 108 540 107 540 2 603 614 3 595 595 390 596 611 21 1 20 1 5 6 20 8 8 7 1 6 4 2 19 1 18 1 2 17 1 1 16 5 12 5 5 5 5 3 1 2 2 2 2 2 2 6 2 4 4 4 4 4 1 1 1 6 6 6 6 6 6 6 3 3 3 1 2 3 3 3 3 3 1 3 3 1 1 1 2 1 11 7 2 5 601 29 58 58 157 103 54 53 54 52 53 6 6 3 6 4 5 6 11 1 5 3 2 4 5 3 3 1 1 1 5 95 104 104 111 9 103 102 103 7 95 102 111 11 10 2 2 3 10 10 6 3 5 6 6 21 21 11 6 6 8 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/proc/base.c * * Copyright (C) 1991, 1992 Linus Torvalds * * proc base directory handling functions * * 1999, Al Viro. Rewritten. Now it covers the whole per-process part. * Instead of using magical inumbers to determine the kind of object * we allocate and fill in-core inodes upon lookup. They don't even * go into icache. We cache the reference to task_struct upon lookup too. * Eventually it should become a filesystem in its own. We don't use the * rest of procfs anymore. * * * Changelog: * 17-Jan-2005 * Allan Bezerra * Bruna Moreira <bruna.moreira@indt.org.br> * Edjard Mota <edjard.mota@indt.org.br> * Ilias Biris <ilias.biris@indt.org.br> * Mauricio Lin <mauricio.lin@indt.org.br> * * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT * * A new process specific entry (smaps) included in /proc. It shows the * size of rss for each memory area. The maps entry lacks information * about physical memory size (rss) for each mapped file, i.e., * rss information for executables and library files. * This additional information is useful for any tools that need to know * about physical memory consumption for a process specific library. * * Changelog: * 21-Feb-2005 * Embedded Linux Lab - 10LE Instituto Nokia de Tecnologia - INdT * Pud inclusion in the page table walking. * * ChangeLog: * 10-Mar-2005 * 10LE Instituto Nokia de Tecnologia - INdT: * A better way to walks through the page table as suggested by Hugh Dickins. * * Simo Piiroinen <simo.piiroinen@nokia.com>: * Smaps information related to shared, private, clean and dirty pages. * * Paul Mundt <paul.mundt@nokia.com>: * Overall revision about smaps. */ #include <linux/uaccess.h> #include <linux/errno.h> #include <linux/time.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/task_io_accounting_ops.h> #include <linux/init.h> #include <linux/capability.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/generic-radix-tree.h> #include <linux/string.h> #include <linux/seq_file.h> #include <linux/namei.h> #include <linux/mnt_namespace.h> #include <linux/mm.h> #include <linux/swap.h> #include <linux/rcupdate.h> #include <linux/kallsyms.h> #include <linux/stacktrace.h> #include <linux/resource.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/ptrace.h> #include <linux/printk.h> #include <linux/cache.h> #include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> #include <linux/poll.h> #include <linux/nsproxy.h> #include <linux/oom.h> #include <linux/elf.h> #include <linux/pid_namespace.h> #include <linux/user_namespace.h> #include <linux/fs_struct.h> #include <linux/slab.h> #include <linux/sched/autogroup.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/debug.h> #include <linux/sched/stat.h> #include <linux/posix-timers.h> #include <linux/time_namespace.h> #include <linux/resctrl.h> #include <linux/cn_proc.h> #include <linux/ksm.h> #include <uapi/linux/lsm.h> #include <trace/events/oom.h> #include "internal.h" #include "fd.h" #include "../../lib/kstrtox.h" /* NOTE: * Implementing inode permission operations in /proc is almost * certainly an error. Permission checks need to happen during * each system call not at open time. The reason is that most of * what we wish to check for permissions in /proc varies at runtime. * * The classic example of a problem is opening file descriptors * in /proc for a task before it execs a suid executable. */ static u8 nlink_tid __ro_after_init; static u8 nlink_tgid __ro_after_init; struct pid_entry { const char *name; unsigned int len; umode_t mode; const struct inode_operations *iop; const struct file_operations *fop; union proc_op op; }; #define NOD(NAME, MODE, IOP, FOP, OP) { \ .name = (NAME), \ .len = sizeof(NAME) - 1, \ .mode = MODE, \ .iop = IOP, \ .fop = FOP, \ .op = OP, \ } #define DIR(NAME, MODE, iops, fops) \ NOD(NAME, (S_IFDIR|(MODE)), &iops, &fops, {} ) #define LNK(NAME, get_link) \ NOD(NAME, (S_IFLNK|S_IRWXUGO), \ &proc_pid_link_inode_operations, NULL, \ { .proc_get_link = get_link } ) #define REG(NAME, MODE, fops) \ NOD(NAME, (S_IFREG|(MODE)), NULL, &fops, {}) #define ONE(NAME, MODE, show) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_single_file_operations, \ { .proc_show = show } ) #define ATTR(LSMID, NAME, MODE) \ NOD(NAME, (S_IFREG|(MODE)), \ NULL, &proc_pid_attr_operations, \ { .lsmid = LSMID }) /* * Count the number of hardlinks for the pid_entry table, excluding the . * and .. links. */ static unsigned int __init pid_entry_nlink(const struct pid_entry *entries, unsigned int n) { unsigned int i; unsigned int count; count = 2; for (i = 0; i < n; ++i) { if (S_ISDIR(entries[i].mode)) ++count; } return count; } static int get_task_root(struct task_struct *task, struct path *root) { int result = -ENOENT; task_lock(task); if (task->fs) { get_fs_root(task->fs, root); result = 0; } task_unlock(task); return result; } static int proc_cwd_link(struct dentry *dentry, struct path *path) { struct task_struct *task = get_proc_task(d_inode(dentry)); int result = -ENOENT; if (task) { task_lock(task); if (task->fs) { get_fs_pwd(task->fs, path); result = 0; } task_unlock(task); put_task_struct(task); } return result; } static int proc_root_link(struct dentry *dentry, struct path *path) { struct task_struct *task = get_proc_task(d_inode(dentry)); int result = -ENOENT; if (task) { result = get_task_root(task, path); put_task_struct(task); } return result; } /* * If the user used setproctitle(), we just get the string from * user space at arg_start, and limit it to a maximum of one page. */ static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf, size_t count, unsigned long pos, unsigned long arg_start) { char *page; int ret, got; if (pos >= PAGE_SIZE) return 0; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; ret = 0; got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON); if (got > 0) { int len = strnlen(page, got); /* Include the NUL character if it was found */ if (len < got) len++; if (len > pos) { len -= pos; if (len > count) len = count; len -= copy_to_user(buf, page+pos, len); if (!len) len = -EFAULT; ret = len; } } free_page((unsigned long)page); return ret; } static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf, size_t count, loff_t *ppos) { unsigned long arg_start, arg_end, env_start, env_end; unsigned long pos, len; char *page, c; /* Check if process spawned far enough to have cmdline. */ if (!mm->env_end) return 0; spin_lock(&mm->arg_lock); arg_start = mm->arg_start; arg_end = mm->arg_end; env_start = mm->env_start; env_end = mm->env_end; spin_unlock(&mm->arg_lock); if (arg_start >= arg_end) return 0; /* * We allow setproctitle() to overwrite the argument * strings, and overflow past the original end. But * only when it overflows into the environment area. */ if (env_start != arg_end || env_end < env_start) env_start = env_end = arg_end; len = env_end - arg_start; /* We're not going to care if "*ppos" has high bits set */ pos = *ppos; if (pos >= len) return 0; if (count > len - pos) count = len - pos; if (!count) return 0; /* * Magical special case: if the argv[] end byte is not * zero, the user has overwritten it with setproctitle(3). * * Possible future enhancement: do this only once when * pos is 0, and set a flag in the 'struct file'. */ if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c) return get_mm_proctitle(mm, buf, count, pos, arg_start); /* * For the non-setproctitle() case we limit things strictly * to the [arg_start, arg_end[ range. */ pos += arg_start; if (pos < arg_start || pos >= arg_end) return 0; if (count > arg_end - pos) count = arg_end - pos; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; len = 0; while (count) { int got; size_t size = min_t(size_t, PAGE_SIZE, count); got = access_remote_vm(mm, pos, page, size, FOLL_ANON); if (got <= 0) break; got -= copy_to_user(buf, page, got); if (unlikely(!got)) { if (!len) len = -EFAULT; break; } pos += got; buf += got; len += got; count -= got; } free_page((unsigned long)page); return len; } static ssize_t get_task_cmdline(struct task_struct *tsk, char __user *buf, size_t count, loff_t *pos) { struct mm_struct *mm; ssize_t ret; mm = get_task_mm(tsk); if (!mm) return 0; ret = get_mm_cmdline(mm, buf, count, pos); mmput(mm); return ret; } static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { struct task_struct *tsk; ssize_t ret; BUG_ON(*pos < 0); tsk = get_proc_task(file_inode(file)); if (!tsk) return -ESRCH; ret = get_task_cmdline(tsk, buf, count, pos); put_task_struct(tsk); if (ret > 0) *pos += ret; return ret; } static const struct file_operations proc_pid_cmdline_ops = { .read = proc_pid_cmdline_read, .llseek = generic_file_llseek, }; #ifdef CONFIG_KALLSYMS /* * Provides a wchan file via kallsyms in a proper one-value-per-file format. * Returns the resolved symbol. If that fails, simply return the address. */ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long wchan; char symname[KSYM_NAME_LEN]; if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto print0; wchan = get_wchan(task); if (wchan && !lookup_symbol_name(wchan, symname)) { seq_puts(m, symname); return 0; } print0: seq_putc(m, '0'); return 0; } #endif /* CONFIG_KALLSYMS */ static int lock_trace(struct task_struct *task) { int err = down_read_killable(&task->signal->exec_update_lock); if (err) return err; if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { up_read(&task->signal->exec_update_lock); return -EPERM; } return 0; } static void unlock_trace(struct task_struct *task) { up_read(&task->signal->exec_update_lock); } #ifdef CONFIG_STACKTRACE #define MAX_STACK_TRACE_DEPTH 64 static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long *entries; int err; /* * The ability to racily run the kernel stack unwinder on a running task * and then observe the unwinder output is scary; while it is useful for * debugging kernel issues, it can also allow an attacker to leak kernel * stack contents. * Doing this in a manner that is at least safe from races would require * some work to ensure that the remote task can not be scheduled; and * even then, this would still expose the unwinder as local attack * surface. * Therefore, this interface is restricted to root. */ if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN)) return -EACCES; entries = kmalloc_array(MAX_STACK_TRACE_DEPTH, sizeof(*entries), GFP_KERNEL); if (!entries) return -ENOMEM; err = lock_trace(task); if (!err) { unsigned int i, nr_entries; nr_entries = stack_trace_save_tsk(task, entries, MAX_STACK_TRACE_DEPTH, 0); for (i = 0; i < nr_entries; i++) { seq_printf(m, "[<0>] %pB\n", (void *)entries[i]); } unlock_trace(task); } kfree(entries); return err; } #endif #ifdef CONFIG_SCHED_INFO /* * Provides /proc/PID/schedstat */ static int proc_pid_schedstat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { if (unlikely(!sched_info_on())) seq_puts(m, "0 0 0\n"); else seq_printf(m, "%llu %llu %lu\n", (unsigned long long)task->se.sum_exec_runtime, (unsigned long long)task->sched_info.run_delay, task->sched_info.pcount); return 0; } #endif #ifdef CONFIG_LATENCYTOP static int lstats_show_proc(struct seq_file *m, void *v) { int i; struct inode *inode = m->private; struct task_struct *task = get_proc_task(inode); if (!task) return -ESRCH; seq_puts(m, "Latency Top version : v0.1\n"); for (i = 0; i < LT_SAVECOUNT; i++) { struct latency_record *lr = &task->latency_record[i]; if (lr->backtrace[0]) { int q; seq_printf(m, "%i %li %li", lr->count, lr->time, lr->max); for (q = 0; q < LT_BACKTRACEDEPTH; q++) { unsigned long bt = lr->backtrace[q]; if (!bt) break; seq_printf(m, " %ps", (void *)bt); } seq_putc(m, '\n'); } } put_task_struct(task); return 0; } static int lstats_open(struct inode *inode, struct file *file) { return single_open(file, lstats_show_proc, inode); } static ssize_t lstats_write(struct file *file, const char __user *buf, size_t count, loff_t *offs) { struct task_struct *task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; clear_tsk_latency_tracing(task); put_task_struct(task); return count; } static const struct file_operations proc_lstats_operations = { .open = lstats_open, .read = seq_read, .write = lstats_write, .llseek = seq_lseek, .release = single_release, }; #endif static int proc_oom_score(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long totalpages = totalram_pages() + total_swap_pages; unsigned long points = 0; long badness; badness = oom_badness(task, totalpages); /* * Special case OOM_SCORE_ADJ_MIN for all others scale the * badness value into [0, 2000] range which we have been * exporting for a long time so userspace might depend on it. */ if (badness != LONG_MIN) points = (1000 + badness * 1000 / (long)totalpages) * 2 / 3; seq_printf(m, "%lu\n", points); return 0; } struct limit_names { const char *name; const char *unit; }; static const struct limit_names lnames[RLIM_NLIMITS] = { [RLIMIT_CPU] = {"Max cpu time", "seconds"}, [RLIMIT_FSIZE] = {"Max file size", "bytes"}, [RLIMIT_DATA] = {"Max data size", "bytes"}, [RLIMIT_STACK] = {"Max stack size", "bytes"}, [RLIMIT_CORE] = {"Max core file size", "bytes"}, [RLIMIT_RSS] = {"Max resident set", "bytes"}, [RLIMIT_NPROC] = {"Max processes", "processes"}, [RLIMIT_NOFILE] = {"Max open files", "files"}, [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, [RLIMIT_AS] = {"Max address space", "bytes"}, [RLIMIT_LOCKS] = {"Max file locks", "locks"}, [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, [RLIMIT_NICE] = {"Max nice priority", NULL}, [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, [RLIMIT_RTTIME] = {"Max realtime timeout", "us"}, }; /* Display limits for a process */ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned int i; unsigned long flags; struct rlimit rlim[RLIM_NLIMITS]; if (!lock_task_sighand(task, &flags)) return 0; memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); unlock_task_sighand(task, &flags); /* * print the file header */ seq_puts(m, "Limit " "Soft Limit " "Hard Limit " "Units \n"); for (i = 0; i < RLIM_NLIMITS; i++) { if (rlim[i].rlim_cur == RLIM_INFINITY) seq_printf(m, "%-25s %-20s ", lnames[i].name, "unlimited"); else seq_printf(m, "%-25s %-20lu ", lnames[i].name, rlim[i].rlim_cur); if (rlim[i].rlim_max == RLIM_INFINITY) seq_printf(m, "%-20s ", "unlimited"); else seq_printf(m, "%-20lu ", rlim[i].rlim_max); if (lnames[i].unit) seq_printf(m, "%-10s\n", lnames[i].unit); else seq_putc(m, '\n'); } return 0; } #ifdef CONFIG_HAVE_ARCH_TRACEHOOK static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { struct syscall_info info; u64 *args = &info.data.args[0]; int res; res = lock_trace(task); if (res) return res; if (task_current_syscall(task, &info)) seq_puts(m, "running\n"); else if (info.data.nr < 0) seq_printf(m, "%d 0x%llx 0x%llx\n", info.data.nr, info.sp, info.data.instruction_pointer); else seq_printf(m, "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n", info.data.nr, args[0], args[1], args[2], args[3], args[4], args[5], info.sp, info.data.instruction_pointer); unlock_trace(task); return 0; } #endif /* CONFIG_HAVE_ARCH_TRACEHOOK */ /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ /* permission checks */ static bool proc_fd_access_allowed(struct inode *inode) { struct task_struct *task; bool allowed = false; /* Allow access to a task's file descriptors if it is us or we * may use ptrace attach to the process and find out that * information. */ task = get_proc_task(inode); if (task) { allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); put_task_struct(task); } return allowed; } int proc_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { int error; struct inode *inode = d_inode(dentry); if (attr->ia_valid & ATTR_MODE) return -EPERM; error = setattr_prepare(&nop_mnt_idmap, dentry, attr); if (error) return error; setattr_copy(&nop_mnt_idmap, inode, attr); return 0; } /* * May current process learn task's sched/cmdline info (for hide_pid_min=1) * or euid/egid (for hide_pid_min=2)? */ static bool has_pid_permissions(struct proc_fs_info *fs_info, struct task_struct *task, enum proc_hidepid hide_pid_min) { /* * If 'hidpid' mount option is set force a ptrace check, * we indicate that we are using a filesystem syscall * by passing PTRACE_MODE_READ_FSCREDS */ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); if (fs_info->hide_pid < hide_pid_min) return true; if (in_group_p(fs_info->pid_gid)) return true; return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); } static int proc_pid_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct task_struct *task; bool has_perms; task = get_proc_task(inode); if (!task) return -ESRCH; has_perms = has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS); put_task_struct(task); if (!has_perms) { if (fs_info->hide_pid == HIDEPID_INVISIBLE) { /* * Let's make getdents(), stat(), and open() * consistent with each other. If a process * may not stat() a file, it shouldn't be seen * in procfs at all. */ return -ENOENT; } return -EPERM; } return generic_permission(&nop_mnt_idmap, inode, mask); } static const struct inode_operations proc_def_inode_operations = { .setattr = proc_setattr, }; static int proc_single_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct pid_namespace *ns = proc_pid_ns(inode->i_sb); struct pid *pid = proc_pid(inode); struct task_struct *task; int ret; task = get_pid_task(pid, PIDTYPE_PID); if (!task) return -ESRCH; ret = PROC_I(inode)->op.proc_show(m, ns, pid, task); put_task_struct(task); return ret; } static int proc_single_open(struct inode *inode, struct file *filp) { return single_open(filp, proc_single_show, inode); } static const struct file_operations proc_single_file_operations = { .open = proc_single_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, }; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) { struct task_struct *task = get_proc_task(inode); struct mm_struct *mm = ERR_PTR(-ESRCH); if (task) { mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); put_task_struct(task); if (!IS_ERR_OR_NULL(mm)) { /* ensure this mm_struct can't be freed */ mmgrab(mm); /* but do not pin its memory */ mmput(mm); } } return mm; } static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) { struct mm_struct *mm = proc_mem_open(inode, mode); if (IS_ERR(mm)) return PTR_ERR(mm); file->private_data = mm; return 0; } static int mem_open(struct inode *inode, struct file *file) { int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); /* OK to pass negative loff_t, we can catch out-of-range */ file->f_mode |= FMODE_UNSIGNED_OFFSET; return ret; } static ssize_t mem_rw(struct file *file, char __user *buf, size_t count, loff_t *ppos, int write) { struct mm_struct *mm = file->private_data; unsigned long addr = *ppos; ssize_t copied; char *page; unsigned int flags; if (!mm) return 0; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; copied = 0; if (!mmget_not_zero(mm)) goto free; flags = FOLL_FORCE | (write ? FOLL_WRITE : 0); while (count > 0) { size_t this_len = min_t(size_t, count, PAGE_SIZE); if (write && copy_from_user(page, buf, this_len)) { copied = -EFAULT; break; } this_len = access_remote_vm(mm, addr, page, this_len, flags); if (!this_len) { if (!copied) copied = -EIO; break; } if (!write && copy_to_user(buf, page, this_len)) { copied = -EFAULT; break; } buf += this_len; addr += this_len; copied += this_len; count -= this_len; } *ppos = addr; mmput(mm); free: free_page((unsigned long) page); return copied; } static ssize_t mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { return mem_rw(file, buf, count, ppos, 0); } static ssize_t mem_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { return mem_rw(file, (char __user*)buf, count, ppos, 1); } loff_t mem_lseek(struct file *file, loff_t offset, int orig) { switch (orig) { case 0: file->f_pos = offset; break; case 1: file->f_pos += offset; break; default: return -EINVAL; } force_successful_syscall_return(); return file->f_pos; } static int mem_release(struct inode *inode, struct file *file) { struct mm_struct *mm = file->private_data; if (mm) mmdrop(mm); return 0; } static const struct file_operations proc_mem_operations = { .llseek = mem_lseek, .read = mem_read, .write = mem_write, .open = mem_open, .release = mem_release, }; static int environ_open(struct inode *inode, struct file *file) { return __mem_open(inode, file, PTRACE_MODE_READ); } static ssize_t environ_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { char *page; unsigned long src = *ppos; int ret = 0; struct mm_struct *mm = file->private_data; unsigned long env_start, env_end; /* Ensure the process spawned far enough to have an environment. */ if (!mm || !mm->env_end) return 0; page = (char *)__get_free_page(GFP_KERNEL); if (!page) return -ENOMEM; ret = 0; if (!mmget_not_zero(mm)) goto free; spin_lock(&mm->arg_lock); env_start = mm->env_start; env_end = mm->env_end; spin_unlock(&mm->arg_lock); while (count > 0) { size_t this_len, max_len; int retval; if (src >= (env_end - env_start)) break; this_len = env_end - (env_start + src); max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); if (retval <= 0) { ret = retval; break; } if (copy_to_user(buf, page, retval)) { ret = -EFAULT; break; } ret += retval; src += retval; buf += retval; count -= retval; } *ppos = src; mmput(mm); free: free_page((unsigned long) page); return ret; } static const struct file_operations proc_environ_operations = { .open = environ_open, .read = environ_read, .llseek = generic_file_llseek, .release = mem_release, }; static int auxv_open(struct inode *inode, struct file *file) { return __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); } static ssize_t auxv_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct mm_struct *mm = file->private_data; unsigned int nwords = 0; if (!mm) return 0; do { nwords += 2; } while (mm->saved_auxv[nwords - 2] != 0); /* AT_NULL */ return simple_read_from_buffer(buf, count, ppos, mm->saved_auxv, nwords * sizeof(mm->saved_auxv[0])); } static const struct file_operations proc_auxv_operations = { .open = auxv_open, .read = auxv_read, .llseek = generic_file_llseek, .release = mem_release, }; static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; int oom_adj = OOM_ADJUST_MIN; size_t len; if (!task) return -ESRCH; if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) oom_adj = OOM_ADJUST_MAX; else oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / OOM_SCORE_ADJ_MAX; put_task_struct(task); if (oom_adj > OOM_ADJUST_MAX) oom_adj = OOM_ADJUST_MAX; len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) { struct mm_struct *mm = NULL; struct task_struct *task; int err = 0; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; mutex_lock(&oom_adj_mutex); if (legacy) { if (oom_adj < task->signal->oom_score_adj && !capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_unlock; } /* * /proc/pid/oom_adj is provided for legacy purposes, ask users to use * /proc/pid/oom_score_adj instead. */ pr_warn_once("%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", current->comm, task_pid_nr(current), task_pid_nr(task), task_pid_nr(task)); } else { if ((short)oom_adj < task->signal->oom_score_adj_min && !capable(CAP_SYS_RESOURCE)) { err = -EACCES; goto err_unlock; } } /* * Make sure we will check other processes sharing the mm if this is * not vfrok which wants its own oom_score_adj. * pin the mm so it doesn't go away and get reused after task_unlock */ if (!task->vfork_done) { struct task_struct *p = find_lock_task_mm(task); if (p) { if (test_bit(MMF_MULTIPROCESS, &p->mm->flags)) { mm = p->mm; mmgrab(mm); } task_unlock(p); } } task->signal->oom_score_adj = oom_adj; if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) task->signal->oom_score_adj_min = (short)oom_adj; trace_oom_score_adj_update(task); if (mm) { struct task_struct *p; rcu_read_lock(); for_each_process(p) { if (same_thread_group(task, p)) continue; /* do not touch kernel threads or the global init */ if (p->flags & PF_KTHREAD || is_global_init(p)) continue; task_lock(p); if (!p->vfork_done && process_shares_mm(p, mm)) { p->signal->oom_score_adj = oom_adj; if (!legacy && has_capability_noaudit(current, CAP_SYS_RESOURCE)) p->signal->oom_score_adj_min = (short)oom_adj; } task_unlock(p); } rcu_read_unlock(); mmdrop(mm); } err_unlock: mutex_unlock(&oom_adj_mutex); put_task_struct(task); return err; } /* * /proc/pid/oom_adj exists solely for backwards compatibility with previous * kernels. The effective policy is defined by oom_score_adj, which has a * different scale: oom_adj grew exponentially and oom_score_adj grows linearly. * Values written to oom_adj are simply mapped linearly to oom_score_adj. * Processes that become oom disabled via oom_adj will still be oom disabled * with this implementation. * * oom_adj cannot be removed since existing userspace binaries use it. */ static ssize_t oom_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char buffer[PROC_NUMBUF] = {}; int oom_adj; int err; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) { err = -EFAULT; goto out; } err = kstrtoint(strstrip(buffer), 0, &oom_adj); if (err) goto out; if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && oom_adj != OOM_DISABLE) { err = -EINVAL; goto out; } /* * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum * value is always attainable. */ if (oom_adj == OOM_ADJUST_MAX) oom_adj = OOM_SCORE_ADJ_MAX; else oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; err = __set_oom_adj(file, oom_adj, true); out: return err < 0 ? err : count; } static const struct file_operations proc_oom_adj_operations = { .read = oom_adj_read, .write = oom_adj_write, .llseek = generic_file_llseek, }; static ssize_t oom_score_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; short oom_score_adj = OOM_SCORE_ADJ_MIN; size_t len; if (!task) return -ESRCH; oom_score_adj = task->signal->oom_score_adj; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%hd\n", oom_score_adj); return simple_read_from_buffer(buf, count, ppos, buffer, len); } static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char buffer[PROC_NUMBUF] = {}; int oom_score_adj; int err; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) { err = -EFAULT; goto out; } err = kstrtoint(strstrip(buffer), 0, &oom_score_adj); if (err) goto out; if (oom_score_adj < OOM_SCORE_ADJ_MIN || oom_score_adj > OOM_SCORE_ADJ_MAX) { err = -EINVAL; goto out; } err = __set_oom_adj(file, oom_score_adj, false); out: return err < 0 ? err : count; } static const struct file_operations proc_oom_score_adj_operations = { .read = oom_score_adj_read, .write = oom_score_adj_write, .llseek = default_llseek, }; #ifdef CONFIG_AUDIT #define TMPBUFLEN 11 static ssize_t proc_loginuid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", from_kuid(file->f_cred->user_ns, audit_get_loginuid(task))); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } static ssize_t proc_loginuid_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); uid_t loginuid; kuid_t kloginuid; int rv; /* Don't let kthreads write their own loginuid */ if (current->flags & PF_KTHREAD) return -EPERM; rcu_read_lock(); if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) { rcu_read_unlock(); return -EPERM; } rcu_read_unlock(); if (*ppos != 0) { /* No partial writes. */ return -EINVAL; } rv = kstrtou32_from_user(buf, count, 10, &loginuid); if (rv < 0) return rv; /* is userspace tring to explicitly UNSET the loginuid? */ if (loginuid == AUDIT_UID_UNSET) { kloginuid = INVALID_UID; } else { kloginuid = make_kuid(file->f_cred->user_ns, loginuid); if (!uid_valid(kloginuid)) return -EINVAL; } rv = audit_set_loginuid(kloginuid); if (rv < 0) return rv; return count; } static const struct file_operations proc_loginuid_operations = { .read = proc_loginuid_read, .write = proc_loginuid_write, .llseek = generic_file_llseek, }; static ssize_t proc_sessionid_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); struct task_struct *task = get_proc_task(inode); ssize_t length; char tmpbuf[TMPBUFLEN]; if (!task) return -ESRCH; length = scnprintf(tmpbuf, TMPBUFLEN, "%u", audit_get_sessionid(task)); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, tmpbuf, length); } static const struct file_operations proc_sessionid_operations = { .read = proc_sessionid_read, .llseek = generic_file_llseek, }; #endif #ifdef CONFIG_FAULT_INJECTION static ssize_t proc_fault_inject_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); char buffer[PROC_NUMBUF]; size_t len; int make_it_fail; if (!task) return -ESRCH; make_it_fail = task->make_it_fail; put_task_struct(task); len = snprintf(buffer, sizeof(buffer), "%i\n", make_it_fail); return simple_read_from_buffer(buf, count, ppos, buffer, len); } static ssize_t proc_fault_inject_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct task_struct *task; char buffer[PROC_NUMBUF] = {}; int make_it_fail; int rv; if (!capable(CAP_SYS_RESOURCE)) return -EPERM; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; rv = kstrtoint(strstrip(buffer), 0, &make_it_fail); if (rv < 0) return rv; if (make_it_fail < 0 || make_it_fail > 1) return -EINVAL; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; task->make_it_fail = make_it_fail; put_task_struct(task); return count; } static const struct file_operations proc_fault_inject_operations = { .read = proc_fault_inject_read, .write = proc_fault_inject_write, .llseek = generic_file_llseek, }; static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; int err; unsigned int n; err = kstrtouint_from_user(buf, count, 0, &n); if (err) return err; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; task->fail_nth = n; put_task_struct(task); return count; } static ssize_t proc_fail_nth_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; char numbuf[PROC_NUMBUF]; ssize_t len; task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); put_task_struct(task); return simple_read_from_buffer(buf, count, ppos, numbuf, len); } static const struct file_operations proc_fail_nth_operations = { .read = proc_fail_nth_read, .write = proc_fail_nth_write, }; #endif #ifdef CONFIG_SCHED_DEBUG /* * Print out various scheduling related per-task fields: */ static int sched_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct pid_namespace *ns = proc_pid_ns(inode->i_sb); struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_sched_show_task(p, ns, m); put_task_struct(p); return 0; } static ssize_t sched_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_sched_set_task(p); put_task_struct(p); return count; } static int sched_open(struct inode *inode, struct file *filp) { return single_open(filp, sched_show, inode); } static const struct file_operations proc_pid_sched_operations = { .open = sched_open, .read = seq_read, .write = sched_write, .llseek = seq_lseek, .release = single_release, }; #endif #ifdef CONFIG_SCHED_AUTOGROUP /* * Print out autogroup related information: */ static int sched_autogroup_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_sched_autogroup_show_task(p, m); put_task_struct(p); return 0; } static ssize_t sched_autogroup_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; char buffer[PROC_NUMBUF] = {}; int nice; int err; if (count > sizeof(buffer) - 1) count = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count)) return -EFAULT; err = kstrtoint(strstrip(buffer), 0, &nice); if (err < 0) return err; p = get_proc_task(inode); if (!p) return -ESRCH; err = proc_sched_autogroup_set_nice(p, nice); if (err) count = err; put_task_struct(p); return count; } static int sched_autogroup_open(struct inode *inode, struct file *filp) { int ret; ret = single_open(filp, sched_autogroup_show, NULL); if (!ret) { struct seq_file *m = filp->private_data; m->private = inode; } return ret; } static const struct file_operations proc_pid_sched_autogroup_operations = { .open = sched_autogroup_open, .read = seq_read, .write = sched_autogroup_write, .llseek = seq_lseek, .release = single_release, }; #endif /* CONFIG_SCHED_AUTOGROUP */ #ifdef CONFIG_TIME_NS static int timens_offsets_show(struct seq_file *m, void *v) { struct task_struct *p; p = get_proc_task(file_inode(m->file)); if (!p) return -ESRCH; proc_timens_show_offsets(p, m); put_task_struct(p); return 0; } static ssize_t timens_offsets_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct inode *inode = file_inode(file); struct proc_timens_offset offsets[2]; char *kbuf = NULL, *pos, *next_line; struct task_struct *p; int ret, noffsets; /* Only allow < page size writes at the beginning of the file */ if ((*ppos != 0) || (count >= PAGE_SIZE)) return -EINVAL; /* Slurp in the user data */ kbuf = memdup_user_nul(buf, count); if (IS_ERR(kbuf)) return PTR_ERR(kbuf); /* Parse the user data */ ret = -EINVAL; noffsets = 0; for (pos = kbuf; pos; pos = next_line) { struct proc_timens_offset *off = &offsets[noffsets]; char clock[10]; int err; /* Find the end of line and ensure we don't look past it */ next_line = strchr(pos, '\n'); if (next_line) { *next_line = '\0'; next_line++; if (*next_line == '\0') next_line = NULL; } err = sscanf(pos, "%9s %lld %lu", clock, &off->val.tv_sec, &off->val.tv_nsec); if (err != 3 || off->val.tv_nsec >= NSEC_PER_SEC) goto out; clock[sizeof(clock) - 1] = 0; if (strcmp(clock, "monotonic") == 0 || strcmp(clock, __stringify(CLOCK_MONOTONIC)) == 0) off->clockid = CLOCK_MONOTONIC; else if (strcmp(clock, "boottime") == 0 || strcmp(clock, __stringify(CLOCK_BOOTTIME)) == 0) off->clockid = CLOCK_BOOTTIME; else goto out; noffsets++; if (noffsets == ARRAY_SIZE(offsets)) { if (next_line) count = next_line - kbuf; break; } } ret = -ESRCH; p = get_proc_task(inode); if (!p) goto out; ret = proc_timens_set_offset(file, p, offsets, noffsets); put_task_struct(p); if (ret) goto out; ret = count; out: kfree(kbuf); return ret; } static int timens_offsets_open(struct inode *inode, struct file *filp) { return single_open(filp, timens_offsets_show, inode); } static const struct file_operations proc_timens_offsets_operations = { .open = timens_offsets_open, .read = seq_read, .write = timens_offsets_write, .llseek = seq_lseek, .release = single_release, }; #endif /* CONFIG_TIME_NS */ static ssize_t comm_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; char buffer[TASK_COMM_LEN] = {}; const size_t maxlen = sizeof(buffer) - 1; if (copy_from_user(buffer, buf, count > maxlen ? maxlen : count)) return -EFAULT; p = get_proc_task(inode); if (!p) return -ESRCH; if (same_thread_group(current, p)) { set_task_comm(p, buffer); proc_comm_connector(p); } else count = -EINVAL; put_task_struct(p); return count; } static int comm_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct task_struct *p; p = get_proc_task(inode); if (!p) return -ESRCH; proc_task_name(m, p, false); seq_putc(m, '\n'); put_task_struct(p); return 0; } static int comm_open(struct inode *inode, struct file *filp) { return single_open(filp, comm_show, inode); } static const struct file_operations proc_pid_set_comm_operations = { .open = comm_open, .read = seq_read, .write = comm_write, .llseek = seq_lseek, .release = single_release, }; static int proc_exe_link(struct dentry *dentry, struct path *exe_path) { struct task_struct *task; struct file *exe_file; task = get_proc_task(d_inode(dentry)); if (!task) return -ENOENT; exe_file = get_task_exe_file(task); put_task_struct(task); if (exe_file) { *exe_path = exe_file->f_path; path_get(&exe_file->f_path); fput(exe_file); return 0; } else return -ENOENT; } static const char *proc_pid_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct path path; int error = -EACCES; if (!dentry) return ERR_PTR(-ECHILD); /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; error = PROC_I(inode)->op.proc_get_link(dentry, &path); if (error) goto out; error = nd_jump_link(&path); out: return ERR_PTR(error); } static int do_proc_readlink(const struct path *path, char __user *buffer, int buflen) { char *tmp = kmalloc(PATH_MAX, GFP_KERNEL); char *pathname; int len; if (!tmp) return -ENOMEM; pathname = d_path(path, tmp, PATH_MAX); len = PTR_ERR(pathname); if (IS_ERR(pathname)) goto out; len = tmp + PATH_MAX - 1 - pathname; if (len > buflen) len = buflen; if (copy_to_user(buffer, pathname, len)) len = -EFAULT; out: kfree(tmp); return len; } static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen) { int error = -EACCES; struct inode *inode = d_inode(dentry); struct path path; /* Are we allowed to snoop on the tasks file descriptors? */ if (!proc_fd_access_allowed(inode)) goto out; error = PROC_I(inode)->op.proc_get_link(dentry, &path); if (error) goto out; error = do_proc_readlink(&path, buffer, buflen); path_put(&path); out: return error; } const struct inode_operations proc_pid_link_inode_operations = { .readlink = proc_pid_readlink, .get_link = proc_pid_get_link, .setattr = proc_setattr, }; /* building an inode */ void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t *ruid, kgid_t *rgid) { /* Depending on the state of dumpable compute who should own a * proc file for a task. */ const struct cred *cred; kuid_t uid; kgid_t gid; if (unlikely(task->flags & PF_KTHREAD)) { *ruid = GLOBAL_ROOT_UID; *rgid = GLOBAL_ROOT_GID; return; } /* Default to the tasks effective ownership */ rcu_read_lock(); cred = __task_cred(task); uid = cred->euid; gid = cred->egid; rcu_read_unlock(); /* * Before the /proc/pid/status file was created the only way to read * the effective uid of a /process was to stat /proc/pid. Reading * /proc/pid/status is slow enough that procps and other packages * kept stating /proc/pid. To keep the rules in /proc simple I have * made this apply to all per process world readable and executable * directories. */ if (mode != (S_IFDIR|S_IRUGO|S_IXUGO)) { struct mm_struct *mm; task_lock(task); mm = task->mm; /* Make non-dumpable tasks owned by some root */ if (mm) { if (get_dumpable(mm) != SUID_DUMP_USER) { struct user_namespace *user_ns = mm->user_ns; uid = make_kuid(user_ns, 0); if (!uid_valid(uid)) uid = GLOBAL_ROOT_UID; gid = make_kgid(user_ns, 0); if (!gid_valid(gid)) gid = GLOBAL_ROOT_GID; } } else { uid = GLOBAL_ROOT_UID; gid = GLOBAL_ROOT_GID; } task_unlock(task); } *ruid = uid; *rgid = gid; } void proc_pid_evict_inode(struct proc_inode *ei) { struct pid *pid = ei->pid; if (S_ISDIR(ei->vfs_inode.i_mode)) { spin_lock(&pid->lock); hlist_del_init_rcu(&ei->sibling_inodes); spin_unlock(&pid->lock); } } struct inode *proc_pid_make_inode(struct super_block *sb, struct task_struct *task, umode_t mode) { struct inode * inode; struct proc_inode *ei; struct pid *pid; /* We need a new inode */ inode = new_inode(sb); if (!inode) goto out; /* Common stuff */ ei = PROC_I(inode); inode->i_mode = mode; inode->i_ino = get_next_ino(); simple_inode_init_ts(inode); inode->i_op = &proc_def_inode_operations; /* * grab the reference to task. */ pid = get_task_pid(task, PIDTYPE_PID); if (!pid) goto out_unlock; /* Let the pid remember us for quick removal */ ei->pid = pid; task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); out: return inode; out_unlock: iput(inode); return NULL; } /* * Generating an inode and adding it into @pid->inodes, so that task will * invalidate inode's dentry before being released. * * This helper is used for creating dir-type entries under '/proc' and * '/proc/<tgid>/task'. Other entries(eg. fd, stat) under '/proc/<tgid>' * can be released by invalidating '/proc/<tgid>' dentry. * In theory, dentries under '/proc/<tgid>/task' can also be released by * invalidating '/proc/<tgid>' dentry, we reserve it to handle single * thread exiting situation: Any one of threads should invalidate its * '/proc/<tgid>/task/<pid>' dentry before released. */ static struct inode *proc_pid_make_base_inode(struct super_block *sb, struct task_struct *task, umode_t mode) { struct inode *inode; struct proc_inode *ei; struct pid *pid; inode = proc_pid_make_inode(sb, task, mode); if (!inode) return NULL; /* Let proc_flush_pid find this directory inode */ ei = PROC_I(inode); pid = ei->pid; spin_lock(&pid->lock); hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); spin_unlock(&pid->lock); return inode; } int pid_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb); struct task_struct *task; generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); stat->uid = GLOBAL_ROOT_UID; stat->gid = GLOBAL_ROOT_GID; rcu_read_lock(); task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { if (!has_pid_permissions(fs_info, task, HIDEPID_INVISIBLE)) { rcu_read_unlock(); /* * This doesn't prevent learning whether PID exists, * it only makes getattr() consistent with readdir(). */ return -ENOENT; } task_dump_owner(task, inode->i_mode, &stat->uid, &stat->gid); } rcu_read_unlock(); return 0; } /* dentry stuff */ /* * Set <pid>/... inode ownership (can change due to setuid(), etc.) */ void pid_update_inode(struct task_struct *task, struct inode *inode) { task_dump_owner(task, inode->i_mode, &inode->i_uid, &inode->i_gid); inode->i_mode &= ~(S_ISUID | S_ISGID); security_task_to_inode(task, inode); } /* * Rewrite the inode's ownerships here because the owning task may have * performed a setuid(), etc. * */ static int pid_revalidate(struct dentry *dentry, unsigned int flags) { struct inode *inode; struct task_struct *task; int ret = 0; rcu_read_lock(); inode = d_inode_rcu(dentry); if (!inode) goto out; task = pid_task(proc_pid(inode), PIDTYPE_PID); if (task) { pid_update_inode(task, inode); ret = 1; } out: rcu_read_unlock(); return ret; } static inline bool proc_inode_is_dead(struct inode *inode) { return !proc_pid(inode)->tasks[PIDTYPE_PID].first; } int pid_delete_dentry(const struct dentry *dentry) { /* Is the task we represent dead? * If so, then don't put the dentry on the lru list, * kill it immediately. */ return proc_inode_is_dead(d_inode(dentry)); } const struct dentry_operations pid_dentry_operations = { .d_revalidate = pid_revalidate, .d_delete = pid_delete_dentry, }; /* Lookups */ /* * Fill a directory entry. * * If possible create the dcache entry and derive our inode number and * file type from dcache entry. * * Since all of the proc inode numbers are dynamically generated, the inode * numbers do not exist until the inode is cache. This means creating * the dcache entry in readdir is necessary to keep the inode numbers * reported by readdir in sync with the inode numbers reported * by stat. */ bool proc_fill_cache(struct file *file, struct dir_context *ctx, const char *name, unsigned int len, instantiate_t instantiate, struct task_struct *task, const void *ptr) { struct dentry *child, *dir = file->f_path.dentry; struct qstr qname = QSTR_INIT(name, len); struct inode *inode; unsigned type = DT_UNKNOWN; ino_t ino = 1; child = d_hash_and_lookup(dir, &qname); if (!child) { DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); child = d_alloc_parallel(dir, &qname, &wq); if (IS_ERR(child)) goto end_instantiate; if (d_in_lookup(child)) { struct dentry *res; res = instantiate(child, task, ptr); d_lookup_done(child); if (unlikely(res)) { dput(child); child = res; if (IS_ERR(child)) goto end_instantiate; } } } inode = d_inode(child); ino = inode->i_ino; type = inode->i_mode >> 12; dput(child); end_instantiate: return dir_emit(ctx, name, len, ino, type); } /* * dname_to_vma_addr - maps a dentry name into two unsigned longs * which represent vma start and end addresses. */ static int dname_to_vma_addr(struct dentry *dentry, unsigned long *start, unsigned long *end) { const char *str = dentry->d_name.name; unsigned long long sval, eval; unsigned int len; if (str[0] == '0' && str[1] != '-') return -EINVAL; len = _parse_integer(str, 16, &sval); if (len & KSTRTOX_OVERFLOW) return -EINVAL; if (sval != (unsigned long)sval) return -EINVAL; str += len; if (*str != '-') return -EINVAL; str++; if (str[0] == '0' && str[1]) return -EINVAL; len = _parse_integer(str, 16, &eval); if (len & KSTRTOX_OVERFLOW) return -EINVAL; if (eval != (unsigned long)eval) return -EINVAL; str += len; if (*str != '\0') return -EINVAL; *start = sval; *end = eval; return 0; } static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; bool exact_vma_exists = false; struct mm_struct *mm = NULL; struct task_struct *task; struct inode *inode; int status = 0; if (flags & LOOKUP_RCU) return -ECHILD; inode = d_inode(dentry); task = get_proc_task(inode); if (!task) goto out_notask; mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (IS_ERR_OR_NULL(mm)) goto out; if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { status = mmap_read_lock_killable(mm); if (!status) { exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); mmap_read_unlock(mm); } } mmput(mm); if (exact_vma_exists) { task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); status = 1; } out: put_task_struct(task); out_notask: return status; } static const struct dentry_operations tid_map_files_dentry_operations = { .d_revalidate = map_files_d_revalidate, .d_delete = pid_delete_dentry, }; static int map_files_get_link(struct dentry *dentry, struct path *path) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; struct task_struct *task; struct mm_struct *mm; int rc; rc = -ENOENT; task = get_proc_task(d_inode(dentry)); if (!task) goto out; mm = get_task_mm(task); put_task_struct(task); if (!mm) goto out; rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); if (rc) goto out_mmput; rc = mmap_read_lock_killable(mm); if (rc) goto out_mmput; rc = -ENOENT; vma = find_exact_vma(mm, vm_start, vm_end); if (vma && vma->vm_file) { *path = *file_user_path(vma->vm_file); path_get(path); rc = 0; } mmap_read_unlock(mm); out_mmput: mmput(mm); out: return rc; } struct map_files_info { unsigned long start; unsigned long end; fmode_t mode; }; /* * Only allow CAP_SYS_ADMIN and CAP_CHECKPOINT_RESTORE to follow the links, due * to concerns about how the symlinks may be used to bypass permissions on * ancestor directories in the path to the file in question. */ static const char * proc_map_files_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { if (!checkpoint_restore_ns_capable(&init_user_ns)) return ERR_PTR(-EPERM); return proc_pid_get_link(dentry, inode, done); } /* * Identical to proc_pid_link_inode_operations except for get_link() */ static const struct inode_operations proc_map_files_link_inode_operations = { .readlink = proc_pid_readlink, .get_link = proc_map_files_get_link, .setattr = proc_setattr, }; static struct dentry * proc_map_files_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { fmode_t mode = (fmode_t)(unsigned long)ptr; struct proc_inode *ei; struct inode *inode; inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | ((mode & FMODE_READ ) ? S_IRUSR : 0) | ((mode & FMODE_WRITE) ? S_IWUSR : 0)); if (!inode) return ERR_PTR(-ENOENT); ei = PROC_I(inode); ei->op.proc_get_link = map_files_get_link; inode->i_op = &proc_map_files_link_inode_operations; inode->i_size = 64; d_set_d_op(dentry, &tid_map_files_dentry_operations); return d_splice_alias(inode, dentry); } static struct dentry *proc_map_files_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { unsigned long vm_start, vm_end; struct vm_area_struct *vma; struct task_struct *task; struct dentry *result; struct mm_struct *mm; result = ERR_PTR(-ENOENT); task = get_proc_task(dir); if (!task) goto out; result = ERR_PTR(-EACCES); if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; result = ERR_PTR(-ENOENT); if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) goto out_put_task; mm = get_task_mm(task); if (!mm) goto out_put_task; result = ERR_PTR(-EINTR); if (mmap_read_lock_killable(mm)) goto out_put_mm; result = ERR_PTR(-ENOENT); vma = find_exact_vma(mm, vm_start, vm_end); if (!vma) goto out_no_vma; if (vma->vm_file) result = proc_map_files_instantiate(dentry, task, (void *)(unsigned long)vma->vm_file->f_mode); out_no_vma: mmap_read_unlock(mm); out_put_mm: mmput(mm); out_put_task: put_task_struct(task); out: return result; } static const struct inode_operations proc_map_files_inode_operations = { .lookup = proc_map_files_lookup, .permission = proc_fd_permission, .setattr = proc_setattr, }; static int proc_map_files_readdir(struct file *file, struct dir_context *ctx) { struct vm_area_struct *vma; struct task_struct *task; struct mm_struct *mm; unsigned long nr_files, pos, i; GENRADIX(struct map_files_info) fa; struct map_files_info *p; int ret; struct vma_iterator vmi; genradix_init(&fa); ret = -ENOENT; task = get_proc_task(file_inode(file)); if (!task) goto out; ret = -EACCES; if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; ret = 0; if (!dir_emit_dots(file, ctx)) goto out_put_task; mm = get_task_mm(task); if (!mm) goto out_put_task; ret = mmap_read_lock_killable(mm); if (ret) { mmput(mm); goto out_put_task; } nr_files = 0; /* * We need two passes here: * * 1) Collect vmas of mapped files with mmap_lock taken * 2) Release mmap_lock and instantiate entries * * otherwise we get lockdep complained, since filldir() * routine might require mmap_lock taken in might_fault(). */ pos = 2; vma_iter_init(&vmi, mm, 0); for_each_vma(vmi, vma) { if (!vma->vm_file) continue; if (++pos <= ctx->pos) continue; p = genradix_ptr_alloc(&fa, nr_files++, GFP_KERNEL); if (!p) { ret = -ENOMEM; mmap_read_unlock(mm); mmput(mm); goto out_put_task; } p->start = vma->vm_start; p->end = vma->vm_end; p->mode = vma->vm_file->f_mode; } mmap_read_unlock(mm); mmput(mm); for (i = 0; i < nr_files; i++) { char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ unsigned int len; p = genradix_ptr(&fa, i); len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end); if (!proc_fill_cache(file, ctx, buf, len, proc_map_files_instantiate, task, (void *)(unsigned long)p->mode)) break; ctx->pos++; } out_put_task: put_task_struct(task); out: genradix_free(&fa); return ret; } static const struct file_operations proc_map_files_operations = { .read = generic_read_dir, .iterate_shared = proc_map_files_readdir, .llseek = generic_file_llseek, }; #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) struct timers_private { struct pid *pid; struct task_struct *task; struct sighand_struct *sighand; struct pid_namespace *ns; unsigned long flags; }; static void *timers_start(struct seq_file *m, loff_t *pos) { struct timers_private *tp = m->private; tp->task = get_pid_task(tp->pid, PIDTYPE_PID); if (!tp->task) return ERR_PTR(-ESRCH); tp->sighand = lock_task_sighand(tp->task, &tp->flags); if (!tp->sighand) return ERR_PTR(-ESRCH); return seq_list_start(&tp->task->signal->posix_timers, *pos); } static void *timers_next(struct seq_file *m, void *v, loff_t *pos) { struct timers_private *tp = m->private; return seq_list_next(v, &tp->task->signal->posix_timers, pos); } static void timers_stop(struct seq_file *m, void *v) { struct timers_private *tp = m->private; if (tp->sighand) { unlock_task_sighand(tp->task, &tp->flags); tp->sighand = NULL; } if (tp->task) { put_task_struct(tp->task); tp->task = NULL; } } static int show_timer(struct seq_file *m, void *v) { struct k_itimer *timer; struct timers_private *tp = m->private; int notify; static const char * const nstr[] = { [SIGEV_SIGNAL] = "signal", [SIGEV_NONE] = "none", [SIGEV_THREAD] = "thread", }; timer = list_entry((struct list_head *)v, struct k_itimer, list); notify = timer->it_sigev_notify; seq_printf(m, "ID: %d\n", timer->it_id); seq_printf(m, "signal: %d/%px\n", timer->sigq->info.si_signo, timer->sigq->info.si_value.sival_ptr); seq_printf(m, "notify: %s/%s.%d\n", nstr[notify & ~SIGEV_THREAD_ID], (notify & SIGEV_THREAD_ID) ? "tid" : "pid", pid_nr_ns(timer->it_pid, tp->ns)); seq_printf(m, "ClockID: %d\n", timer->it_clock); return 0; } static const struct seq_operations proc_timers_seq_ops = { .start = timers_start, .next = timers_next, .stop = timers_stop, .show = show_timer, }; static int proc_timers_open(struct inode *inode, struct file *file) { struct timers_private *tp; tp = __seq_open_private(file, &proc_timers_seq_ops, sizeof(struct timers_private)); if (!tp) return -ENOMEM; tp->pid = proc_pid(inode); tp->ns = proc_pid_ns(inode->i_sb); return 0; } static const struct file_operations proc_timers_operations = { .open = proc_timers_open, .read = seq_read, .llseek = seq_lseek, .release = seq_release_private, }; #endif static ssize_t timerslack_ns_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct inode *inode = file_inode(file); struct task_struct *p; u64 slack_ns; int err; err = kstrtoull_from_user(buf, count, 10, &slack_ns); if (err < 0) return err; p = get_proc_task(inode); if (!p) return -ESRCH; if (p != current) { rcu_read_lock(); if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { rcu_read_unlock(); count = -EPERM; goto out; } rcu_read_unlock(); err = security_task_setscheduler(p); if (err) { count = err; goto out; } } task_lock(p); if (slack_ns == 0) p->timer_slack_ns = p->default_timer_slack_ns; else p->timer_slack_ns = slack_ns; task_unlock(p); out: put_task_struct(p); return count; } static int timerslack_ns_show(struct seq_file *m, void *v) { struct inode *inode = m->private; struct task_struct *p; int err = 0; p = get_proc_task(inode); if (!p) return -ESRCH; if (p != current) { rcu_read_lock(); if (!ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE)) { rcu_read_unlock(); err = -EPERM; goto out; } rcu_read_unlock(); err = security_task_getscheduler(p); if (err) goto out; } task_lock(p); seq_printf(m, "%llu\n", p->timer_slack_ns); task_unlock(p); out: put_task_struct(p); return err; } static int timerslack_ns_open(struct inode *inode, struct file *filp) { return single_open(filp, timerslack_ns_show, inode); } static const struct file_operations proc_pid_set_timerslack_ns_operations = { .open = timerslack_ns_open, .read = seq_read, .write = timerslack_ns_write, .llseek = seq_lseek, .release = single_release, }; static struct dentry *proc_pident_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { const struct pid_entry *p = ptr; struct inode *inode; struct proc_inode *ei; inode = proc_pid_make_inode(dentry->d_sb, task, p->mode); if (!inode) return ERR_PTR(-ENOENT); ei = PROC_I(inode); if (S_ISDIR(inode->i_mode)) set_nlink(inode, 2); /* Use getattr to fix if necessary */ if (p->iop) inode->i_op = p->iop; if (p->fop) inode->i_fop = p->fop; ei->op = p->op; pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); return d_splice_alias(inode, dentry); } static struct dentry *proc_pident_lookup(struct inode *dir, struct dentry *dentry, const struct pid_entry *p, const struct pid_entry *end) { struct task_struct *task = get_proc_task(dir); struct dentry *res = ERR_PTR(-ENOENT); if (!task) goto out_no_task; /* * Yes, it does not scale. And it should not. Don't add * new entries into /proc/<tgid>/ without very good reasons. */ for (; p < end; p++) { if (p->len != dentry->d_name.len) continue; if (!memcmp(dentry->d_name.name, p->name, p->len)) { res = proc_pident_instantiate(dentry, task, p); break; } } put_task_struct(task); out_no_task: return res; } static int proc_pident_readdir(struct file *file, struct dir_context *ctx, const struct pid_entry *ents, unsigned int nents) { struct task_struct *task = get_proc_task(file_inode(file)); const struct pid_entry *p; if (!task) return -ENOENT; if (!dir_emit_dots(file, ctx)) goto out; if (ctx->pos >= nents + 2) goto out; for (p = ents + (ctx->pos - 2); p < ents + nents; p++) { if (!proc_fill_cache(file, ctx, p->name, p->len, proc_pident_instantiate, task, p)) break; ctx->pos++; } out: put_task_struct(task); return 0; } #ifdef CONFIG_SECURITY static int proc_pid_attr_open(struct inode *inode, struct file *file) { file->private_data = NULL; __mem_open(inode, file, PTRACE_MODE_READ_FSCREDS); return 0; } static ssize_t proc_pid_attr_read(struct file * file, char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); char *p = NULL; ssize_t length; struct task_struct *task = get_proc_task(inode); if (!task) return -ESRCH; length = security_getprocattr(task, PROC_I(inode)->op.lsmid, file->f_path.dentry->d_name.name, &p); put_task_struct(task); if (length > 0) length = simple_read_from_buffer(buf, count, ppos, p, length); kfree(p); return length; } static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, size_t count, loff_t *ppos) { struct inode * inode = file_inode(file); struct task_struct *task; void *page; int rv; /* A task may only write when it was the opener. */ if (file->private_data != current->mm) return -EPERM; rcu_read_lock(); task = pid_task(proc_pid(inode), PIDTYPE_PID); if (!task) { rcu_read_unlock(); return -ESRCH; } /* A task may only write its own attributes. */ if (current != task) { rcu_read_unlock(); return -EACCES; } /* Prevent changes to overridden credentials. */ if (current_cred() != current_real_cred()) { rcu_read_unlock(); return -EBUSY; } rcu_read_unlock(); if (count > PAGE_SIZE) count = PAGE_SIZE; /* No partial writes. */ if (*ppos != 0) return -EINVAL; page = memdup_user(buf, count); if (IS_ERR(page)) { rv = PTR_ERR(page); goto out; } /* Guard against adverse ptrace interaction */ rv = mutex_lock_interruptible(&current->signal->cred_guard_mutex); if (rv < 0) goto out_free; rv = security_setprocattr(PROC_I(inode)->op.lsmid, file->f_path.dentry->d_name.name, page, count); mutex_unlock(&current->signal->cred_guard_mutex); out_free: kfree(page); out: return rv; } static const struct file_operations proc_pid_attr_operations = { .open = proc_pid_attr_open, .read = proc_pid_attr_read, .write = proc_pid_attr_write, .llseek = generic_file_llseek, .release = mem_release, }; #define LSM_DIR_OPS(LSM) \ static int proc_##LSM##_attr_dir_iterate(struct file *filp, \ struct dir_context *ctx) \ { \ return proc_pident_readdir(filp, ctx, \ LSM##_attr_dir_stuff, \ ARRAY_SIZE(LSM##_attr_dir_stuff)); \ } \ \ static const struct file_operations proc_##LSM##_attr_dir_ops = { \ .read = generic_read_dir, \ .iterate_shared = proc_##LSM##_attr_dir_iterate, \ .llseek = default_llseek, \ }; \ \ static struct dentry *proc_##LSM##_attr_dir_lookup(struct inode *dir, \ struct dentry *dentry, unsigned int flags) \ { \ return proc_pident_lookup(dir, dentry, \ LSM##_attr_dir_stuff, \ LSM##_attr_dir_stuff + ARRAY_SIZE(LSM##_attr_dir_stuff)); \ } \ \ static const struct inode_operations proc_##LSM##_attr_dir_inode_ops = { \ .lookup = proc_##LSM##_attr_dir_lookup, \ .getattr = pid_getattr, \ .setattr = proc_setattr, \ } #ifdef CONFIG_SECURITY_SMACK static const struct pid_entry smack_attr_dir_stuff[] = { ATTR(LSM_ID_SMACK, "current", 0666), }; LSM_DIR_OPS(smack); #endif #ifdef CONFIG_SECURITY_APPARMOR static const struct pid_entry apparmor_attr_dir_stuff[] = { ATTR(LSM_ID_APPARMOR, "current", 0666), ATTR(LSM_ID_APPARMOR, "prev", 0444), ATTR(LSM_ID_APPARMOR, "exec", 0666), }; LSM_DIR_OPS(apparmor); #endif static const struct pid_entry attr_dir_stuff[] = { ATTR(LSM_ID_UNDEF, "current", 0666), ATTR(LSM_ID_UNDEF, "prev", 0444), ATTR(LSM_ID_UNDEF, "exec", 0666), ATTR(LSM_ID_UNDEF, "fscreate", 0666), ATTR(LSM_ID_UNDEF, "keycreate", 0666), ATTR(LSM_ID_UNDEF, "sockcreate", 0666), #ifdef CONFIG_SECURITY_SMACK DIR("smack", 0555, proc_smack_attr_dir_inode_ops, proc_smack_attr_dir_ops), #endif #ifdef CONFIG_SECURITY_APPARMOR DIR("apparmor", 0555, proc_apparmor_attr_dir_inode_ops, proc_apparmor_attr_dir_ops), #endif }; static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) { return proc_pident_readdir(file, ctx, attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); } static const struct file_operations proc_attr_dir_operations = { .read = generic_read_dir, .iterate_shared = proc_attr_dir_readdir, .llseek = generic_file_llseek, }; static struct dentry *proc_attr_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, attr_dir_stuff, attr_dir_stuff + ARRAY_SIZE(attr_dir_stuff)); } static const struct inode_operations proc_attr_dir_inode_operations = { .lookup = proc_attr_dir_lookup, .getattr = pid_getattr, .setattr = proc_setattr, }; #endif #ifdef CONFIG_ELF_CORE static ssize_t proc_coredump_filter_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task = get_proc_task(file_inode(file)); struct mm_struct *mm; char buffer[PROC_NUMBUF]; size_t len; int ret; if (!task) return -ESRCH; ret = 0; mm = get_task_mm(task); if (mm) { len = snprintf(buffer, sizeof(buffer), "%08lx\n", ((mm->flags & MMF_DUMP_FILTER_MASK) >> MMF_DUMP_FILTER_SHIFT)); mmput(mm); ret = simple_read_from_buffer(buf, count, ppos, buffer, len); } put_task_struct(task); return ret; } static ssize_t proc_coredump_filter_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct task_struct *task; struct mm_struct *mm; unsigned int val; int ret; int i; unsigned long mask; ret = kstrtouint_from_user(buf, count, 0, &val); if (ret < 0) return ret; ret = -ESRCH; task = get_proc_task(file_inode(file)); if (!task) goto out_no_task; mm = get_task_mm(task); if (!mm) goto out_no_mm; ret = 0; for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { if (val & mask) set_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); else clear_bit(i + MMF_DUMP_FILTER_SHIFT, &mm->flags); } mmput(mm); out_no_mm: put_task_struct(task); out_no_task: if (ret < 0) return ret; return count; } static const struct file_operations proc_coredump_filter_operations = { .read = proc_coredump_filter_read, .write = proc_coredump_filter_write, .llseek = generic_file_llseek, }; #endif #ifdef CONFIG_TASK_IO_ACCOUNTING static int do_io_accounting(struct task_struct *task, struct seq_file *m, int whole) { struct task_io_accounting acct; int result; result = down_read_killable(&task->signal->exec_update_lock); if (result) return result; if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { result = -EACCES; goto out_unlock; } if (whole) { struct signal_struct *sig = task->signal; struct task_struct *t; unsigned int seq = 1; unsigned long flags; rcu_read_lock(); do { seq++; /* 2 on the 1st/lockless path, otherwise odd */ flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); acct = sig->ioac; __for_each_thread(sig, t) task_io_accounting_add(&acct, &t->ioac); } while (need_seqretry(&sig->stats_lock, seq)); done_seqretry_irqrestore(&sig->stats_lock, seq, flags); rcu_read_unlock(); } else { acct = task->ioac; } seq_printf(m, "rchar: %llu\n" "wchar: %llu\n" "syscr: %llu\n" "syscw: %llu\n" "read_bytes: %llu\n" "write_bytes: %llu\n" "cancelled_write_bytes: %llu\n", (unsigned long long)acct.rchar, (unsigned long long)acct.wchar, (unsigned long long)acct.syscr, (unsigned long long)acct.syscw, (unsigned long long)acct.read_bytes, (unsigned long long)acct.write_bytes, (unsigned long long)acct.cancelled_write_bytes); result = 0; out_unlock: up_read(&task->signal->exec_update_lock); return result; } static int proc_tid_io_accounting(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return do_io_accounting(task, m, 0); } static int proc_tgid_io_accounting(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { return do_io_accounting(task, m, 1); } #endif /* CONFIG_TASK_IO_ACCOUNTING */ #ifdef CONFIG_USER_NS static int proc_id_map_open(struct inode *inode, struct file *file, const struct seq_operations *seq_ops) { struct user_namespace *ns = NULL; struct task_struct *task; struct seq_file *seq; int ret = -EINVAL; task = get_proc_task(inode); if (task) { rcu_read_lock(); ns = get_user_ns(task_cred_xxx(task, user_ns)); rcu_read_unlock(); put_task_struct(task); } if (!ns) goto err; ret = seq_open(file, seq_ops); if (ret) goto err_put_ns; seq = file->private_data; seq->private = ns; return 0; err_put_ns: put_user_ns(ns); err: return ret; } static int proc_id_map_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; put_user_ns(ns); return seq_release(inode, file); } static int proc_uid_map_open(struct inode *inode, struct file *file) { return proc_id_map_open(inode, file, &proc_uid_seq_operations); } static int proc_gid_map_open(struct inode *inode, struct file *file) { return proc_id_map_open(inode, file, &proc_gid_seq_operations); } static int proc_projid_map_open(struct inode *inode, struct file *file) { return proc_id_map_open(inode, file, &proc_projid_seq_operations); } static const struct file_operations proc_uid_map_operations = { .open = proc_uid_map_open, .write = proc_uid_map_write, .read = seq_read, .llseek = seq_lseek, .release = proc_id_map_release, }; static const struct file_operations proc_gid_map_operations = { .open = proc_gid_map_open, .write = proc_gid_map_write, .read = seq_read, .llseek = seq_lseek, .release = proc_id_map_release, }; static const struct file_operations proc_projid_map_operations = { .open = proc_projid_map_open, .write = proc_projid_map_write, .read = seq_read, .llseek = seq_lseek, .release = proc_id_map_release, }; static int proc_setgroups_open(struct inode *inode, struct file *file) { struct user_namespace *ns = NULL; struct task_struct *task; int ret; ret = -ESRCH; task = get_proc_task(inode); if (task) { rcu_read_lock(); ns = get_user_ns(task_cred_xxx(task, user_ns)); rcu_read_unlock(); put_task_struct(task); } if (!ns) goto err; if (file->f_mode & FMODE_WRITE) { ret = -EACCES; if (!ns_capable(ns, CAP_SYS_ADMIN)) goto err_put_ns; } ret = single_open(file, &proc_setgroups_show, ns); if (ret) goto err_put_ns; return 0; err_put_ns: put_user_ns(ns); err: return ret; } static int proc_setgroups_release(struct inode *inode, struct file *file) { struct seq_file *seq = file->private_data; struct user_namespace *ns = seq->private; int ret = single_release(inode, file); put_user_ns(ns); return ret; } static const struct file_operations proc_setgroups_operations = { .open = proc_setgroups_open, .write = proc_setgroups_write, .read = seq_read, .llseek = seq_lseek, .release = proc_setgroups_release, }; #endif /* CONFIG_USER_NS */ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { int err = lock_trace(task); if (!err) { seq_printf(m, "%08x\n", task->personality); unlock_trace(task); } return err; } #ifdef CONFIG_LIVEPATCH static int proc_pid_patch_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { seq_printf(m, "%d\n", task->patch_state); return 0; } #endif /* CONFIG_LIVEPATCH */ #ifdef CONFIG_KSM static int proc_pid_ksm_merging_pages(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { struct mm_struct *mm; mm = get_task_mm(task); if (mm) { seq_printf(m, "%lu\n", mm->ksm_merging_pages); mmput(mm); } return 0; } static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { struct mm_struct *mm; mm = get_task_mm(task); if (mm) { seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items); seq_printf(m, "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm)); seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages); seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm)); mmput(mm); } return 0; } #endif /* CONFIG_KSM */ #ifdef CONFIG_STACKLEAK_METRICS static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task) { unsigned long prev_depth = THREAD_SIZE - (task->prev_lowest_stack & (THREAD_SIZE - 1)); unsigned long depth = THREAD_SIZE - (task->lowest_stack & (THREAD_SIZE - 1)); seq_printf(m, "previous stack depth: %lu\nstack depth: %lu\n", prev_depth, depth); return 0; } #endif /* CONFIG_STACKLEAK_METRICS */ /* * Thread groups */ static const struct file_operations proc_task_operations; static const struct inode_operations proc_task_inode_operations; static const struct pid_entry tgid_base_stuff[] = { DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations), DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), REG("auxv", S_IRUSR, proc_auxv_operations), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif #ifdef CONFIG_SCHED_AUTOGROUP REG("autogroup", S_IRUGO|S_IWUSR, proc_pid_sched_autogroup_operations), #endif #ifdef CONFIG_TIME_NS REG("timens_offsets", S_IRUGO|S_IWUSR, proc_timens_offsets_operations), #endif REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tgid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), LNK("root", proc_root_link), LNK("exe", proc_exe_link), REG("mounts", S_IRUGO, proc_mounts_operations), REG("mountinfo", S_IRUGO, proc_mountinfo_operations), REG("mountstats", S_IRUSR, proc_mountstats_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS ONE("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif #ifdef CONFIG_PROC_CPU_RESCTRL ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), REG("fail-nth", 0644, proc_fail_nth_operations), #endif #ifdef CONFIG_ELF_CORE REG("coredump_filter", S_IRUGO|S_IWUSR, proc_coredump_filter_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING ONE("io", S_IRUSR, proc_tgid_io_accounting), #endif #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) REG("timers", S_IRUGO, proc_timers_operations), #endif REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif #ifdef CONFIG_STACKLEAK_METRICS ONE("stack_depth", S_IRUGO, proc_stack_depth), #endif #ifdef CONFIG_PROC_PID_ARCH_STATUS ONE("arch_status", S_IRUGO, proc_pid_arch_status), #endif #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif }; static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) { return proc_pident_readdir(file, ctx, tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); } static const struct file_operations proc_tgid_base_operations = { .read = generic_read_dir, .iterate_shared = proc_tgid_base_readdir, .llseek = generic_file_llseek, }; struct pid *tgid_pidfd_to_pid(const struct file *file) { if (file->f_op != &proc_tgid_base_operations) return ERR_PTR(-EBADF); return proc_pid(file_inode(file)); } static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, tgid_base_stuff, tgid_base_stuff + ARRAY_SIZE(tgid_base_stuff)); } static const struct inode_operations proc_tgid_base_inode_operations = { .lookup = proc_tgid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, .permission = proc_pid_permission, }; /** * proc_flush_pid - Remove dcache entries for @pid from the /proc dcache. * @pid: pid that should be flushed. * * This function walks a list of inodes (that belong to any proc * filesystem) that are attached to the pid and flushes them from * the dentry cache. * * It is safe and reasonable to cache /proc entries for a task until * that task exits. After that they just clog up the dcache with * useless entries, possibly causing useful dcache entries to be * flushed instead. This routine is provided to flush those useless * dcache entries when a process is reaped. * * NOTE: This routine is just an optimization so it does not guarantee * that no dcache entries will exist after a process is reaped * it just makes it very unlikely that any will persist. */ void proc_flush_pid(struct pid *pid) { proc_invalidate_siblings_dcache(&pid->inodes, &pid->lock); } static struct dentry *proc_pid_instantiate(struct dentry * dentry, struct task_struct *task, const void *ptr) { struct inode *inode; inode = proc_pid_make_base_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); inode->i_op = &proc_tgid_base_inode_operations; inode->i_fop = &proc_tgid_base_operations; inode->i_flags|=S_IMMUTABLE; set_nlink(inode, nlink_tgid); pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); return d_splice_alias(inode, dentry); } struct dentry *proc_pid_lookup(struct dentry *dentry, unsigned int flags) { struct task_struct *task; unsigned tgid; struct proc_fs_info *fs_info; struct pid_namespace *ns; struct dentry *result = ERR_PTR(-ENOENT); tgid = name_to_int(&dentry->d_name); if (tgid == ~0U) goto out; fs_info = proc_sb_info(dentry->d_sb); ns = fs_info->pid_ns; rcu_read_lock(); task = find_task_by_pid_ns(tgid, ns); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) goto out; /* Limit procfs to only ptraceable tasks */ if (fs_info->hide_pid == HIDEPID_NOT_PTRACEABLE) { if (!has_pid_permissions(fs_info, task, HIDEPID_NO_ACCESS)) goto out_put_task; } result = proc_pid_instantiate(dentry, task, NULL); out_put_task: put_task_struct(task); out: return result; } /* * Find the first task with tgid >= tgid * */ struct tgid_iter { unsigned int tgid; struct task_struct *task; }; static struct tgid_iter next_tgid(struct pid_namespace *ns, struct tgid_iter iter) { struct pid *pid; if (iter.task) put_task_struct(iter.task); rcu_read_lock(); retry: iter.task = NULL; pid = find_ge_pid(iter.tgid, ns); if (pid) { iter.tgid = pid_nr_ns(pid, ns); iter.task = pid_task(pid, PIDTYPE_TGID); if (!iter.task) { iter.tgid += 1; goto retry; } get_task_struct(iter.task); } rcu_read_unlock(); return iter; } #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2) /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file *file, struct dir_context *ctx) { struct tgid_iter iter; struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb); struct pid_namespace *ns = proc_pid_ns(file_inode(file)->i_sb); loff_t pos = ctx->pos; if (pos >= PID_MAX_LIMIT + TGID_OFFSET) return 0; if (pos == TGID_OFFSET - 2) { struct inode *inode = d_inode(fs_info->proc_self); if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } if (pos == TGID_OFFSET - 1) { struct inode *inode = d_inode(fs_info->proc_thread_self); if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK)) return 0; ctx->pos = pos = pos + 1; } iter.tgid = pos - TGID_OFFSET; iter.task = NULL; for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { char name[10 + 1]; unsigned int len; cond_resched(); if (!has_pid_permissions(fs_info, iter.task, HIDEPID_INVISIBLE)) continue; len = snprintf(name, sizeof(name), "%u", iter.tgid); ctx->pos = iter.tgid + TGID_OFFSET; if (!proc_fill_cache(file, ctx, name, len, proc_pid_instantiate, iter.task, NULL)) { put_task_struct(iter.task); return 0; } } ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; return 0; } /* * proc_tid_comm_permission is a special permission function exclusively * used for the node /proc/<pid>/task/<tid>/comm. * It bypasses generic permission checks in the case where a task of the same * task group attempts to access the node. * The rationale behind this is that glibc and bionic access this node for * cross thread naming (pthread_set/getname_np(!self)). However, if * PR_SET_DUMPABLE gets set to 0 this node among others becomes uid=0 gid=0, * which locks out the cross thread naming implementation. * This function makes sure that the node is always accessible for members of * same thread group. */ static int proc_tid_comm_permission(struct mnt_idmap *idmap, struct inode *inode, int mask) { bool is_same_tgroup; struct task_struct *task; task = get_proc_task(inode); if (!task) return -ESRCH; is_same_tgroup = same_thread_group(current, task); put_task_struct(task); if (likely(is_same_tgroup && !(mask & MAY_EXEC))) { /* This file (/proc/<pid>/task/<tid>/comm) can always be * read or written by the members of the corresponding * thread group. */ return 0; } return generic_permission(&nop_mnt_idmap, inode, mask); } static const struct inode_operations proc_tid_comm_inode_operations = { .setattr = proc_setattr, .permission = proc_tid_comm_permission, }; /* * Tasks */ static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUGO|S_IXUGO, proc_fdinfo_inode_operations, proc_fdinfo_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), #endif REG("environ", S_IRUSR, proc_environ_operations), REG("auxv", S_IRUSR, proc_auxv_operations), ONE("status", S_IRUGO, proc_pid_status), ONE("personality", S_IRUSR, proc_pid_personality), ONE("limits", S_IRUGO, proc_pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), #endif NOD("comm", S_IFREG|S_IRUGO|S_IWUSR, &proc_tid_comm_inode_operations, &proc_pid_set_comm_operations, {}), #ifdef CONFIG_HAVE_ARCH_TRACEHOOK ONE("syscall", S_IRUSR, proc_pid_syscall), #endif REG("cmdline", S_IRUGO, proc_pid_cmdline_ops), ONE("stat", S_IRUGO, proc_tid_stat), ONE("statm", S_IRUGO, proc_pid_statm), REG("maps", S_IRUGO, proc_pid_maps_operations), #ifdef CONFIG_PROC_CHILDREN REG("children", S_IRUGO, proc_tid_children_operations), #endif #ifdef CONFIG_NUMA REG("numa_maps", S_IRUGO, proc_pid_numa_maps_operations), #endif REG("mem", S_IRUSR|S_IWUSR, proc_mem_operations), LNK("cwd", proc_cwd_link), LNK("root", proc_root_link), LNK("exe", proc_exe_link), REG("mounts", S_IRUGO, proc_mounts_operations), REG("mountinfo", S_IRUGO, proc_mountinfo_operations), #ifdef CONFIG_PROC_PAGE_MONITOR REG("clear_refs", S_IWUSR, proc_clear_refs_operations), REG("smaps", S_IRUGO, proc_pid_smaps_operations), REG("smaps_rollup", S_IRUGO, proc_pid_smaps_rollup_operations), REG("pagemap", S_IRUSR, proc_pagemap_operations), #endif #ifdef CONFIG_SECURITY DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), #endif #ifdef CONFIG_KALLSYMS ONE("wchan", S_IRUGO, proc_pid_wchan), #endif #ifdef CONFIG_STACKTRACE ONE("stack", S_IRUSR, proc_pid_stack), #endif #ifdef CONFIG_SCHED_INFO ONE("schedstat", S_IRUGO, proc_pid_schedstat), #endif #ifdef CONFIG_LATENCYTOP REG("latency", S_IRUGO, proc_lstats_operations), #endif #ifdef CONFIG_PROC_PID_CPUSET ONE("cpuset", S_IRUGO, proc_cpuset_show), #endif #ifdef CONFIG_CGROUPS ONE("cgroup", S_IRUGO, proc_cgroup_show), #endif #ifdef CONFIG_PROC_CPU_RESCTRL ONE("cpu_resctrl_groups", S_IRUGO, proc_resctrl_show), #endif ONE("oom_score", S_IRUGO, proc_oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDIT REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), REG("sessionid", S_IRUGO, proc_sessionid_operations), #endif #ifdef CONFIG_FAULT_INJECTION REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations), REG("fail-nth", 0644, proc_fail_nth_operations), #endif #ifdef CONFIG_TASK_IO_ACCOUNTING ONE("io", S_IRUSR, proc_tid_io_accounting), #endif #ifdef CONFIG_USER_NS REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations), REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations), REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif #ifdef CONFIG_LIVEPATCH ONE("patch_state", S_IRUSR, proc_pid_patch_state), #endif #ifdef CONFIG_PROC_PID_ARCH_STATUS ONE("arch_status", S_IRUGO, proc_pid_arch_status), #endif #ifdef CONFIG_SECCOMP_CACHE_DEBUG ONE("seccomp_cache", S_IRUSR, proc_pid_seccomp_cache), #endif #ifdef CONFIG_KSM ONE("ksm_merging_pages", S_IRUSR, proc_pid_ksm_merging_pages), ONE("ksm_stat", S_IRUSR, proc_pid_ksm_stat), #endif }; static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) { return proc_pident_readdir(file, ctx, tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); } static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { return proc_pident_lookup(dir, dentry, tid_base_stuff, tid_base_stuff + ARRAY_SIZE(tid_base_stuff)); } static const struct file_operations proc_tid_base_operations = { .read = generic_read_dir, .iterate_shared = proc_tid_base_readdir, .llseek = generic_file_llseek, }; static const struct inode_operations proc_tid_base_inode_operations = { .lookup = proc_tid_base_lookup, .getattr = pid_getattr, .setattr = proc_setattr, }; static struct dentry *proc_task_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { struct inode *inode; inode = proc_pid_make_base_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); inode->i_op = &proc_tid_base_inode_operations; inode->i_fop = &proc_tid_base_operations; inode->i_flags |= S_IMMUTABLE; set_nlink(inode, nlink_tid); pid_update_inode(task, inode); d_set_d_op(dentry, &pid_dentry_operations); return d_splice_alias(inode, dentry); } static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; struct proc_fs_info *fs_info; struct pid_namespace *ns; struct dentry *result = ERR_PTR(-ENOENT); if (!leader) goto out_no_task; tid = name_to_int(&dentry->d_name); if (tid == ~0U) goto out; fs_info = proc_sb_info(dentry->d_sb); ns = fs_info->pid_ns; rcu_read_lock(); task = find_task_by_pid_ns(tid, ns); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) goto out; if (!same_thread_group(leader, task)) goto out_drop_task; result = proc_task_instantiate(dentry, task, NULL); out_drop_task: put_task_struct(task); out: put_task_struct(leader); out_no_task: return result; } /* * Find the first tid of a thread group to return to user space. * * Usually this is just the thread group leader, but if the users * buffer was too small or there was a seek into the middle of the * directory we have more work todo. * * In the case of a short read we start with find_task_by_pid. * * In the case of a seek we start with the leader and walk nr * threads past it. */ static struct task_struct *first_tid(struct pid *pid, int tid, loff_t f_pos, struct pid_namespace *ns) { struct task_struct *pos, *task; unsigned long nr = f_pos; if (nr != f_pos) /* 32bit overflow? */ return NULL; rcu_read_lock(); task = pid_task(pid, PIDTYPE_PID); if (!task) goto fail; /* Attempt to start with the tid of a thread */ if (tid && nr) { pos = find_task_by_pid_ns(tid, ns); if (pos && same_thread_group(pos, task)) goto found; } /* If nr exceeds the number of threads there is nothing todo */ if (nr >= get_nr_threads(task)) goto fail; /* If we haven't found our starting place yet start * with the leader and walk nr threads forward. */ for_each_thread(task, pos) { if (!nr--) goto found; } fail: pos = NULL; goto out; found: get_task_struct(pos); out: rcu_read_unlock(); return pos; } /* * Find the next thread in the thread list. * Return NULL if there is an error or no next thread. * * The reference to the input task_struct is released. */ static struct task_struct *next_tid(struct task_struct *start) { struct task_struct *pos = NULL; rcu_read_lock(); if (pid_alive(start)) { pos = __next_thread(start); if (pos) get_task_struct(pos); } rcu_read_unlock(); put_task_struct(start); return pos; } /* for the /proc/TGID/task/ directories */ static int proc_task_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct task_struct *task; struct pid_namespace *ns; int tid; if (proc_inode_is_dead(inode)) return -ENOENT; if (!dir_emit_dots(file, ctx)) return 0; /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ ns = proc_pid_ns(inode->i_sb); tid = (int)file->f_version; file->f_version = 0; for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { char name[10 + 1]; unsigned int len; tid = task_pid_nr_ns(task, ns); if (!tid) continue; /* The task has just exited. */ len = snprintf(name, sizeof(name), "%u", tid); if (!proc_fill_cache(file, ctx, name, len, proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first * pid for the next readir call */ file->f_version = (u64)tid; put_task_struct(task); break; } } return 0; } static int proc_task_getattr(struct mnt_idmap *idmap, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { struct inode *inode = d_inode(path->dentry); struct task_struct *p = get_proc_task(inode); generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat); if (p) { stat->nlink += get_nr_threads(p); put_task_struct(p); } return 0; } static const struct inode_operations proc_task_inode_operations = { .lookup = proc_task_lookup, .getattr = proc_task_getattr, .setattr = proc_setattr, .permission = proc_pid_permission, }; static const struct file_operations proc_task_operations = { .read = generic_read_dir, .iterate_shared = proc_task_readdir, .llseek = generic_file_llseek, }; void __init set_proc_pid_nlink(void) { nlink_tid = pid_entry_nlink(tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); nlink_tgid = pid_entry_nlink(tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); }
6 6 11 1 11 7 7 7 1 6 6 6 17 17 17 17 15 17 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 // SPDX-License-Identifier: GPL-2.0-or-later /* * Generic MIDI synth driver for ALSA sequencer * Copyright (c) 1998 by Frank van de Pol <fvdpol@coil.demon.nl> * Jaroslav Kysela <perex@perex.cz> */ /* Possible options for midisynth module: - automatic opening of midi ports on first received event or subscription (close will be performed when client leaves) */ #include <linux/init.h> #include <linux/slab.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/module.h> #include <linux/mutex.h> #include <sound/core.h> #include <sound/rawmidi.h> #include <sound/seq_kernel.h> #include <sound/seq_device.h> #include <sound/seq_midi_event.h> #include <sound/initval.h> MODULE_AUTHOR("Frank van de Pol <fvdpol@coil.demon.nl>, Jaroslav Kysela <perex@perex.cz>"); MODULE_DESCRIPTION("Advanced Linux Sound Architecture sequencer MIDI synth."); MODULE_LICENSE("GPL"); static int output_buffer_size = PAGE_SIZE; module_param(output_buffer_size, int, 0644); MODULE_PARM_DESC(output_buffer_size, "Output buffer size in bytes."); static int input_buffer_size = PAGE_SIZE; module_param(input_buffer_size, int, 0644); MODULE_PARM_DESC(input_buffer_size, "Input buffer size in bytes."); /* data for this midi synth driver */ struct seq_midisynth { struct snd_card *card; struct snd_rawmidi *rmidi; int device; int subdevice; struct snd_rawmidi_file input_rfile; struct snd_rawmidi_file output_rfile; int seq_client; int seq_port; struct snd_midi_event *parser; }; struct seq_midisynth_client { int seq_client; int num_ports; int ports_per_device[SNDRV_RAWMIDI_DEVICES]; struct seq_midisynth *ports[SNDRV_RAWMIDI_DEVICES]; }; static struct seq_midisynth_client *synths[SNDRV_CARDS]; static DEFINE_MUTEX(register_mutex); /* handle rawmidi input event (MIDI v1.0 stream) */ static void snd_midi_input_event(struct snd_rawmidi_substream *substream) { struct snd_rawmidi_runtime *runtime; struct seq_midisynth *msynth; struct snd_seq_event ev; char buf[16], *pbuf; long res; if (substream == NULL) return; runtime = substream->runtime; msynth = runtime->private_data; if (msynth == NULL) return; memset(&ev, 0, sizeof(ev)); while (runtime->avail > 0) { res = snd_rawmidi_kernel_read(substream, buf, sizeof(buf)); if (res <= 0) continue; if (msynth->parser == NULL) continue; pbuf = buf; while (res-- > 0) { if (!snd_midi_event_encode_byte(msynth->parser, *pbuf++, &ev)) continue; ev.source.port = msynth->seq_port; ev.dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; snd_seq_kernel_client_dispatch(msynth->seq_client, &ev, 1, 0); /* clear event and reset header */ memset(&ev, 0, sizeof(ev)); } } } static int dump_midi(struct snd_rawmidi_substream *substream, const char *buf, int count) { struct snd_rawmidi_runtime *runtime; int tmp; if (snd_BUG_ON(!substream || !buf)) return -EINVAL; runtime = substream->runtime; tmp = runtime->avail; if (tmp < count) { if (printk_ratelimit()) pr_err("ALSA: seq_midi: MIDI output buffer overrun\n"); return -ENOMEM; } if (snd_rawmidi_kernel_write(substream, buf, count) < count) return -EINVAL; return 0; } /* callback for snd_seq_dump_var_event(), bridging to dump_midi() */ static int __dump_midi(void *ptr, void *buf, int count) { return dump_midi(ptr, buf, count); } static int event_process_midi(struct snd_seq_event *ev, int direct, void *private_data, int atomic, int hop) { struct seq_midisynth *msynth = private_data; unsigned char msg[10]; /* buffer for constructing midi messages */ struct snd_rawmidi_substream *substream; int len; if (snd_BUG_ON(!msynth)) return -EINVAL; substream = msynth->output_rfile.output; if (substream == NULL) return -ENODEV; if (ev->type == SNDRV_SEQ_EVENT_SYSEX) { /* special case, to save space */ if ((ev->flags & SNDRV_SEQ_EVENT_LENGTH_MASK) != SNDRV_SEQ_EVENT_LENGTH_VARIABLE) { /* invalid event */ pr_debug("ALSA: seq_midi: invalid sysex event flags = 0x%x\n", ev->flags); return 0; } snd_seq_dump_var_event(ev, __dump_midi, substream); snd_midi_event_reset_decode(msynth->parser); } else { if (msynth->parser == NULL) return -EIO; len = snd_midi_event_decode(msynth->parser, msg, sizeof(msg), ev); if (len < 0) return 0; if (dump_midi(substream, msg, len) < 0) snd_midi_event_reset_decode(msynth->parser); } return 0; } static int snd_seq_midisynth_new(struct seq_midisynth *msynth, struct snd_card *card, int device, int subdevice) { if (snd_midi_event_new(MAX_MIDI_EVENT_BUF, &msynth->parser) < 0) return -ENOMEM; msynth->card = card; msynth->device = device; msynth->subdevice = subdevice; return 0; } /* open associated midi device for input */ static int midisynth_subscribe(void *private_data, struct snd_seq_port_subscribe *info) { int err; struct seq_midisynth *msynth = private_data; struct snd_rawmidi_runtime *runtime; struct snd_rawmidi_params params; /* open midi port */ err = snd_rawmidi_kernel_open(msynth->rmidi, msynth->subdevice, SNDRV_RAWMIDI_LFLG_INPUT, &msynth->input_rfile); if (err < 0) { pr_debug("ALSA: seq_midi: midi input open failed!!!\n"); return err; } runtime = msynth->input_rfile.input->runtime; memset(&params, 0, sizeof(params)); params.avail_min = 1; params.buffer_size = input_buffer_size; err = snd_rawmidi_input_params(msynth->input_rfile.input, &params); if (err < 0) { snd_rawmidi_kernel_release(&msynth->input_rfile); return err; } snd_midi_event_reset_encode(msynth->parser); runtime->event = snd_midi_input_event; runtime->private_data = msynth; snd_rawmidi_kernel_read(msynth->input_rfile.input, NULL, 0); return 0; } /* close associated midi device for input */ static int midisynth_unsubscribe(void *private_data, struct snd_seq_port_subscribe *info) { int err; struct seq_midisynth *msynth = private_data; if (snd_BUG_ON(!msynth->input_rfile.input)) return -EINVAL; err = snd_rawmidi_kernel_release(&msynth->input_rfile); return err; } /* open associated midi device for output */ static int midisynth_use(void *private_data, struct snd_seq_port_subscribe *info) { int err; struct seq_midisynth *msynth = private_data; struct snd_rawmidi_params params; /* open midi port */ err = snd_rawmidi_kernel_open(msynth->rmidi, msynth->subdevice, SNDRV_RAWMIDI_LFLG_OUTPUT, &msynth->output_rfile); if (err < 0) { pr_debug("ALSA: seq_midi: midi output open failed!!!\n"); return err; } memset(&params, 0, sizeof(params)); params.avail_min = 1; params.buffer_size = output_buffer_size; params.no_active_sensing = 1; err = snd_rawmidi_output_params(msynth->output_rfile.output, &params); if (err < 0) { snd_rawmidi_kernel_release(&msynth->output_rfile); return err; } snd_midi_event_reset_decode(msynth->parser); return 0; } /* close associated midi device for output */ static int midisynth_unuse(void *private_data, struct snd_seq_port_subscribe *info) { struct seq_midisynth *msynth = private_data; if (snd_BUG_ON(!msynth->output_rfile.output)) return -EINVAL; snd_rawmidi_drain_output(msynth->output_rfile.output); return snd_rawmidi_kernel_release(&msynth->output_rfile); } /* delete given midi synth port */ static void snd_seq_midisynth_delete(struct seq_midisynth *msynth) { if (msynth == NULL) return; if (msynth->seq_client > 0) { /* delete port */ snd_seq_event_port_detach(msynth->seq_client, msynth->seq_port); } snd_midi_event_free(msynth->parser); } /* register new midi synth port */ static int snd_seq_midisynth_probe(struct device *_dev) { struct snd_seq_device *dev = to_seq_dev(_dev); struct seq_midisynth_client *client; struct seq_midisynth *msynth, *ms; struct snd_seq_port_info *port __free(kfree) = NULL; struct snd_rawmidi_info *info __free(kfree) = NULL; struct snd_rawmidi *rmidi = dev->private_data; int newclient = 0; unsigned int p, ports; struct snd_seq_port_callback pcallbacks; struct snd_card *card = dev->card; int device = dev->device; unsigned int input_count = 0, output_count = 0; if (snd_BUG_ON(!card || device < 0 || device >= SNDRV_RAWMIDI_DEVICES)) return -EINVAL; info = kmalloc(sizeof(*info), GFP_KERNEL); if (! info) return -ENOMEM; info->device = device; info->stream = SNDRV_RAWMIDI_STREAM_OUTPUT; info->subdevice = 0; if (snd_rawmidi_info_select(card, info) >= 0) output_count = info->subdevices_count; info->stream = SNDRV_RAWMIDI_STREAM_INPUT; if (snd_rawmidi_info_select(card, info) >= 0) { input_count = info->subdevices_count; } ports = output_count; if (ports < input_count) ports = input_count; if (ports == 0) return -ENODEV; if (ports > (256 / SNDRV_RAWMIDI_DEVICES)) ports = 256 / SNDRV_RAWMIDI_DEVICES; guard(mutex)(&register_mutex); client = synths[card->number]; if (client == NULL) { newclient = 1; client = kzalloc(sizeof(*client), GFP_KERNEL); if (client == NULL) return -ENOMEM; client->seq_client = snd_seq_create_kernel_client( card, 0, "%s", card->shortname[0] ? (const char *)card->shortname : "External MIDI"); if (client->seq_client < 0) { kfree(client); return -ENOMEM; } } msynth = kcalloc(ports, sizeof(struct seq_midisynth), GFP_KERNEL); port = kmalloc(sizeof(*port), GFP_KERNEL); if (msynth == NULL || port == NULL) goto __nomem; for (p = 0; p < ports; p++) { ms = &msynth[p]; ms->rmidi = rmidi; if (snd_seq_midisynth_new(ms, card, device, p) < 0) goto __nomem; /* declare port */ memset(port, 0, sizeof(*port)); port->addr.client = client->seq_client; port->addr.port = device * (256 / SNDRV_RAWMIDI_DEVICES) + p; port->flags = SNDRV_SEQ_PORT_FLG_GIVEN_PORT; memset(info, 0, sizeof(*info)); info->device = device; if (p < output_count) info->stream = SNDRV_RAWMIDI_STREAM_OUTPUT; else info->stream = SNDRV_RAWMIDI_STREAM_INPUT; info->subdevice = p; if (snd_rawmidi_info_select(card, info) >= 0) strcpy(port->name, info->subname); if (! port->name[0]) { if (info->name[0]) { if (ports > 1) scnprintf(port->name, sizeof(port->name), "%s-%u", info->name, p); else scnprintf(port->name, sizeof(port->name), "%s", info->name); } else { /* last resort */ if (ports > 1) sprintf(port->name, "MIDI %d-%d-%u", card->number, device, p); else sprintf(port->name, "MIDI %d-%d", card->number, device); } } if ((info->flags & SNDRV_RAWMIDI_INFO_OUTPUT) && p < output_count) port->capability |= SNDRV_SEQ_PORT_CAP_WRITE | SNDRV_SEQ_PORT_CAP_SYNC_WRITE | SNDRV_SEQ_PORT_CAP_SUBS_WRITE; if ((info->flags & SNDRV_RAWMIDI_INFO_INPUT) && p < input_count) port->capability |= SNDRV_SEQ_PORT_CAP_READ | SNDRV_SEQ_PORT_CAP_SYNC_READ | SNDRV_SEQ_PORT_CAP_SUBS_READ; if ((port->capability & (SNDRV_SEQ_PORT_CAP_WRITE|SNDRV_SEQ_PORT_CAP_READ)) == (SNDRV_SEQ_PORT_CAP_WRITE|SNDRV_SEQ_PORT_CAP_READ) && info->flags & SNDRV_RAWMIDI_INFO_DUPLEX) port->capability |= SNDRV_SEQ_PORT_CAP_DUPLEX; if (port->capability & SNDRV_SEQ_PORT_CAP_READ) port->direction |= SNDRV_SEQ_PORT_DIR_INPUT; if (port->capability & SNDRV_SEQ_PORT_CAP_WRITE) port->direction |= SNDRV_SEQ_PORT_DIR_OUTPUT; port->type = SNDRV_SEQ_PORT_TYPE_MIDI_GENERIC | SNDRV_SEQ_PORT_TYPE_HARDWARE | SNDRV_SEQ_PORT_TYPE_PORT; port->midi_channels = 16; memset(&pcallbacks, 0, sizeof(pcallbacks)); pcallbacks.owner = THIS_MODULE; pcallbacks.private_data = ms; pcallbacks.subscribe = midisynth_subscribe; pcallbacks.unsubscribe = midisynth_unsubscribe; pcallbacks.use = midisynth_use; pcallbacks.unuse = midisynth_unuse; pcallbacks.event_input = event_process_midi; port->kernel = &pcallbacks; if (rmidi->ops && rmidi->ops->get_port_info) rmidi->ops->get_port_info(rmidi, p, port); if (snd_seq_kernel_client_ctl(client->seq_client, SNDRV_SEQ_IOCTL_CREATE_PORT, port)<0) goto __nomem; ms->seq_client = client->seq_client; ms->seq_port = port->addr.port; } client->ports_per_device[device] = ports; client->ports[device] = msynth; client->num_ports++; if (newclient) synths[card->number] = client; return 0; /* success */ __nomem: if (msynth != NULL) { for (p = 0; p < ports; p++) snd_seq_midisynth_delete(&msynth[p]); kfree(msynth); } if (newclient) { snd_seq_delete_kernel_client(client->seq_client); kfree(client); } return -ENOMEM; } /* release midi synth port */ static int snd_seq_midisynth_remove(struct device *_dev) { struct snd_seq_device *dev = to_seq_dev(_dev); struct seq_midisynth_client *client; struct seq_midisynth *msynth; struct snd_card *card = dev->card; int device = dev->device, p, ports; guard(mutex)(&register_mutex); client = synths[card->number]; if (client == NULL || client->ports[device] == NULL) return -ENODEV; ports = client->ports_per_device[device]; client->ports_per_device[device] = 0; msynth = client->ports[device]; client->ports[device] = NULL; for (p = 0; p < ports; p++) snd_seq_midisynth_delete(&msynth[p]); kfree(msynth); client->num_ports--; if (client->num_ports <= 0) { snd_seq_delete_kernel_client(client->seq_client); synths[card->number] = NULL; kfree(client); } return 0; } static struct snd_seq_driver seq_midisynth_driver = { .driver = { .name = KBUILD_MODNAME, .probe = snd_seq_midisynth_probe, .remove = snd_seq_midisynth_remove, }, .id = SNDRV_SEQ_DEV_ID_MIDISYNTH, .argsize = 0, }; module_snd_seq_driver(seq_midisynth_driver);
16 16 12 13 5 2 15 15 15 15 15 15 15 15 15 15 12 3 13 2 15 2 13 2 2 2 2 2 2 15 15 15 4 4 4 4 4 4 6 6 6 6 1 5 5 5 6 15 11 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Marek Lindner, Simon Wunderlich */ #include "bat_iv_ogm.h" #include "main.h" #include <linux/atomic.h> #include <linux/bitmap.h> #include <linux/bitops.h> #include <linux/bug.h> #include <linux/byteorder/generic.h> #include <linux/cache.h> #include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/init.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/netlink.h> #include <linux/pkt_sched.h> #include <linux/printk.h> #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/types.h> #include <linux/workqueue.h> #include <net/genetlink.h> #include <net/netlink.h> #include <uapi/linux/batadv_packet.h> #include <uapi/linux/batman_adv.h> #include "bat_algo.h" #include "bitarray.h" #include "gateway_client.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "netlink.h" #include "network-coding.h" #include "originator.h" #include "routing.h" #include "send.h" #include "translation-table.h" #include "tvlv.h" static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work); /** * enum batadv_dup_status - duplicate status */ enum batadv_dup_status { /** @BATADV_NO_DUP: the packet is no duplicate */ BATADV_NO_DUP = 0, /** * @BATADV_ORIG_DUP: OGM is a duplicate in the originator (but not for * the neighbor) */ BATADV_ORIG_DUP, /** @BATADV_NEIGH_DUP: OGM is a duplicate for the neighbor */ BATADV_NEIGH_DUP, /** * @BATADV_PROTECTED: originator is currently protected (after reboot) */ BATADV_PROTECTED, }; /** * batadv_ring_buffer_set() - update the ring buffer with the given value * @lq_recv: pointer to the ring buffer * @lq_index: index to store the value at * @value: value to store in the ring buffer */ static void batadv_ring_buffer_set(u8 lq_recv[], u8 *lq_index, u8 value) { lq_recv[*lq_index] = value; *lq_index = (*lq_index + 1) % BATADV_TQ_GLOBAL_WINDOW_SIZE; } /** * batadv_ring_buffer_avg() - compute the average of all non-zero values stored * in the given ring buffer * @lq_recv: pointer to the ring buffer * * Return: computed average value. */ static u8 batadv_ring_buffer_avg(const u8 lq_recv[]) { const u8 *ptr; u16 count = 0; u16 i = 0; u16 sum = 0; ptr = lq_recv; while (i < BATADV_TQ_GLOBAL_WINDOW_SIZE) { if (*ptr != 0) { count++; sum += *ptr; } i++; ptr++; } if (count == 0) return 0; return (u8)(sum / count); } /** * batadv_iv_ogm_orig_get() - retrieve or create (if does not exist) an * originator * @bat_priv: the bat priv with all the soft interface information * @addr: mac address of the originator * * Return: the originator object corresponding to the passed mac address or NULL * on failure. * If the object does not exist, it is created and initialised. */ static struct batadv_orig_node * batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) { struct batadv_orig_node *orig_node; int hash_added; orig_node = batadv_orig_hash_find(bat_priv, addr); if (orig_node) return orig_node; orig_node = batadv_orig_node_new(bat_priv, addr); if (!orig_node) return NULL; spin_lock_init(&orig_node->bat_iv.ogm_cnt_lock); kref_get(&orig_node->refcount); hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, batadv_choose_orig, orig_node, &orig_node->hash_entry); if (hash_added != 0) goto free_orig_node_hash; return orig_node; free_orig_node_hash: /* reference for batadv_hash_add */ batadv_orig_node_put(orig_node); /* reference from batadv_orig_node_new */ batadv_orig_node_put(orig_node); return NULL; } static struct batadv_neigh_node * batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface, const u8 *neigh_addr, struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh) { struct batadv_neigh_node *neigh_node; neigh_node = batadv_neigh_node_get_or_create(orig_node, hard_iface, neigh_addr); if (!neigh_node) goto out; neigh_node->orig_node = orig_neigh; out: return neigh_node; } static int batadv_iv_ogm_iface_enable(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; unsigned char *ogm_buff; u32 random_seqno; mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); /* randomize initial seqno to avoid collision */ get_random_bytes(&random_seqno, sizeof(random_seqno)); atomic_set(&hard_iface->bat_iv.ogm_seqno, random_seqno); hard_iface->bat_iv.ogm_buff_len = BATADV_OGM_HLEN; ogm_buff = kmalloc(hard_iface->bat_iv.ogm_buff_len, GFP_ATOMIC); if (!ogm_buff) { mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); return -ENOMEM; } hard_iface->bat_iv.ogm_buff = ogm_buff; batadv_ogm_packet = (struct batadv_ogm_packet *)ogm_buff; batadv_ogm_packet->packet_type = BATADV_IV_OGM; batadv_ogm_packet->version = BATADV_COMPAT_VERSION; batadv_ogm_packet->ttl = 2; batadv_ogm_packet->flags = BATADV_NO_FLAGS; batadv_ogm_packet->reserved = 0; batadv_ogm_packet->tq = BATADV_TQ_MAX_VALUE; mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); return 0; } static void batadv_iv_ogm_iface_disable(struct batadv_hard_iface *hard_iface) { mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); kfree(hard_iface->bat_iv.ogm_buff); hard_iface->bat_iv.ogm_buff = NULL; mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_iface_update_mac(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; void *ogm_buff; mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); ogm_buff = hard_iface->bat_iv.ogm_buff; if (!ogm_buff) goto unlock; batadv_ogm_packet = ogm_buff; ether_addr_copy(batadv_ogm_packet->orig, hard_iface->net_dev->dev_addr); ether_addr_copy(batadv_ogm_packet->prev_sender, hard_iface->net_dev->dev_addr); unlock: mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } static void batadv_iv_ogm_primary_iface_set(struct batadv_hard_iface *hard_iface) { struct batadv_ogm_packet *batadv_ogm_packet; void *ogm_buff; mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); ogm_buff = hard_iface->bat_iv.ogm_buff; if (!ogm_buff) goto unlock; batadv_ogm_packet = ogm_buff; batadv_ogm_packet->ttl = BATADV_TTL; unlock: mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } /* when do we schedule our own ogm to be sent */ static unsigned long batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv) { unsigned int msecs; msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER; msecs += get_random_u32_below(2 * BATADV_JITTER); return jiffies + msecs_to_jiffies(msecs); } /* when do we schedule a ogm packet to be sent */ static unsigned long batadv_iv_ogm_fwd_send_time(void) { return jiffies + msecs_to_jiffies(get_random_u32_below(BATADV_JITTER / 2)); } /* apply hop penalty for a normal link */ static u8 batadv_hop_penalty(u8 tq, const struct batadv_priv *bat_priv) { int hop_penalty = atomic_read(&bat_priv->hop_penalty); int new_tq; new_tq = tq * (BATADV_TQ_MAX_VALUE - hop_penalty); new_tq /= BATADV_TQ_MAX_VALUE; return new_tq; } /** * batadv_iv_ogm_aggr_packet() - checks if there is another OGM attached * @buff_pos: current position in the skb * @packet_len: total length of the skb * @ogm_packet: potential OGM in buffer * * Return: true if there is enough space for another OGM, false otherwise. */ static bool batadv_iv_ogm_aggr_packet(int buff_pos, int packet_len, const struct batadv_ogm_packet *ogm_packet) { int next_buff_pos = 0; /* check if there is enough space for the header */ next_buff_pos += buff_pos + sizeof(*ogm_packet); if (next_buff_pos > packet_len) return false; /* check if there is enough space for the optional TVLV */ next_buff_pos += ntohs(ogm_packet->tvlv_len); return (next_buff_pos <= packet_len) && (next_buff_pos <= BATADV_MAX_AGGREGATION_BYTES); } /* send a batman ogm to a given interface */ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); const char *fwd_str; u8 packet_num; s16 buff_pos; struct batadv_ogm_packet *batadv_ogm_packet; struct sk_buff *skb; u8 *packet_pos; if (hard_iface->if_status != BATADV_IF_ACTIVE) return; packet_num = 0; buff_pos = 0; packet_pos = forw_packet->skb->data; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; /* adjust all flags and log packets */ while (batadv_iv_ogm_aggr_packet(buff_pos, forw_packet->packet_len, batadv_ogm_packet)) { /* we might have aggregated direct link packets with an * ordinary base packet */ if (forw_packet->direct_link_flags & BIT(packet_num) && forw_packet->if_incoming == hard_iface) batadv_ogm_packet->flags |= BATADV_DIRECTLINK; else batadv_ogm_packet->flags &= ~BATADV_DIRECTLINK; if (packet_num > 0 || !forw_packet->own) fwd_str = "Forwarding"; else fwd_str = "Sending own"; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s %spacket (originator %pM, seqno %u, TQ %d, TTL %d, IDF %s) on interface %s [%pM]\n", fwd_str, (packet_num > 0 ? "aggregated " : ""), batadv_ogm_packet->orig, ntohl(batadv_ogm_packet->seqno), batadv_ogm_packet->tq, batadv_ogm_packet->ttl, ((batadv_ogm_packet->flags & BATADV_DIRECTLINK) ? "on" : "off"), hard_iface->net_dev->name, hard_iface->net_dev->dev_addr); buff_pos += BATADV_OGM_HLEN; buff_pos += ntohs(batadv_ogm_packet->tvlv_len); packet_num++; packet_pos = forw_packet->skb->data + buff_pos; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_pos; } /* create clone because function is called more than once */ skb = skb_clone(forw_packet->skb, GFP_ATOMIC); if (skb) { batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_TX); batadv_add_counter(bat_priv, BATADV_CNT_MGMT_TX_BYTES, skb->len + ETH_HLEN); batadv_send_broadcast_skb(skb, hard_iface); } } /* send a batman ogm packet */ static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) { struct net_device *soft_iface; if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface not specified\n"); return; } soft_iface = forw_packet->if_incoming->soft_iface; if (WARN_ON(!forw_packet->if_outgoing)) return; if (forw_packet->if_outgoing->soft_iface != soft_iface) { pr_warn("%s: soft interface switch for queued OGM\n", __func__); return; } if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) return; /* only for one specific outgoing interface */ batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing); } /** * batadv_iv_ogm_can_aggregate() - find out if an OGM can be aggregated on an * existing forward packet * @new_bat_ogm_packet: OGM packet to be aggregated * @bat_priv: the bat priv with all the soft interface information * @packet_len: (total) length of the OGM * @send_time: timestamp (jiffies) when the packet is to be sent * @directlink: true if this is a direct link packet * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @forw_packet: the forwarded packet which should be checked * * Return: true if new_packet can be aggregated with forw_packet */ static bool batadv_iv_ogm_can_aggregate(const struct batadv_ogm_packet *new_bat_ogm_packet, struct batadv_priv *bat_priv, int packet_len, unsigned long send_time, bool directlink, const struct batadv_hard_iface *if_incoming, const struct batadv_hard_iface *if_outgoing, const struct batadv_forw_packet *forw_packet) { struct batadv_ogm_packet *batadv_ogm_packet; int aggregated_bytes = forw_packet->packet_len + packet_len; struct batadv_hard_iface *primary_if = NULL; bool res = false; unsigned long aggregation_end_time; batadv_ogm_packet = (struct batadv_ogm_packet *)forw_packet->skb->data; aggregation_end_time = send_time; aggregation_end_time += msecs_to_jiffies(BATADV_MAX_AGGREGATION_MS); /* we can aggregate the current packet to this aggregated packet * if: * * - the send time is within our MAX_AGGREGATION_MS time * - the resulting packet won't be bigger than * MAX_AGGREGATION_BYTES * otherwise aggregation is not possible */ if (!time_before(send_time, forw_packet->send_time) || !time_after_eq(aggregation_end_time, forw_packet->send_time)) return false; if (aggregated_bytes > BATADV_MAX_AGGREGATION_BYTES) return false; /* packet is not leaving on the same interface. */ if (forw_packet->if_outgoing != if_outgoing) return false; /* check aggregation compatibility * -> direct link packets are broadcasted on * their interface only * -> aggregate packet if the current packet is * a "global" packet as well as the base * packet */ primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) return false; /* packets without direct link flag and high TTL * are flooded through the net */ if (!directlink && !(batadv_ogm_packet->flags & BATADV_DIRECTLINK) && batadv_ogm_packet->ttl != 1 && /* own packets originating non-primary * interfaces leave only that interface */ (!forw_packet->own || forw_packet->if_incoming == primary_if)) { res = true; goto out; } /* if the incoming packet is sent via this one * interface only - we still can aggregate */ if (directlink && new_bat_ogm_packet->ttl == 1 && forw_packet->if_incoming == if_incoming && /* packets from direct neighbors or * own secondary interface packets * (= secondary interface packets in general) */ (batadv_ogm_packet->flags & BATADV_DIRECTLINK || (forw_packet->own && forw_packet->if_incoming != primary_if))) { res = true; goto out; } out: batadv_hardif_put(primary_if); return res; } /** * batadv_iv_ogm_aggregate_new() - create a new aggregated packet and add this * packet to it. * @packet_buff: pointer to the OGM * @packet_len: (total) length of the OGM * @send_time: timestamp (jiffies) when the packet is to be sent * @direct_link: whether this OGM has direct link status * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @own_packet: true if it is a self-generated ogm */ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, int packet_len, unsigned long send_time, bool direct_link, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, int own_packet) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_forw_packet *forw_packet_aggr; struct sk_buff *skb; unsigned char *skb_buff; unsigned int skb_size; atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left; if (atomic_read(&bat_priv->aggregated_ogms) && packet_len < BATADV_MAX_AGGREGATION_BYTES) skb_size = BATADV_MAX_AGGREGATION_BYTES; else skb_size = packet_len; skb_size += ETH_HLEN; skb = netdev_alloc_skb_ip_align(NULL, skb_size); if (!skb) return; forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, queue_left, bat_priv, skb); if (!forw_packet_aggr) { kfree_skb(skb); return; } forw_packet_aggr->skb->priority = TC_PRIO_CONTROL; skb_reserve(forw_packet_aggr->skb, ETH_HLEN); skb_buff = skb_put(forw_packet_aggr->skb, packet_len); forw_packet_aggr->packet_len = packet_len; memcpy(skb_buff, packet_buff, packet_len); forw_packet_aggr->own = own_packet; forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS; forw_packet_aggr->send_time = send_time; /* save packet direct link flag status */ if (direct_link) forw_packet_aggr->direct_link_flags |= 1; INIT_DELAYED_WORK(&forw_packet_aggr->delayed_work, batadv_iv_send_outstanding_bat_ogm_packet); batadv_forw_packet_ogmv1_queue(bat_priv, forw_packet_aggr, send_time); } /* aggregate a new packet into the existing ogm packet */ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, const unsigned char *packet_buff, int packet_len, bool direct_link) { unsigned long new_direct_link_flag; skb_put_data(forw_packet_aggr->skb, packet_buff, packet_len); forw_packet_aggr->packet_len += packet_len; forw_packet_aggr->num_packets++; /* save packet direct link flag status */ if (direct_link) { new_direct_link_flag = BIT(forw_packet_aggr->num_packets); forw_packet_aggr->direct_link_flags |= new_direct_link_flag; } } /** * batadv_iv_ogm_queue_add() - queue up an OGM for transmission * @bat_priv: the bat priv with all the soft interface information * @packet_buff: pointer to the OGM * @packet_len: (total) length of the OGM * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @own_packet: true if it is a self-generated ogm * @send_time: timestamp (jiffies) when the packet is to be sent */ static void batadv_iv_ogm_queue_add(struct batadv_priv *bat_priv, unsigned char *packet_buff, int packet_len, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, int own_packet, unsigned long send_time) { /* _aggr -> pointer to the packet we want to aggregate with * _pos -> pointer to the position in the queue */ struct batadv_forw_packet *forw_packet_aggr = NULL; struct batadv_forw_packet *forw_packet_pos = NULL; struct batadv_ogm_packet *batadv_ogm_packet; bool direct_link; unsigned long max_aggregation_jiffies; batadv_ogm_packet = (struct batadv_ogm_packet *)packet_buff; direct_link = !!(batadv_ogm_packet->flags & BATADV_DIRECTLINK); max_aggregation_jiffies = msecs_to_jiffies(BATADV_MAX_AGGREGATION_MS); /* find position for the packet in the forward queue */ spin_lock_bh(&bat_priv->forw_bat_list_lock); /* own packets are not to be aggregated */ if (atomic_read(&bat_priv->aggregated_ogms) && !own_packet) { hlist_for_each_entry(forw_packet_pos, &bat_priv->forw_bat_list, list) { if (batadv_iv_ogm_can_aggregate(batadv_ogm_packet, bat_priv, packet_len, send_time, direct_link, if_incoming, if_outgoing, forw_packet_pos)) { forw_packet_aggr = forw_packet_pos; break; } } } /* nothing to aggregate with - either aggregation disabled or no * suitable aggregation packet found */ if (!forw_packet_aggr) { /* the following section can run without the lock */ spin_unlock_bh(&bat_priv->forw_bat_list_lock); /* if we could not aggregate this packet with one of the others * we hold it back for a while, so that it might be aggregated * later on */ if (!own_packet && atomic_read(&bat_priv->aggregated_ogms)) send_time += max_aggregation_jiffies; batadv_iv_ogm_aggregate_new(packet_buff, packet_len, send_time, direct_link, if_incoming, if_outgoing, own_packet); } else { batadv_iv_ogm_aggregate(forw_packet_aggr, packet_buff, packet_len, direct_link); spin_unlock_bh(&bat_priv->forw_bat_list_lock); } } static void batadv_iv_ogm_forward(struct batadv_orig_node *orig_node, const struct ethhdr *ethhdr, struct batadv_ogm_packet *batadv_ogm_packet, bool is_single_hop_neigh, bool is_from_best_next_hop, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); u16 tvlv_len; if (batadv_ogm_packet->ttl <= 1) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "ttl exceeded\n"); return; } if (!is_from_best_next_hop) { /* Mark the forwarded packet when it is not coming from our * best next hop. We still need to forward the packet for our * neighbor link quality detection to work in case the packet * originated from a single hop neighbor. Otherwise we can * simply drop the ogm. */ if (is_single_hop_neigh) batadv_ogm_packet->flags |= BATADV_NOT_BEST_NEXT_HOP; else return; } tvlv_len = ntohs(batadv_ogm_packet->tvlv_len); batadv_ogm_packet->ttl--; ether_addr_copy(batadv_ogm_packet->prev_sender, ethhdr->h_source); /* apply hop penalty */ batadv_ogm_packet->tq = batadv_hop_penalty(batadv_ogm_packet->tq, bat_priv); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: tq: %i, ttl: %i\n", batadv_ogm_packet->tq, batadv_ogm_packet->ttl); if (is_single_hop_neigh) batadv_ogm_packet->flags |= BATADV_DIRECTLINK; else batadv_ogm_packet->flags &= ~BATADV_DIRECTLINK; batadv_iv_ogm_queue_add(bat_priv, (unsigned char *)batadv_ogm_packet, BATADV_OGM_HLEN + tvlv_len, if_incoming, if_outgoing, 0, batadv_iv_ogm_fwd_send_time()); } /** * batadv_iv_ogm_slide_own_bcast_window() - bitshift own OGM broadcast windows * for the given interface * @hard_iface: the interface for which the windows have to be shifted */ static void batadv_iv_ogm_slide_own_bcast_window(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; struct batadv_orig_node *orig_node; struct batadv_orig_ifinfo *orig_ifinfo; unsigned long *word; u32 i; u8 *w; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { hlist_for_each_entry_rcu(orig_ifinfo, &orig_node->ifinfo_list, list) { if (orig_ifinfo->if_outgoing != hard_iface) continue; spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); word = orig_ifinfo->bat_iv.bcast_own; batadv_bit_get_packet(bat_priv, word, 1, 0); w = &orig_ifinfo->bat_iv.bcast_own_sum; *w = bitmap_weight(word, BATADV_TQ_LOCAL_WINDOW_SIZE); spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); } } rcu_read_unlock(); } } /** * batadv_iv_ogm_schedule_buff() - schedule submission of hardif ogm buffer * @hard_iface: interface whose ogm buffer should be transmitted */ static void batadv_iv_ogm_schedule_buff(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); unsigned char **ogm_buff = &hard_iface->bat_iv.ogm_buff; struct batadv_ogm_packet *batadv_ogm_packet; struct batadv_hard_iface *primary_if, *tmp_hard_iface; int *ogm_buff_len = &hard_iface->bat_iv.ogm_buff_len; u32 seqno; u16 tvlv_len = 0; unsigned long send_time; lockdep_assert_held(&hard_iface->bat_iv.ogm_buff_mutex); /* interface already disabled by batadv_iv_ogm_iface_disable */ if (!*ogm_buff) return; /* the interface gets activated here to avoid race conditions between * the moment of activating the interface in * hardif_activate_interface() where the originator mac is set and * outdated packets (especially uninitialized mac addresses) in the * packet queue */ if (hard_iface->if_status == BATADV_IF_TO_BE_ACTIVATED) hard_iface->if_status = BATADV_IF_ACTIVE; primary_if = batadv_primary_if_get_selected(bat_priv); if (hard_iface == primary_if) { /* tt changes have to be committed before the tvlv data is * appended as it may alter the tt tvlv container */ batadv_tt_local_commit_changes(bat_priv); tvlv_len = batadv_tvlv_container_ogm_append(bat_priv, ogm_buff, ogm_buff_len, BATADV_OGM_HLEN); } batadv_ogm_packet = (struct batadv_ogm_packet *)(*ogm_buff); batadv_ogm_packet->tvlv_len = htons(tvlv_len); /* change sequence number to network order */ seqno = (u32)atomic_read(&hard_iface->bat_iv.ogm_seqno); batadv_ogm_packet->seqno = htonl(seqno); atomic_inc(&hard_iface->bat_iv.ogm_seqno); batadv_iv_ogm_slide_own_bcast_window(hard_iface); send_time = batadv_iv_ogm_emit_send_time(bat_priv); if (hard_iface != primary_if) { /* OGMs from secondary interfaces are only scheduled on their * respective interfaces. */ batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, hard_iface, hard_iface, 1, send_time); goto out; } /* OGMs from primary interfaces are scheduled on all * interfaces. */ rcu_read_lock(); list_for_each_entry_rcu(tmp_hard_iface, &batadv_hardif_list, list) { if (tmp_hard_iface->soft_iface != hard_iface->soft_iface) continue; if (!kref_get_unless_zero(&tmp_hard_iface->refcount)) continue; batadv_iv_ogm_queue_add(bat_priv, *ogm_buff, *ogm_buff_len, hard_iface, tmp_hard_iface, 1, send_time); batadv_hardif_put(tmp_hard_iface); } rcu_read_unlock(); out: batadv_hardif_put(primary_if); } static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) { if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) return; mutex_lock(&hard_iface->bat_iv.ogm_buff_mutex); batadv_iv_ogm_schedule_buff(hard_iface); mutex_unlock(&hard_iface->bat_iv.ogm_buff_mutex); } /** * batadv_iv_orig_ifinfo_sum() - Get bcast_own sum for originator over interface * @orig_node: originator which reproadcasted the OGMs directly * @if_outgoing: interface which transmitted the original OGM and received the * direct rebroadcast * * Return: Number of replied (rebroadcasted) OGMs which were transmitted by * an originator and directly (without intermediate hop) received by a specific * interface */ static u8 batadv_iv_orig_ifinfo_sum(struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_outgoing) { struct batadv_orig_ifinfo *orig_ifinfo; u8 sum; orig_ifinfo = batadv_orig_ifinfo_get(orig_node, if_outgoing); if (!orig_ifinfo) return 0; spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); sum = orig_ifinfo->bat_iv.bcast_own_sum; spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); batadv_orig_ifinfo_put(orig_ifinfo); return sum; } /** * batadv_iv_ogm_orig_update() - use OGM to update corresponding data in an * originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: the orig node who originally emitted the ogm packet * @orig_ifinfo: ifinfo for the outgoing interface of the orig_node * @ethhdr: Ethernet header of the OGM * @batadv_ogm_packet: the ogm packet * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * @dup_status: the duplicate status of this ogm packet. */ static void batadv_iv_ogm_orig_update(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_orig_ifinfo *orig_ifinfo, const struct ethhdr *ethhdr, const struct batadv_ogm_packet *batadv_ogm_packet, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing, enum batadv_dup_status dup_status) { struct batadv_neigh_ifinfo *neigh_ifinfo = NULL; struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_neigh_node *neigh_node = NULL; struct batadv_neigh_node *tmp_neigh_node = NULL; struct batadv_neigh_node *router = NULL; u8 sum_orig, sum_neigh; u8 *neigh_addr; u8 tq_avg; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s(): Searching and updating originator entry of received packet\n", __func__); rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) { neigh_addr = tmp_neigh_node->addr; if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && tmp_neigh_node->if_incoming == if_incoming && kref_get_unless_zero(&tmp_neigh_node->refcount)) { if (WARN(neigh_node, "too many matching neigh_nodes")) batadv_neigh_node_put(neigh_node); neigh_node = tmp_neigh_node; continue; } if (dup_status != BATADV_NO_DUP) continue; /* only update the entry for this outgoing interface */ neigh_ifinfo = batadv_neigh_ifinfo_get(tmp_neigh_node, if_outgoing); if (!neigh_ifinfo) continue; spin_lock_bh(&tmp_neigh_node->ifinfo_lock); batadv_ring_buffer_set(neigh_ifinfo->bat_iv.tq_recv, &neigh_ifinfo->bat_iv.tq_index, 0); tq_avg = batadv_ring_buffer_avg(neigh_ifinfo->bat_iv.tq_recv); neigh_ifinfo->bat_iv.tq_avg = tq_avg; spin_unlock_bh(&tmp_neigh_node->ifinfo_lock); batadv_neigh_ifinfo_put(neigh_ifinfo); neigh_ifinfo = NULL; } if (!neigh_node) { struct batadv_orig_node *orig_tmp; orig_tmp = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source); if (!orig_tmp) goto unlock; neigh_node = batadv_iv_ogm_neigh_new(if_incoming, ethhdr->h_source, orig_node, orig_tmp); batadv_orig_node_put(orig_tmp); if (!neigh_node) goto unlock; } else { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Updating existing last-hop neighbor of originator\n"); } rcu_read_unlock(); neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); if (!neigh_ifinfo) goto out; neigh_node->last_seen = jiffies; spin_lock_bh(&neigh_node->ifinfo_lock); batadv_ring_buffer_set(neigh_ifinfo->bat_iv.tq_recv, &neigh_ifinfo->bat_iv.tq_index, batadv_ogm_packet->tq); tq_avg = batadv_ring_buffer_avg(neigh_ifinfo->bat_iv.tq_recv); neigh_ifinfo->bat_iv.tq_avg = tq_avg; spin_unlock_bh(&neigh_node->ifinfo_lock); if (dup_status == BATADV_NO_DUP) { orig_ifinfo->last_ttl = batadv_ogm_packet->ttl; neigh_ifinfo->last_ttl = batadv_ogm_packet->ttl; } /* if this neighbor already is our next hop there is nothing * to change */ router = batadv_orig_router_get(orig_node, if_outgoing); if (router == neigh_node) goto out; if (router) { router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); if (!router_ifinfo) goto out; /* if this neighbor does not offer a better TQ we won't * consider it */ if (router_ifinfo->bat_iv.tq_avg > neigh_ifinfo->bat_iv.tq_avg) goto out; } /* if the TQ is the same and the link not more symmetric we * won't consider it either */ if (router_ifinfo && neigh_ifinfo->bat_iv.tq_avg == router_ifinfo->bat_iv.tq_avg) { sum_orig = batadv_iv_orig_ifinfo_sum(router->orig_node, router->if_incoming); sum_neigh = batadv_iv_orig_ifinfo_sum(neigh_node->orig_node, neigh_node->if_incoming); if (sum_orig >= sum_neigh) goto out; } batadv_update_route(bat_priv, orig_node, if_outgoing, neigh_node); goto out; unlock: rcu_read_unlock(); out: batadv_neigh_node_put(neigh_node); batadv_neigh_node_put(router); batadv_neigh_ifinfo_put(neigh_ifinfo); batadv_neigh_ifinfo_put(router_ifinfo); } /** * batadv_iv_ogm_calc_tq() - calculate tq for current received ogm packet * @orig_node: the orig node who originally emitted the ogm packet * @orig_neigh_node: the orig node struct of the neighbor who sent the packet * @batadv_ogm_packet: the ogm packet * @if_incoming: interface where the packet was received * @if_outgoing: interface for which the retransmission should be considered * * Return: true if the link can be considered bidirectional, false otherwise */ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, struct batadv_ogm_packet *batadv_ogm_packet, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_neigh_node *neigh_node = NULL, *tmp_neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; u8 total_count; u8 orig_eq_count, neigh_rq_count, neigh_rq_inv, tq_own; unsigned int tq_iface_hop_penalty = BATADV_TQ_MAX_VALUE; unsigned int neigh_rq_inv_cube, neigh_rq_max_cube; unsigned int tq_asym_penalty, inv_asym_penalty; unsigned int combined_tq; bool ret = false; /* find corresponding one hop neighbor */ rcu_read_lock(); hlist_for_each_entry_rcu(tmp_neigh_node, &orig_neigh_node->neigh_list, list) { if (!batadv_compare_eth(tmp_neigh_node->addr, orig_neigh_node->orig)) continue; if (tmp_neigh_node->if_incoming != if_incoming) continue; if (!kref_get_unless_zero(&tmp_neigh_node->refcount)) continue; neigh_node = tmp_neigh_node; break; } rcu_read_unlock(); if (!neigh_node) neigh_node = batadv_iv_ogm_neigh_new(if_incoming, orig_neigh_node->orig, orig_neigh_node, orig_neigh_node); if (!neigh_node) goto out; /* if orig_node is direct neighbor update neigh_node last_seen */ if (orig_node == orig_neigh_node) neigh_node->last_seen = jiffies; orig_node->last_seen = jiffies; /* find packet count of corresponding one hop neighbor */ orig_eq_count = batadv_iv_orig_ifinfo_sum(orig_neigh_node, if_incoming); neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); if (neigh_ifinfo) { neigh_rq_count = neigh_ifinfo->bat_iv.real_packet_count; batadv_neigh_ifinfo_put(neigh_ifinfo); } else { neigh_rq_count = 0; } /* pay attention to not get a value bigger than 100 % */ if (orig_eq_count > neigh_rq_count) total_count = neigh_rq_count; else total_count = orig_eq_count; /* if we have too few packets (too less data) we set tq_own to zero * if we receive too few packets it is not considered bidirectional */ if (total_count < BATADV_TQ_LOCAL_BIDRECT_SEND_MINIMUM || neigh_rq_count < BATADV_TQ_LOCAL_BIDRECT_RECV_MINIMUM) tq_own = 0; else /* neigh_node->real_packet_count is never zero as we * only purge old information when getting new * information */ tq_own = (BATADV_TQ_MAX_VALUE * total_count) / neigh_rq_count; /* 1 - ((1-x) ** 3), normalized to TQ_MAX_VALUE this does * affect the nearly-symmetric links only a little, but * punishes asymmetric links more. This will give a value * between 0 and TQ_MAX_VALUE */ neigh_rq_inv = BATADV_TQ_LOCAL_WINDOW_SIZE - neigh_rq_count; neigh_rq_inv_cube = neigh_rq_inv * neigh_rq_inv * neigh_rq_inv; neigh_rq_max_cube = BATADV_TQ_LOCAL_WINDOW_SIZE * BATADV_TQ_LOCAL_WINDOW_SIZE * BATADV_TQ_LOCAL_WINDOW_SIZE; inv_asym_penalty = BATADV_TQ_MAX_VALUE * neigh_rq_inv_cube; inv_asym_penalty /= neigh_rq_max_cube; tq_asym_penalty = BATADV_TQ_MAX_VALUE - inv_asym_penalty; tq_iface_hop_penalty -= atomic_read(&if_incoming->hop_penalty); /* penalize if the OGM is forwarded on the same interface. WiFi * interfaces and other half duplex devices suffer from throughput * drops as they can't send and receive at the same time. */ if (if_outgoing && if_incoming == if_outgoing && batadv_is_wifi_hardif(if_outgoing)) tq_iface_hop_penalty = batadv_hop_penalty(tq_iface_hop_penalty, bat_priv); combined_tq = batadv_ogm_packet->tq * tq_own * tq_asym_penalty * tq_iface_hop_penalty; combined_tq /= BATADV_TQ_MAX_VALUE * BATADV_TQ_MAX_VALUE * BATADV_TQ_MAX_VALUE; batadv_ogm_packet->tq = combined_tq; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_hop_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n", orig_node->orig, orig_neigh_node->orig, total_count, neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_hop_penalty, batadv_ogm_packet->tq, if_incoming->net_dev->name, if_outgoing ? if_outgoing->net_dev->name : "DEFAULT"); /* if link has the minimum required transmission quality * consider it bidirectional */ if (batadv_ogm_packet->tq >= BATADV_TQ_TOTAL_BIDRECT_LIMIT) ret = true; out: batadv_neigh_node_put(neigh_node); return ret; } /** * batadv_iv_ogm_update_seqnos() - process a batman packet for all interfaces, * adjust the sequence number and find out whether it is a duplicate * @ethhdr: ethernet header of the packet * @batadv_ogm_packet: OGM packet to be considered * @if_incoming: interface on which the OGM packet was received * @if_outgoing: interface for which the retransmission should be considered * * Return: duplicate status as enum batadv_dup_status */ static enum batadv_dup_status batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, const struct batadv_ogm_packet *batadv_ogm_packet, const struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_orig_node *orig_node; struct batadv_orig_ifinfo *orig_ifinfo = NULL; struct batadv_neigh_node *neigh_node; struct batadv_neigh_ifinfo *neigh_ifinfo; bool is_dup; s32 seq_diff; bool need_update = false; int set_mark; enum batadv_dup_status ret = BATADV_NO_DUP; u32 seqno = ntohl(batadv_ogm_packet->seqno); u8 *neigh_addr; u8 packet_count; unsigned long *bitmap; orig_node = batadv_iv_ogm_orig_get(bat_priv, batadv_ogm_packet->orig); if (!orig_node) return BATADV_NO_DUP; orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); if (WARN_ON(!orig_ifinfo)) { batadv_orig_node_put(orig_node); return 0; } spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); seq_diff = seqno - orig_ifinfo->last_real_seqno; /* signalize caller that the packet is to be dropped. */ if (!hlist_empty(&orig_node->neigh_list) && batadv_window_protected(bat_priv, seq_diff, BATADV_TQ_LOCAL_WINDOW_SIZE, &orig_ifinfo->batman_seqno_reset, NULL)) { ret = BATADV_PROTECTED; goto out; } rcu_read_lock(); hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { neigh_ifinfo = batadv_neigh_ifinfo_new(neigh_node, if_outgoing); if (!neigh_ifinfo) continue; neigh_addr = neigh_node->addr; is_dup = batadv_test_bit(neigh_ifinfo->bat_iv.real_bits, orig_ifinfo->last_real_seqno, seqno); if (batadv_compare_eth(neigh_addr, ethhdr->h_source) && neigh_node->if_incoming == if_incoming) { set_mark = 1; if (is_dup) ret = BATADV_NEIGH_DUP; } else { set_mark = 0; if (is_dup && ret != BATADV_NEIGH_DUP) ret = BATADV_ORIG_DUP; } /* if the window moved, set the update flag. */ bitmap = neigh_ifinfo->bat_iv.real_bits; need_update |= batadv_bit_get_packet(bat_priv, bitmap, seq_diff, set_mark); packet_count = bitmap_weight(bitmap, BATADV_TQ_LOCAL_WINDOW_SIZE); neigh_ifinfo->bat_iv.real_packet_count = packet_count; batadv_neigh_ifinfo_put(neigh_ifinfo); } rcu_read_unlock(); if (need_update) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "%s updating last_seqno: old %u, new %u\n", if_outgoing ? if_outgoing->net_dev->name : "DEFAULT", orig_ifinfo->last_real_seqno, seqno); orig_ifinfo->last_real_seqno = seqno; } out: spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); batadv_orig_node_put(orig_node); batadv_orig_ifinfo_put(orig_ifinfo); return ret; } /** * batadv_iv_ogm_process_per_outif() - process a batman iv OGM for an outgoing * interface * @skb: the skb containing the OGM * @ogm_offset: offset from skb->data to start of ogm header * @orig_node: the (cached) orig node for the originator of this OGM * @if_incoming: the interface where this packet was received * @if_outgoing: the interface for which the packet should be considered */ static void batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, struct batadv_orig_node *orig_node, struct batadv_hard_iface *if_incoming, struct batadv_hard_iface *if_outgoing) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_hardif_neigh_node *hardif_neigh = NULL; struct batadv_neigh_node *router = NULL; struct batadv_neigh_node *router_router = NULL; struct batadv_orig_node *orig_neigh_node; struct batadv_orig_ifinfo *orig_ifinfo; struct batadv_neigh_node *orig_neigh_router = NULL; struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_ogm_packet *ogm_packet; enum batadv_dup_status dup_status; bool is_from_best_next_hop = false; bool is_single_hop_neigh = false; bool sameseq, similar_ttl; struct sk_buff *skb_priv; struct ethhdr *ethhdr; u8 *prev_sender; bool is_bidirect; /* create a private copy of the skb, as some functions change tq value * and/or flags. */ skb_priv = skb_copy(skb, GFP_ATOMIC); if (!skb_priv) return; ethhdr = eth_hdr(skb_priv); ogm_packet = (struct batadv_ogm_packet *)(skb_priv->data + ogm_offset); dup_status = batadv_iv_ogm_update_seqnos(ethhdr, ogm_packet, if_incoming, if_outgoing); if (batadv_compare_eth(ethhdr->h_source, ogm_packet->orig)) is_single_hop_neigh = true; if (dup_status == BATADV_PROTECTED) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: packet within seqno protection time (sender: %pM)\n", ethhdr->h_source); goto out; } if (ogm_packet->tq == 0) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: originator packet with tq equal 0\n"); goto out; } if (is_single_hop_neigh) { hardif_neigh = batadv_hardif_neigh_get(if_incoming, ethhdr->h_source); if (hardif_neigh) hardif_neigh->last_seen = jiffies; } router = batadv_orig_router_get(orig_node, if_outgoing); if (router) { router_router = batadv_orig_router_get(router->orig_node, if_outgoing); router_ifinfo = batadv_neigh_ifinfo_get(router, if_outgoing); } if ((router_ifinfo && router_ifinfo->bat_iv.tq_avg != 0) && (batadv_compare_eth(router->addr, ethhdr->h_source))) is_from_best_next_hop = true; prev_sender = ogm_packet->prev_sender; /* avoid temporary routing loops */ if (router && router_router && (batadv_compare_eth(router->addr, prev_sender)) && !(batadv_compare_eth(ogm_packet->orig, prev_sender)) && (batadv_compare_eth(router->addr, router_router->addr))) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: ignoring all rebroadcast packets that may make me loop (sender: %pM)\n", ethhdr->h_source); goto out; } if (if_outgoing == BATADV_IF_DEFAULT) batadv_tvlv_ogm_receive(bat_priv, ogm_packet, orig_node); /* if sender is a direct neighbor the sender mac equals * originator mac */ if (is_single_hop_neigh) orig_neigh_node = orig_node; else orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source); if (!orig_neigh_node) goto out; /* Update nc_nodes of the originator */ batadv_nc_update_nc_node(bat_priv, orig_node, orig_neigh_node, ogm_packet, is_single_hop_neigh); orig_neigh_router = batadv_orig_router_get(orig_neigh_node, if_outgoing); /* drop packet if sender is not a direct neighbor and if we * don't route towards it */ if (!is_single_hop_neigh && !orig_neigh_router) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: OGM via unknown neighbor!\n"); goto out_neigh; } is_bidirect = batadv_iv_ogm_calc_tq(orig_node, orig_neigh_node, ogm_packet, if_incoming, if_outgoing); /* update ranking if it is not a duplicate or has the same * seqno and similar ttl as the non-duplicate */ orig_ifinfo = batadv_orig_ifinfo_new(orig_node, if_outgoing); if (!orig_ifinfo) goto out_neigh; sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno); similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl; if (is_bidirect && (dup_status == BATADV_NO_DUP || (sameseq && similar_ttl))) { batadv_iv_ogm_orig_update(bat_priv, orig_node, orig_ifinfo, ethhdr, ogm_packet, if_incoming, if_outgoing, dup_status); } batadv_orig_ifinfo_put(orig_ifinfo); /* only forward for specific interface, not for the default one. */ if (if_outgoing == BATADV_IF_DEFAULT) goto out_neigh; /* is single hop (direct) neighbor */ if (is_single_hop_neigh) { /* OGMs from secondary interfaces should only scheduled once * per interface where it has been received, not multiple times */ if (ogm_packet->ttl <= 2 && if_incoming != if_outgoing) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: OGM from secondary interface and wrong outgoing interface\n"); goto out_neigh; } /* mark direct link on incoming interface */ batadv_iv_ogm_forward(orig_node, ethhdr, ogm_packet, is_single_hop_neigh, is_from_best_next_hop, if_incoming, if_outgoing); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: rebroadcast neighbor packet with direct link flag\n"); goto out_neigh; } /* multihop originator */ if (!is_bidirect) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: not received via bidirectional link\n"); goto out_neigh; } if (dup_status == BATADV_NEIGH_DUP) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: duplicate packet received\n"); goto out_neigh; } batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Forwarding packet: rebroadcast originator packet\n"); batadv_iv_ogm_forward(orig_node, ethhdr, ogm_packet, is_single_hop_neigh, is_from_best_next_hop, if_incoming, if_outgoing); out_neigh: if (orig_neigh_node && !is_single_hop_neigh) batadv_orig_node_put(orig_neigh_node); out: batadv_neigh_ifinfo_put(router_ifinfo); batadv_neigh_node_put(router); batadv_neigh_node_put(router_router); batadv_neigh_node_put(orig_neigh_router); batadv_hardif_neigh_put(hardif_neigh); consume_skb(skb_priv); } /** * batadv_iv_ogm_process_reply() - Check OGM for direct reply and process it * @ogm_packet: rebroadcast OGM packet to process * @if_incoming: the interface where this packet was received * @orig_node: originator which reproadcasted the OGMs * @if_incoming_seqno: OGM sequence number when rebroadcast was received */ static void batadv_iv_ogm_process_reply(struct batadv_ogm_packet *ogm_packet, struct batadv_hard_iface *if_incoming, struct batadv_orig_node *orig_node, u32 if_incoming_seqno) { struct batadv_orig_ifinfo *orig_ifinfo; s32 bit_pos; u8 *weight; /* neighbor has to indicate direct link and it has to * come via the corresponding interface */ if (!(ogm_packet->flags & BATADV_DIRECTLINK)) return; if (!batadv_compare_eth(if_incoming->net_dev->dev_addr, ogm_packet->orig)) return; orig_ifinfo = batadv_orig_ifinfo_get(orig_node, if_incoming); if (!orig_ifinfo) return; /* save packet seqno for bidirectional check */ spin_lock_bh(&orig_node->bat_iv.ogm_cnt_lock); bit_pos = if_incoming_seqno - 2; bit_pos -= ntohl(ogm_packet->seqno); batadv_set_bit(orig_ifinfo->bat_iv.bcast_own, bit_pos); weight = &orig_ifinfo->bat_iv.bcast_own_sum; *weight = bitmap_weight(orig_ifinfo->bat_iv.bcast_own, BATADV_TQ_LOCAL_WINDOW_SIZE); spin_unlock_bh(&orig_node->bat_iv.ogm_cnt_lock); batadv_orig_ifinfo_put(orig_ifinfo); } /** * batadv_iv_ogm_process() - process an incoming batman iv OGM * @skb: the skb containing the OGM * @ogm_offset: offset to the OGM which should be processed (for aggregates) * @if_incoming: the interface where this packet was received */ static void batadv_iv_ogm_process(const struct sk_buff *skb, int ogm_offset, struct batadv_hard_iface *if_incoming) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_orig_node *orig_neigh_node, *orig_node; struct batadv_hard_iface *hard_iface; struct batadv_ogm_packet *ogm_packet; u32 if_incoming_seqno; bool has_directlink_flag; struct ethhdr *ethhdr; bool is_my_oldorig = false; bool is_my_addr = false; bool is_my_orig = false; ogm_packet = (struct batadv_ogm_packet *)(skb->data + ogm_offset); ethhdr = eth_hdr(skb); /* Silently drop when the batman packet is actually not a * correct packet. * * This might happen if a packet is padded (e.g. Ethernet has a * minimum frame length of 64 byte) and the aggregation interprets * it as an additional length. * * TODO: A more sane solution would be to have a bit in the * batadv_ogm_packet to detect whether the packet is the last * packet in an aggregation. Here we expect that the padding * is always zero (or not 0x01) */ if (ogm_packet->packet_type != BATADV_IV_OGM) return; /* could be changed by schedule_own_packet() */ if_incoming_seqno = atomic_read(&if_incoming->bat_iv.ogm_seqno); if (ogm_packet->flags & BATADV_DIRECTLINK) has_directlink_flag = true; else has_directlink_flag = false; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Received BATMAN packet via NB: %pM, IF: %s [%pM] (from OG: %pM, via prev OG: %pM, seqno %u, tq %d, TTL %d, V %d, IDF %d)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, ogm_packet->orig, ogm_packet->prev_sender, ntohl(ogm_packet->seqno), ogm_packet->tq, ogm_packet->ttl, ogm_packet->version, has_directlink_flag); rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; if (hard_iface->soft_iface != if_incoming->soft_iface) continue; if (batadv_compare_eth(ethhdr->h_source, hard_iface->net_dev->dev_addr)) is_my_addr = true; if (batadv_compare_eth(ogm_packet->orig, hard_iface->net_dev->dev_addr)) is_my_orig = true; if (batadv_compare_eth(ogm_packet->prev_sender, hard_iface->net_dev->dev_addr)) is_my_oldorig = true; } rcu_read_unlock(); if (is_my_addr) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: received my own broadcast (sender: %pM)\n", ethhdr->h_source); return; } if (is_my_orig) { orig_neigh_node = batadv_iv_ogm_orig_get(bat_priv, ethhdr->h_source); if (!orig_neigh_node) return; batadv_iv_ogm_process_reply(ogm_packet, if_incoming, orig_neigh_node, if_incoming_seqno); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: originator packet from myself (via neighbor)\n"); batadv_orig_node_put(orig_neigh_node); return; } if (is_my_oldorig) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: ignoring all rebroadcast echos (sender: %pM)\n", ethhdr->h_source); return; } if (ogm_packet->flags & BATADV_NOT_BEST_NEXT_HOP) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: ignoring all packets not forwarded from the best next hop (sender: %pM)\n", ethhdr->h_source); return; } orig_node = batadv_iv_ogm_orig_get(bat_priv, ogm_packet->orig); if (!orig_node) return; batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node, if_incoming, BATADV_IF_DEFAULT); rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->if_status != BATADV_IF_ACTIVE) continue; if (hard_iface->soft_iface != bat_priv->soft_iface) continue; if (!kref_get_unless_zero(&hard_iface->refcount)) continue; batadv_iv_ogm_process_per_outif(skb, ogm_offset, orig_node, if_incoming, hard_iface); batadv_hardif_put(hard_iface); } rcu_read_unlock(); batadv_orig_node_put(orig_node); } static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) { struct delayed_work *delayed_work; struct batadv_forw_packet *forw_packet; struct batadv_priv *bat_priv; bool dropped = false; delayed_work = to_delayed_work(work); forw_packet = container_of(delayed_work, struct batadv_forw_packet, delayed_work); bat_priv = netdev_priv(forw_packet->if_incoming->soft_iface); if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_DEACTIVATING) { dropped = true; goto out; } batadv_iv_ogm_emit(forw_packet); /* we have to have at least one packet in the queue to determine the * queues wake up time unless we are shutting down. * * only re-schedule if this is the "original" copy, e.g. the OGM of the * primary interface should only be rescheduled once per period, but * this function will be called for the forw_packet instances of the * other secondary interfaces as well. */ if (forw_packet->own && forw_packet->if_incoming == forw_packet->if_outgoing) batadv_iv_ogm_schedule(forw_packet->if_incoming); out: /* do we get something for free()? */ if (batadv_forw_packet_steal(forw_packet, &bat_priv->forw_bat_list_lock)) batadv_forw_packet_free(forw_packet, dropped); } static int batadv_iv_ogm_receive(struct sk_buff *skb, struct batadv_hard_iface *if_incoming) { struct batadv_priv *bat_priv = netdev_priv(if_incoming->soft_iface); struct batadv_ogm_packet *ogm_packet; u8 *packet_pos; int ogm_offset; bool res; int ret = NET_RX_DROP; res = batadv_check_management_packet(skb, if_incoming, BATADV_OGM_HLEN); if (!res) goto free_skb; /* did we receive a B.A.T.M.A.N. IV OGM packet on an interface * that does not have B.A.T.M.A.N. IV enabled ? */ if (bat_priv->algo_ops->iface.enable != batadv_iv_ogm_iface_enable) goto free_skb; batadv_inc_counter(bat_priv, BATADV_CNT_MGMT_RX); batadv_add_counter(bat_priv, BATADV_CNT_MGMT_RX_BYTES, skb->len + ETH_HLEN); ogm_offset = 0; ogm_packet = (struct batadv_ogm_packet *)skb->data; /* unpack the aggregated packets and process them one by one */ while (batadv_iv_ogm_aggr_packet(ogm_offset, skb_headlen(skb), ogm_packet)) { batadv_iv_ogm_process(skb, ogm_offset, if_incoming); ogm_offset += BATADV_OGM_HLEN; ogm_offset += ntohs(ogm_packet->tvlv_len); packet_pos = skb->data + ogm_offset; ogm_packet = (struct batadv_ogm_packet *)packet_pos; } ret = NET_RX_SUCCESS; free_skb: if (ret == NET_RX_SUCCESS) consume_skb(skb); else kfree_skb(skb); return ret; } /** * batadv_iv_ogm_neigh_get_tq_avg() - Get the TQ average for a neighbour on a * given outgoing interface. * @neigh_node: Neighbour of interest * @if_outgoing: Outgoing interface of interest * @tq_avg: Pointer of where to store the TQ average * * Return: False if no average TQ available, otherwise true. */ static bool batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node, struct batadv_hard_iface *if_outgoing, u8 *tq_avg) { struct batadv_neigh_ifinfo *n_ifinfo; n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); if (!n_ifinfo) return false; *tq_avg = n_ifinfo->bat_iv.tq_avg; batadv_neigh_ifinfo_put(n_ifinfo); return true; } /** * batadv_iv_ogm_orig_dump_subentry() - Dump an originator subentry into a * message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the soft interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @neigh_node: Single hops neighbour * @best: Is the best originator * * Return: Error code, or 0 on success */ static int batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing, struct batadv_orig_node *orig_node, struct batadv_neigh_node *neigh_node, bool best) { void *hdr; u8 tq_avg; unsigned int last_seen_msecs; last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node, if_outgoing, &tq_avg)) return 0; if (if_outgoing != BATADV_IF_DEFAULT && if_outgoing != neigh_node->if_incoming) return 0; hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS); if (!hdr) return -ENOBUFS; if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) || nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, neigh_node->addr) || nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, neigh_node->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, neigh_node->if_incoming->net_dev->ifindex) || nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) || nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs)) goto nla_put_failure; if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * batadv_iv_ogm_orig_dump_entry() - Dump an originator entry into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the soft interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @orig_node: Originator to dump * @sub_s: Number of sub entries to skip * * This function assumes the caller holds rcu_read_lock(). * * Return: Error code, or 0 on success */ static int batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing, struct batadv_orig_node *orig_node, int *sub_s) { struct batadv_neigh_node *neigh_node_best; struct batadv_neigh_node *neigh_node; int sub = 0; bool best; u8 tq_avg_best; neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); if (!neigh_node_best) goto out; if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node_best, if_outgoing, &tq_avg_best)) goto out; if (tq_avg_best == 0) goto out; hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { if (sub++ < *sub_s) continue; best = (neigh_node == neigh_node_best); if (batadv_iv_ogm_orig_dump_subentry(msg, portid, seq, bat_priv, if_outgoing, orig_node, neigh_node, best)) { batadv_neigh_node_put(neigh_node_best); *sub_s = sub - 1; return -EMSGSIZE; } } out: batadv_neigh_node_put(neigh_node_best); *sub_s = 0; return 0; } /** * batadv_iv_ogm_orig_dump_bucket() - Dump an originator bucket into a * message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the soft interface information * @if_outgoing: Limit dump to entries with this outgoing interface * @head: Bucket to be dumped * @idx_s: Number of entries to be skipped * @sub: Number of sub entries to be skipped * * Return: Error code, or 0 on success */ static int batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing, struct hlist_head *head, int *idx_s, int *sub) { struct batadv_orig_node *orig_node; int idx = 0; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) { if (idx++ < *idx_s) continue; if (batadv_iv_ogm_orig_dump_entry(msg, portid, seq, bat_priv, if_outgoing, orig_node, sub)) { rcu_read_unlock(); *idx_s = idx - 1; return -EMSGSIZE; } } rcu_read_unlock(); *idx_s = 0; *sub = 0; return 0; } /** * batadv_iv_ogm_orig_dump() - Dump the originators into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information * @if_outgoing: Limit dump to entries with this outgoing interface */ static void batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_hard_iface *if_outgoing) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; int bucket = cb->args[0]; int idx = cb->args[1]; int sub = cb->args[2]; int portid = NETLINK_CB(cb->skb).portid; while (bucket < hash->size) { head = &hash->table[bucket]; if (batadv_iv_ogm_orig_dump_bucket(msg, portid, cb->nlh->nlmsg_seq, bat_priv, if_outgoing, head, &idx, &sub)) break; bucket++; } cb->args[0] = bucket; cb->args[1] = idx; cb->args[2] = sub; } /** * batadv_iv_ogm_neigh_diff() - calculate tq difference of two neighbors * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor * @diff: pointer to integer receiving the calculated difference * * The content of *@diff is only valid when this function returns true. * It is less, equal to or greater than 0 if the metric via neigh1 is lower, * the same as or higher than the metric via neigh2 * * Return: true when the difference could be calculated, false otherwise */ static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2, int *diff) { struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; u8 tq1, tq2; bool ret = true; neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); if (!neigh1_ifinfo || !neigh2_ifinfo) { ret = false; goto out; } tq1 = neigh1_ifinfo->bat_iv.tq_avg; tq2 = neigh2_ifinfo->bat_iv.tq_avg; *diff = (int)tq1 - (int)tq2; out: batadv_neigh_ifinfo_put(neigh1_ifinfo); batadv_neigh_ifinfo_put(neigh2_ifinfo); return ret; } /** * batadv_iv_ogm_neigh_dump_neigh() - Dump a neighbour into a netlink message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @hardif_neigh: Neighbour to be dumped * * Return: Error code, or 0 on success */ static int batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_hardif_neigh_node *hardif_neigh) { void *hdr; unsigned int last_seen_msecs; last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS); if (!hdr) return -ENOBUFS; if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, hardif_neigh->addr) || nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, hardif_neigh->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, hardif_neigh->if_incoming->net_dev->ifindex) || nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs)) goto nla_put_failure; genlmsg_end(msg, hdr); return 0; nla_put_failure: genlmsg_cancel(msg, hdr); return -EMSGSIZE; } /** * batadv_iv_ogm_neigh_dump_hardif() - Dump the neighbours of a hard interface * into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @seq: Sequence number of netlink message * @bat_priv: The bat priv with all the soft interface information * @hard_iface: Hard interface to dump the neighbours for * @idx_s: Number of entries to skip * * This function assumes the caller holds rcu_read_lock(). * * Return: Error code, or 0 on success */ static int batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_priv *bat_priv, struct batadv_hard_iface *hard_iface, int *idx_s) { struct batadv_hardif_neigh_node *hardif_neigh; int idx = 0; hlist_for_each_entry_rcu(hardif_neigh, &hard_iface->neigh_list, list) { if (idx++ < *idx_s) continue; if (batadv_iv_ogm_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) { *idx_s = idx - 1; return -EMSGSIZE; } } *idx_s = 0; return 0; } /** * batadv_iv_ogm_neigh_dump() - Dump the neighbours into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information * @single_hardif: Limit dump to this hard interface */ static void batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_hard_iface *single_hardif) { struct batadv_hard_iface *hard_iface; int i_hardif = 0; int i_hardif_s = cb->args[0]; int idx = cb->args[1]; int portid = NETLINK_CB(cb->skb).portid; rcu_read_lock(); if (single_hardif) { if (i_hardif_s == 0) { if (batadv_iv_ogm_neigh_dump_hardif(msg, portid, cb->nlh->nlmsg_seq, bat_priv, single_hardif, &idx) == 0) i_hardif++; } } else { list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { if (hard_iface->soft_iface != bat_priv->soft_iface) continue; if (i_hardif++ < i_hardif_s) continue; if (batadv_iv_ogm_neigh_dump_hardif(msg, portid, cb->nlh->nlmsg_seq, bat_priv, hard_iface, &idx)) { i_hardif--; break; } } } rcu_read_unlock(); cb->args[0] = i_hardif; cb->args[1] = idx; } /** * batadv_iv_ogm_neigh_cmp() - compare the metrics of two neighbors * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor * * Return: a value less, equal to or greater than 0 if the metric via neigh1 is * lower, the same as or higher than the metric via neigh2 */ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { bool ret; int diff; ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2, if_outgoing2, &diff); if (!ret) return 0; return diff; } /** * batadv_iv_ogm_neigh_is_sob() - check if neigh1 is similarly good or better * than neigh2 from the metric prospective * @neigh1: the first neighbor object of the comparison * @if_outgoing1: outgoing interface for the first neighbor * @neigh2: the second neighbor object of the comparison * @if_outgoing2: outgoing interface for the second neighbor * * Return: true if the metric via neigh1 is equally good or better than * the metric via neigh2, false otherwise. */ static bool batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { bool ret; int diff; ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2, if_outgoing2, &diff); if (!ret) return false; ret = diff > -BATADV_TQ_SIMILARITY_THRESHOLD; return ret; } static void batadv_iv_iface_enabled(struct batadv_hard_iface *hard_iface) { /* begin scheduling originator messages on that interface */ batadv_iv_ogm_schedule(hard_iface); } /** * batadv_iv_init_sel_class() - initialize GW selection class * @bat_priv: the bat priv with all the soft interface information */ static void batadv_iv_init_sel_class(struct batadv_priv *bat_priv) { /* set default TQ difference threshold to 20 */ atomic_set(&bat_priv->gw.sel_class, 20); } static struct batadv_gw_node * batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) { struct batadv_neigh_node *router; struct batadv_neigh_ifinfo *router_ifinfo; struct batadv_gw_node *gw_node, *curr_gw = NULL; u64 max_gw_factor = 0; u64 tmp_gw_factor = 0; u8 max_tq = 0; u8 tq_avg; struct batadv_orig_node *orig_node; rcu_read_lock(); hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.gateway_list, list) { orig_node = gw_node->orig_node; router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); if (!router) continue; router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); if (!router_ifinfo) goto next; if (!kref_get_unless_zero(&gw_node->refcount)) goto next; tq_avg = router_ifinfo->bat_iv.tq_avg; switch (atomic_read(&bat_priv->gw.sel_class)) { case 1: /* fast connection */ tmp_gw_factor = tq_avg * tq_avg; tmp_gw_factor *= gw_node->bandwidth_down; tmp_gw_factor *= 100 * 100; tmp_gw_factor >>= 18; if (tmp_gw_factor > max_gw_factor || (tmp_gw_factor == max_gw_factor && tq_avg > max_tq)) { batadv_gw_node_put(curr_gw); curr_gw = gw_node; kref_get(&curr_gw->refcount); } break; default: /* 2: stable connection (use best statistic) * 3: fast-switch (use best statistic but change as * soon as a better gateway appears) * XX: late-switch (use best statistic but change as * soon as a better gateway appears which has * $routing_class more tq points) */ if (tq_avg > max_tq) { batadv_gw_node_put(curr_gw); curr_gw = gw_node; kref_get(&curr_gw->refcount); } break; } if (tq_avg > max_tq) max_tq = tq_avg; if (tmp_gw_factor > max_gw_factor) max_gw_factor = tmp_gw_factor; batadv_gw_node_put(gw_node); next: batadv_neigh_node_put(router); batadv_neigh_ifinfo_put(router_ifinfo); } rcu_read_unlock(); return curr_gw; } static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv, struct batadv_orig_node *curr_gw_orig, struct batadv_orig_node *orig_node) { struct batadv_neigh_ifinfo *router_orig_ifinfo = NULL; struct batadv_neigh_ifinfo *router_gw_ifinfo = NULL; struct batadv_neigh_node *router_gw = NULL; struct batadv_neigh_node *router_orig = NULL; u8 gw_tq_avg, orig_tq_avg; bool ret = false; /* dynamic re-election is performed only on fast or late switch */ if (atomic_read(&bat_priv->gw.sel_class) <= 2) return false; router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); if (!router_gw) { ret = true; goto out; } router_gw_ifinfo = batadv_neigh_ifinfo_get(router_gw, BATADV_IF_DEFAULT); if (!router_gw_ifinfo) { ret = true; goto out; } router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); if (!router_orig) goto out; router_orig_ifinfo = batadv_neigh_ifinfo_get(router_orig, BATADV_IF_DEFAULT); if (!router_orig_ifinfo) goto out; gw_tq_avg = router_gw_ifinfo->bat_iv.tq_avg; orig_tq_avg = router_orig_ifinfo->bat_iv.tq_avg; /* the TQ value has to be better */ if (orig_tq_avg < gw_tq_avg) goto out; /* if the routing class is greater than 3 the value tells us how much * greater the TQ value of the new gateway must be */ if ((atomic_read(&bat_priv->gw.sel_class) > 3) && (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) goto out; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n", gw_tq_avg, orig_tq_avg); ret = true; out: batadv_neigh_ifinfo_put(router_gw_ifinfo); batadv_neigh_ifinfo_put(router_orig_ifinfo); batadv_neigh_node_put(router_gw); batadv_neigh_node_put(router_orig); return ret; } /** * batadv_iv_gw_dump_entry() - Dump a gateway into a message * @msg: Netlink message to dump into * @portid: Port making netlink request * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information * @gw_node: Gateway to be dumped * * Return: Error code, or 0 on success */ static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, struct netlink_callback *cb, struct batadv_priv *bat_priv, struct batadv_gw_node *gw_node) { struct batadv_neigh_ifinfo *router_ifinfo = NULL; struct batadv_neigh_node *router; struct batadv_gw_node *curr_gw = NULL; int ret = 0; void *hdr; router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); if (!router) goto out; router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); if (!router_ifinfo) goto out; curr_gw = batadv_gw_get_selected_gw_node(bat_priv); hdr = genlmsg_put(msg, portid, cb->nlh->nlmsg_seq, &batadv_netlink_family, NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); if (!hdr) { ret = -ENOBUFS; goto out; } genl_dump_check_consistent(cb, hdr); ret = -EMSGSIZE; if (curr_gw == gw_node) if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { genlmsg_cancel(msg, hdr); goto out; } if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, gw_node->orig_node->orig) || nla_put_u8(msg, BATADV_ATTR_TQ, router_ifinfo->bat_iv.tq_avg) || nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr) || nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, router->if_incoming->net_dev->name) || nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, router->if_incoming->net_dev->ifindex) || nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, gw_node->bandwidth_down) || nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) { genlmsg_cancel(msg, hdr); goto out; } genlmsg_end(msg, hdr); ret = 0; out: batadv_gw_node_put(curr_gw); batadv_neigh_ifinfo_put(router_ifinfo); batadv_neigh_node_put(router); return ret; } /** * batadv_iv_gw_dump() - Dump gateways into a message * @msg: Netlink message to dump into * @cb: Control block containing additional options * @bat_priv: The bat priv with all the soft interface information */ static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, struct batadv_priv *bat_priv) { int portid = NETLINK_CB(cb->skb).portid; struct batadv_gw_node *gw_node; int idx_skip = cb->args[0]; int idx = 0; spin_lock_bh(&bat_priv->gw.list_lock); cb->seq = bat_priv->gw.generation << 1 | 1; hlist_for_each_entry(gw_node, &bat_priv->gw.gateway_list, list) { if (idx++ < idx_skip) continue; if (batadv_iv_gw_dump_entry(msg, portid, cb, bat_priv, gw_node)) { idx_skip = idx - 1; goto unlock; } } idx_skip = idx; unlock: spin_unlock_bh(&bat_priv->gw.list_lock); cb->args[0] = idx_skip; } static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", .iface = { .enable = batadv_iv_ogm_iface_enable, .enabled = batadv_iv_iface_enabled, .disable = batadv_iv_ogm_iface_disable, .update_mac = batadv_iv_ogm_iface_update_mac, .primary_set = batadv_iv_ogm_primary_iface_set, }, .neigh = { .cmp = batadv_iv_ogm_neigh_cmp, .is_similar_or_better = batadv_iv_ogm_neigh_is_sob, .dump = batadv_iv_ogm_neigh_dump, }, .orig = { .dump = batadv_iv_ogm_orig_dump, }, .gw = { .init_sel_class = batadv_iv_init_sel_class, .sel_class_max = BATADV_TQ_MAX_VALUE, .get_best_gw_node = batadv_iv_gw_get_best_gw_node, .is_eligible = batadv_iv_gw_is_eligible, .dump = batadv_iv_gw_dump, }, }; /** * batadv_iv_init() - B.A.T.M.A.N. IV initialization function * * Return: 0 on success or negative error number in case of failure */ int __init batadv_iv_init(void) { int ret; /* batman originator packet */ ret = batadv_recv_handler_register(BATADV_IV_OGM, batadv_iv_ogm_receive); if (ret < 0) goto out; ret = batadv_algo_register(&batadv_batman_iv); if (ret < 0) goto handler_unregister; goto out; handler_unregister: batadv_recv_handler_unregister(BATADV_IV_OGM); out: return ret; }
9 9 9 3 3 3 1 3 3 1 1 1 2 3 2 2 1 7 7 4 1 2 2 2 1 2 2 4 3 1 13 3 2 4 1 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 // SPDX-License-Identifier: GPL-2.0-or-later /* * taskstats.c - Export per-task statistics to userland * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 * (C) Balbir Singh, IBM Corp. 2006 */ #include <linux/kernel.h> #include <linux/taskstats_kern.h> #include <linux/tsacct_kern.h> #include <linux/acct.h> #include <linux/delayacct.h> #include <linux/cpumask.h> #include <linux/percpu.h> #include <linux/slab.h> #include <linux/cgroupstats.h> #include <linux/cgroup.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pid_namespace.h> #include <net/genetlink.h> #include <linux/atomic.h> #include <linux/sched/cputime.h> /* * Maximum length of a cpumask that can be specified in * the TASKSTATS_CMD_ATTR_REGISTER/DEREGISTER_CPUMASK attribute */ #define TASKSTATS_CPUMASK_MAXLEN (100+6*NR_CPUS) static DEFINE_PER_CPU(__u32, taskstats_seqnum); static int family_registered; struct kmem_cache *taskstats_cache; static struct genl_family family; static const struct nla_policy taskstats_cmd_get_policy[] = { [TASKSTATS_CMD_ATTR_PID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_TGID] = { .type = NLA_U32 }, [TASKSTATS_CMD_ATTR_REGISTER_CPUMASK] = { .type = NLA_STRING }, [TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK] = { .type = NLA_STRING },}; static const struct nla_policy cgroupstats_cmd_get_policy[] = { [CGROUPSTATS_CMD_ATTR_FD] = { .type = NLA_U32 }, }; struct listener { struct list_head list; pid_t pid; char valid; }; struct listener_list { struct rw_semaphore sem; struct list_head list; }; static DEFINE_PER_CPU(struct listener_list, listener_array); enum actions { REGISTER, DEREGISTER, CPU_DONT_CARE }; static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, size_t size) { struct sk_buff *skb; void *reply; /* * If new attributes are added, please revisit this allocation */ skb = genlmsg_new(size, GFP_KERNEL); if (!skb) return -ENOMEM; if (!info) { int seq = this_cpu_inc_return(taskstats_seqnum) - 1; reply = genlmsg_put(skb, 0, seq, &family, 0, cmd); } else reply = genlmsg_put_reply(skb, info, &family, 0, cmd); if (reply == NULL) { nlmsg_free(skb); return -EINVAL; } *skbp = skb; return 0; } /* * Send taskstats data in @skb to listener with nl_pid @pid */ static int send_reply(struct sk_buff *skb, struct genl_info *info) { struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); void *reply = genlmsg_data(genlhdr); genlmsg_end(skb, reply); return genlmsg_reply(skb, info); } /* * Send taskstats data in @skb to listeners registered for @cpu's exit data */ static void send_cpu_listeners(struct sk_buff *skb, struct listener_list *listeners) { struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); struct listener *s, *tmp; struct sk_buff *skb_next, *skb_cur = skb; void *reply = genlmsg_data(genlhdr); int delcount = 0; genlmsg_end(skb, reply); down_read(&listeners->sem); list_for_each_entry(s, &listeners->list, list) { int rc; skb_next = NULL; if (!list_is_last(&s->list, &listeners->list)) { skb_next = skb_clone(skb_cur, GFP_KERNEL); if (!skb_next) break; } rc = genlmsg_unicast(&init_net, skb_cur, s->pid); if (rc == -ECONNREFUSED) { s->valid = 0; delcount++; } skb_cur = skb_next; } up_read(&listeners->sem); if (skb_cur) nlmsg_free(skb_cur); if (!delcount) return; /* Delete invalidated entries */ down_write(&listeners->sem); list_for_each_entry_safe(s, tmp, &listeners->list, list) { if (!s->valid) { list_del(&s->list); kfree(s); } } up_write(&listeners->sem); } static void exe_add_tsk(struct taskstats *stats, struct task_struct *tsk) { /* No idea if I'm allowed to access that here, now. */ struct file *exe_file = get_task_exe_file(tsk); if (exe_file) { /* Following cp_new_stat64() in stat.c . */ stats->ac_exe_dev = huge_encode_dev(exe_file->f_inode->i_sb->s_dev); stats->ac_exe_inode = exe_file->f_inode->i_ino; fput(exe_file); } else { stats->ac_exe_dev = 0; stats->ac_exe_inode = 0; } } static void fill_stats(struct user_namespace *user_ns, struct pid_namespace *pid_ns, struct task_struct *tsk, struct taskstats *stats) { memset(stats, 0, sizeof(*stats)); /* * Each accounting subsystem adds calls to its functions to * fill in relevant parts of struct taskstsats as follows * * per-task-foo(stats, tsk); */ delayacct_add_tsk(stats, tsk); /* fill in basic acct fields */ stats->version = TASKSTATS_VERSION; stats->nvcsw = tsk->nvcsw; stats->nivcsw = tsk->nivcsw; bacct_add_tsk(user_ns, pid_ns, stats, tsk); /* fill in extended acct fields */ xacct_add_tsk(stats, tsk); /* add executable info */ exe_add_tsk(stats, tsk); } static int fill_stats_for_pid(pid_t pid, struct taskstats *stats) { struct task_struct *tsk; tsk = find_get_task_by_vpid(pid); if (!tsk) return -ESRCH; fill_stats(current_user_ns(), task_active_pid_ns(current), tsk, stats); put_task_struct(tsk); return 0; } static int fill_stats_for_tgid(pid_t tgid, struct taskstats *stats) { struct task_struct *tsk, *first; unsigned long flags; int rc = -ESRCH; u64 delta, utime, stime; u64 start_time; /* * Add additional stats from live tasks except zombie thread group * leaders who are already counted with the dead tasks */ rcu_read_lock(); first = find_task_by_vpid(tgid); if (!first || !lock_task_sighand(first, &flags)) goto out; if (first->signal->stats) memcpy(stats, first->signal->stats, sizeof(*stats)); else memset(stats, 0, sizeof(*stats)); start_time = ktime_get_ns(); for_each_thread(first, tsk) { if (tsk->exit_state) continue; /* * Accounting subsystem can call its functions here to * fill in relevant parts of struct taskstsats as follows * * per-task-foo(stats, tsk); */ delayacct_add_tsk(stats, tsk); /* calculate task elapsed time in nsec */ delta = start_time - tsk->start_time; /* Convert to micro seconds */ do_div(delta, NSEC_PER_USEC); stats->ac_etime += delta; task_cputime(tsk, &utime, &stime); stats->ac_utime += div_u64(utime, NSEC_PER_USEC); stats->ac_stime += div_u64(stime, NSEC_PER_USEC); stats->nvcsw += tsk->nvcsw; stats->nivcsw += tsk->nivcsw; } unlock_task_sighand(first, &flags); rc = 0; out: rcu_read_unlock(); stats->version = TASKSTATS_VERSION; /* * Accounting subsystems can also add calls here to modify * fields of taskstats. */ return rc; } static void fill_tgid_exit(struct task_struct *tsk) { unsigned long flags; spin_lock_irqsave(&tsk->sighand->siglock, flags); if (!tsk->signal->stats) goto ret; /* * Each accounting subsystem calls its functions here to * accumalate its per-task stats for tsk, into the per-tgid structure * * per-task-foo(tsk->signal->stats, tsk); */ delayacct_add_tsk(tsk->signal->stats, tsk); ret: spin_unlock_irqrestore(&tsk->sighand->siglock, flags); return; } static int add_del_listener(pid_t pid, const struct cpumask *mask, int isadd) { struct listener_list *listeners; struct listener *s, *tmp, *s2; unsigned int cpu; int ret = 0; if (!cpumask_subset(mask, cpu_possible_mask)) return -EINVAL; if (current_user_ns() != &init_user_ns) return -EINVAL; if (task_active_pid_ns(current) != &init_pid_ns) return -EINVAL; if (isadd == REGISTER) { for_each_cpu(cpu, mask) { s = kmalloc_node(sizeof(struct listener), GFP_KERNEL, cpu_to_node(cpu)); if (!s) { ret = -ENOMEM; goto cleanup; } s->pid = pid; s->valid = 1; listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); list_for_each_entry(s2, &listeners->list, list) { if (s2->pid == pid && s2->valid) goto exists; } list_add(&s->list, &listeners->list); s = NULL; exists: up_write(&listeners->sem); kfree(s); /* nop if NULL */ } return 0; } /* Deregister or cleanup */ cleanup: for_each_cpu(cpu, mask) { listeners = &per_cpu(listener_array, cpu); down_write(&listeners->sem); list_for_each_entry_safe(s, tmp, &listeners->list, list) { if (s->pid == pid) { list_del(&s->list); kfree(s); break; } } up_write(&listeners->sem); } return ret; } static int parse(struct nlattr *na, struct cpumask *mask) { char *data; int len; int ret; if (na == NULL) return 1; len = nla_len(na); if (len > TASKSTATS_CPUMASK_MAXLEN) return -E2BIG; if (len < 1) return -EINVAL; data = kmalloc(len, GFP_KERNEL); if (!data) return -ENOMEM; nla_strscpy(data, na, len); ret = cpulist_parse(data, mask); kfree(data); return ret; } static struct taskstats *mk_reply(struct sk_buff *skb, int type, u32 pid) { struct nlattr *na, *ret; int aggr; aggr = (type == TASKSTATS_TYPE_PID) ? TASKSTATS_TYPE_AGGR_PID : TASKSTATS_TYPE_AGGR_TGID; na = nla_nest_start_noflag(skb, aggr); if (!na) goto err; if (nla_put(skb, type, sizeof(pid), &pid) < 0) { nla_nest_cancel(skb, na); goto err; } ret = nla_reserve_64bit(skb, TASKSTATS_TYPE_STATS, sizeof(struct taskstats), TASKSTATS_TYPE_NULL); if (!ret) { nla_nest_cancel(skb, na); goto err; } nla_nest_end(skb, na); return nla_data(ret); err: return NULL; } static int cgroupstats_user_cmd(struct sk_buff *skb, struct genl_info *info) { int rc = 0; struct sk_buff *rep_skb; struct cgroupstats *stats; struct nlattr *na; size_t size; u32 fd; struct fd f; na = info->attrs[CGROUPSTATS_CMD_ATTR_FD]; if (!na) return -EINVAL; fd = nla_get_u32(info->attrs[CGROUPSTATS_CMD_ATTR_FD]); f = fdget(fd); if (!f.file) return 0; size = nla_total_size(sizeof(struct cgroupstats)); rc = prepare_reply(info, CGROUPSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) goto err; na = nla_reserve(rep_skb, CGROUPSTATS_TYPE_CGROUP_STATS, sizeof(struct cgroupstats)); if (na == NULL) { nlmsg_free(rep_skb); rc = -EMSGSIZE; goto err; } stats = nla_data(na); memset(stats, 0, sizeof(*stats)); rc = cgroupstats_build(stats, f.file->f_path.dentry); if (rc < 0) { nlmsg_free(rep_skb); goto err; } rc = send_reply(rep_skb, info); err: fdput(f); return rc; } static int cmd_attr_register_cpumask(struct genl_info *info) { cpumask_var_t mask; int rc; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); if (rc < 0) goto out; rc = add_del_listener(info->snd_portid, mask, REGISTER); out: free_cpumask_var(mask); return rc; } static int cmd_attr_deregister_cpumask(struct genl_info *info) { cpumask_var_t mask; int rc; if (!alloc_cpumask_var(&mask, GFP_KERNEL)) return -ENOMEM; rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); if (rc < 0) goto out; rc = add_del_listener(info->snd_portid, mask, DEREGISTER); out: free_cpumask_var(mask); return rc; } static size_t taskstats_packet_size(void) { size_t size; size = nla_total_size(sizeof(u32)) + nla_total_size_64bit(sizeof(struct taskstats)) + nla_total_size(0); return size; } static int cmd_attr_pid(struct genl_info *info) { struct taskstats *stats; struct sk_buff *rep_skb; size_t size; u32 pid; int rc; size = taskstats_packet_size(); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) return rc; rc = -EINVAL; pid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_PID]); stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, pid); if (!stats) goto err; rc = fill_stats_for_pid(pid, stats); if (rc < 0) goto err; return send_reply(rep_skb, info); err: nlmsg_free(rep_skb); return rc; } static int cmd_attr_tgid(struct genl_info *info) { struct taskstats *stats; struct sk_buff *rep_skb; size_t size; u32 tgid; int rc; size = taskstats_packet_size(); rc = prepare_reply(info, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) return rc; rc = -EINVAL; tgid = nla_get_u32(info->attrs[TASKSTATS_CMD_ATTR_TGID]); stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, tgid); if (!stats) goto err; rc = fill_stats_for_tgid(tgid, stats); if (rc < 0) goto err; return send_reply(rep_skb, info); err: nlmsg_free(rep_skb); return rc; } static int taskstats_user_cmd(struct sk_buff *skb, struct genl_info *info) { if (info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK]) return cmd_attr_register_cpumask(info); else if (info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK]) return cmd_attr_deregister_cpumask(info); else if (info->attrs[TASKSTATS_CMD_ATTR_PID]) return cmd_attr_pid(info); else if (info->attrs[TASKSTATS_CMD_ATTR_TGID]) return cmd_attr_tgid(info); else return -EINVAL; } static struct taskstats *taskstats_tgid_alloc(struct task_struct *tsk) { struct signal_struct *sig = tsk->signal; struct taskstats *stats_new, *stats; /* Pairs with smp_store_release() below. */ stats = smp_load_acquire(&sig->stats); if (stats || thread_group_empty(tsk)) return stats; /* No problem if kmem_cache_zalloc() fails */ stats_new = kmem_cache_zalloc(taskstats_cache, GFP_KERNEL); spin_lock_irq(&tsk->sighand->siglock); stats = sig->stats; if (!stats) { /* * Pairs with smp_store_release() above and order the * kmem_cache_zalloc(). */ smp_store_release(&sig->stats, stats_new); stats = stats_new; stats_new = NULL; } spin_unlock_irq(&tsk->sighand->siglock); if (stats_new) kmem_cache_free(taskstats_cache, stats_new); return stats; } /* Send pid data out on exit */ void taskstats_exit(struct task_struct *tsk, int group_dead) { int rc; struct listener_list *listeners; struct taskstats *stats; struct sk_buff *rep_skb; size_t size; int is_thread_group; if (!family_registered) return; /* * Size includes space for nested attributes */ size = taskstats_packet_size(); is_thread_group = !!taskstats_tgid_alloc(tsk); if (is_thread_group) { /* PID + STATS + TGID + STATS */ size = 2 * size; /* fill the tsk->signal->stats structure */ fill_tgid_exit(tsk); } listeners = raw_cpu_ptr(&listener_array); if (list_empty(&listeners->list)) return; rc = prepare_reply(NULL, TASKSTATS_CMD_NEW, &rep_skb, size); if (rc < 0) return; stats = mk_reply(rep_skb, TASKSTATS_TYPE_PID, task_pid_nr_ns(tsk, &init_pid_ns)); if (!stats) goto err; fill_stats(&init_user_ns, &init_pid_ns, tsk, stats); if (group_dead) stats->ac_flag |= AGROUP; /* * Doesn't matter if tsk is the leader or the last group member leaving */ if (!is_thread_group || !group_dead) goto send; stats = mk_reply(rep_skb, TASKSTATS_TYPE_TGID, task_tgid_nr_ns(tsk, &init_pid_ns)); if (!stats) goto err; memcpy(stats, tsk->signal->stats, sizeof(*stats)); send: send_cpu_listeners(rep_skb, listeners); return; err: nlmsg_free(rep_skb); } static const struct genl_ops taskstats_ops[] = { { .cmd = TASKSTATS_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = taskstats_user_cmd, .policy = taskstats_cmd_get_policy, .maxattr = ARRAY_SIZE(taskstats_cmd_get_policy) - 1, .flags = GENL_ADMIN_PERM, }, { .cmd = CGROUPSTATS_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = cgroupstats_user_cmd, .policy = cgroupstats_cmd_get_policy, .maxattr = ARRAY_SIZE(cgroupstats_cmd_get_policy) - 1, }, }; static struct genl_family family __ro_after_init = { .name = TASKSTATS_GENL_NAME, .version = TASKSTATS_GENL_VERSION, .module = THIS_MODULE, .ops = taskstats_ops, .n_ops = ARRAY_SIZE(taskstats_ops), .resv_start_op = CGROUPSTATS_CMD_GET + 1, .netnsok = true, }; /* Needed early in initialization */ void __init taskstats_init_early(void) { unsigned int i; taskstats_cache = KMEM_CACHE(taskstats, SLAB_PANIC); for_each_possible_cpu(i) { INIT_LIST_HEAD(&(per_cpu(listener_array, i).list)); init_rwsem(&(per_cpu(listener_array, i).sem)); } } static int __init taskstats_init(void) { int rc; rc = genl_register_family(&family); if (rc) return rc; family_registered = 1; pr_info("registered taskstats version %d\n", TASKSTATS_GENL_VERSION); return 0; } /* * late initcall ensures initialization of statistics collection * mechanisms precedes initialization of the taskstats interface */ late_initcall(taskstats_init);
245 4 4 39 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 // SPDX-License-Identifier: MIT #include <linux/fb.h> #include <drm/drm_crtc_helper.h> #include <drm/drm_drv.h> #include <drm/drm_fb_helper.h> #include <drm/drm_framebuffer.h> #include <drm/drm_gem_framebuffer_helper.h> #include <drm/drm_gem_shmem_helper.h> #include <drm/drm_fbdev_shmem.h> /* * struct fb_ops */ static int drm_fbdev_shmem_fb_open(struct fb_info *info, int user) { struct drm_fb_helper *fb_helper = info->par; /* No need to take a ref for fbcon because it unbinds on unregister */ if (user && !try_module_get(fb_helper->dev->driver->fops->owner)) return -ENODEV; return 0; } static int drm_fbdev_shmem_fb_release(struct fb_info *info, int user) { struct drm_fb_helper *fb_helper = info->par; if (user) module_put(fb_helper->dev->driver->fops->owner); return 0; } FB_GEN_DEFAULT_DEFERRED_SYSMEM_OPS(drm_fbdev_shmem, drm_fb_helper_damage_range, drm_fb_helper_damage_area); static int drm_fbdev_shmem_fb_mmap(struct fb_info *info, struct vm_area_struct *vma) { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; struct drm_gem_object *obj = drm_gem_fb_get_obj(fb, 0); struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); if (shmem->map_wc) vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); return fb_deferred_io_mmap(info, vma); } static void drm_fbdev_shmem_fb_destroy(struct fb_info *info) { struct drm_fb_helper *fb_helper = info->par; if (!fb_helper->dev) return; fb_deferred_io_cleanup(info); drm_fb_helper_fini(fb_helper); drm_client_buffer_vunmap(fb_helper->buffer); drm_client_framebuffer_delete(fb_helper->buffer); drm_client_release(&fb_helper->client); drm_fb_helper_unprepare(fb_helper); kfree(fb_helper); } static const struct fb_ops drm_fbdev_shmem_fb_ops = { .owner = THIS_MODULE, .fb_open = drm_fbdev_shmem_fb_open, .fb_release = drm_fbdev_shmem_fb_release, __FB_DEFAULT_DEFERRED_OPS_RDWR(drm_fbdev_shmem), DRM_FB_HELPER_DEFAULT_OPS, __FB_DEFAULT_DEFERRED_OPS_DRAW(drm_fbdev_shmem), .fb_mmap = drm_fbdev_shmem_fb_mmap, .fb_destroy = drm_fbdev_shmem_fb_destroy, }; static struct page *drm_fbdev_shmem_get_page(struct fb_info *info, unsigned long offset) { struct drm_fb_helper *fb_helper = info->par; struct drm_framebuffer *fb = fb_helper->fb; struct drm_gem_object *obj = drm_gem_fb_get_obj(fb, 0); struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); unsigned int i = offset >> PAGE_SHIFT; struct page *page; if (fb_WARN_ON_ONCE(info, offset > obj->size)) return NULL; page = shmem->pages[i]; // protected by active vmap if (page) get_page(page); fb_WARN_ON_ONCE(info, !page); return page; } /* * struct drm_fb_helper */ static int drm_fbdev_shmem_helper_fb_probe(struct drm_fb_helper *fb_helper, struct drm_fb_helper_surface_size *sizes) { struct drm_client_dev *client = &fb_helper->client; struct drm_device *dev = fb_helper->dev; struct drm_client_buffer *buffer; struct drm_gem_shmem_object *shmem; struct drm_framebuffer *fb; struct fb_info *info; u32 format; struct iosys_map map; int ret; drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n", sizes->surface_width, sizes->surface_height, sizes->surface_bpp); format = drm_driver_legacy_fb_format(dev, sizes->surface_bpp, sizes->surface_depth); buffer = drm_client_framebuffer_create(client, sizes->surface_width, sizes->surface_height, format); if (IS_ERR(buffer)) return PTR_ERR(buffer); shmem = to_drm_gem_shmem_obj(buffer->gem); fb = buffer->fb; ret = drm_client_buffer_vmap(buffer, &map); if (ret) { goto err_drm_client_buffer_delete; } else if (drm_WARN_ON(dev, map.is_iomem)) { ret = -ENODEV; /* I/O memory not supported; use generic emulation */ goto err_drm_client_buffer_delete; } fb_helper->buffer = buffer; fb_helper->fb = fb; info = drm_fb_helper_alloc_info(fb_helper); if (IS_ERR(info)) { ret = PTR_ERR(info); goto err_drm_client_buffer_vunmap; } drm_fb_helper_fill_info(info, fb_helper, sizes); info->fbops = &drm_fbdev_shmem_fb_ops; /* screen */ info->flags |= FBINFO_VIRTFB; /* system memory */ if (!shmem->map_wc) info->flags |= FBINFO_READS_FAST; /* signal caching */ info->screen_size = sizes->surface_height * fb->pitches[0]; info->screen_buffer = map.vaddr; info->fix.smem_len = info->screen_size; /* deferred I/O */ fb_helper->fbdefio.delay = HZ / 20; fb_helper->fbdefio.get_page = drm_fbdev_shmem_get_page; fb_helper->fbdefio.deferred_io = drm_fb_helper_deferred_io; info->fbdefio = &fb_helper->fbdefio; ret = fb_deferred_io_init(info); if (ret) goto err_drm_fb_helper_release_info; return 0; err_drm_fb_helper_release_info: drm_fb_helper_release_info(fb_helper); err_drm_client_buffer_vunmap: fb_helper->fb = NULL; fb_helper->buffer = NULL; drm_client_buffer_vunmap(buffer); err_drm_client_buffer_delete: drm_client_framebuffer_delete(buffer); return ret; } static int drm_fbdev_shmem_helper_fb_dirty(struct drm_fb_helper *helper, struct drm_clip_rect *clip) { struct drm_device *dev = helper->dev; int ret; /* Call damage handlers only if necessary */ if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2)) return 0; if (helper->fb->funcs->dirty) { ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1); if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret)) return ret; } return 0; } static const struct drm_fb_helper_funcs drm_fbdev_shmem_helper_funcs = { .fb_probe = drm_fbdev_shmem_helper_fb_probe, .fb_dirty = drm_fbdev_shmem_helper_fb_dirty, }; /* * struct drm_client_funcs */ static void drm_fbdev_shmem_client_unregister(struct drm_client_dev *client) { struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); if (fb_helper->info) { drm_fb_helper_unregister_info(fb_helper); } else { drm_client_release(&fb_helper->client); drm_fb_helper_unprepare(fb_helper); kfree(fb_helper); } } static int drm_fbdev_shmem_client_restore(struct drm_client_dev *client) { drm_fb_helper_lastclose(client->dev); return 0; } static int drm_fbdev_shmem_client_hotplug(struct drm_client_dev *client) { struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client); struct drm_device *dev = client->dev; int ret; if (dev->fb_helper) return drm_fb_helper_hotplug_event(dev->fb_helper); ret = drm_fb_helper_init(dev, fb_helper); if (ret) goto err_drm_err; if (!drm_drv_uses_atomic_modeset(dev)) drm_helper_disable_unused_functions(dev); ret = drm_fb_helper_initial_config(fb_helper); if (ret) goto err_drm_fb_helper_fini; return 0; err_drm_fb_helper_fini: drm_fb_helper_fini(fb_helper); err_drm_err: drm_err(dev, "fbdev-shmem: Failed to setup emulation (ret=%d)\n", ret); return ret; } static const struct drm_client_funcs drm_fbdev_shmem_client_funcs = { .owner = THIS_MODULE, .unregister = drm_fbdev_shmem_client_unregister, .restore = drm_fbdev_shmem_client_restore, .hotplug = drm_fbdev_shmem_client_hotplug, }; /** * drm_fbdev_shmem_setup() - Setup fbdev emulation for GEM SHMEM helpers * @dev: DRM device * @preferred_bpp: Preferred bits per pixel for the device. * 32 is used if this is zero. * * This function sets up fbdev emulation for GEM DMA drivers that support * dumb buffers with a virtual address and that can be mmap'ed. * drm_fbdev_shmem_setup() shall be called after the DRM driver registered * the new DRM device with drm_dev_register(). * * Restore, hotplug events and teardown are all taken care of. Drivers that do * suspend/resume need to call drm_fb_helper_set_suspend_unlocked() themselves. * Simple drivers might use drm_mode_config_helper_suspend(). * * This function is safe to call even when there are no connectors present. * Setup will be retried on the next hotplug event. * * The fbdev is destroyed by drm_dev_unregister(). */ void drm_fbdev_shmem_setup(struct drm_device *dev, unsigned int preferred_bpp) { struct drm_fb_helper *fb_helper; int ret; drm_WARN(dev, !dev->registered, "Device has not been registered.\n"); drm_WARN(dev, dev->fb_helper, "fb_helper is already set!\n"); fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL); if (!fb_helper) return; drm_fb_helper_prepare(dev, fb_helper, preferred_bpp, &drm_fbdev_shmem_helper_funcs); ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_shmem_client_funcs); if (ret) { drm_err(dev, "Failed to register client: %d\n", ret); goto err_drm_client_init; } drm_client_register(&fb_helper->client); return; err_drm_client_init: drm_fb_helper_unprepare(fb_helper); kfree(fb_helper); } EXPORT_SYMBOL(drm_fbdev_shmem_setup);
4 15 2493 2493 2493 2 1 7 1 3 2 1 5 1259 1390 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 // SPDX-License-Identifier: GPL-2.0-only /* * Landlock LSM - Network management and hooks * * Copyright © 2022-2023 Huawei Tech. Co., Ltd. * Copyright © 2022-2023 Microsoft Corporation */ #include <linux/in.h> #include <linux/net.h> #include <linux/socket.h> #include <net/ipv6.h> #include "common.h" #include "cred.h" #include "limits.h" #include "net.h" #include "ruleset.h" int landlock_append_net_rule(struct landlock_ruleset *const ruleset, const u16 port, access_mask_t access_rights) { int err; const struct landlock_id id = { .key.data = (__force uintptr_t)htons(port), .type = LANDLOCK_KEY_NET_PORT, }; BUILD_BUG_ON(sizeof(port) > sizeof(id.key.data)); /* Transforms relative access rights to absolute ones. */ access_rights |= LANDLOCK_MASK_ACCESS_NET & ~landlock_get_net_access_mask(ruleset, 0); mutex_lock(&ruleset->lock); err = landlock_insert_rule(ruleset, id, access_rights); mutex_unlock(&ruleset->lock); return err; } static access_mask_t get_raw_handled_net_accesses(const struct landlock_ruleset *const domain) { access_mask_t access_dom = 0; size_t layer_level; for (layer_level = 0; layer_level < domain->num_layers; layer_level++) access_dom |= landlock_get_net_access_mask(domain, layer_level); return access_dom; } static const struct landlock_ruleset *get_current_net_domain(void) { const struct landlock_ruleset *const dom = landlock_get_current_domain(); if (!dom || !get_raw_handled_net_accesses(dom)) return NULL; return dom; } static int current_check_access_socket(struct socket *const sock, struct sockaddr *const address, const int addrlen, access_mask_t access_request) { __be16 port; layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_NET] = {}; const struct landlock_rule *rule; struct landlock_id id = { .type = LANDLOCK_KEY_NET_PORT, }; const struct landlock_ruleset *const dom = get_current_net_domain(); if (!dom) return 0; if (WARN_ON_ONCE(dom->num_layers < 1)) return -EACCES; /* Checks if it's a (potential) TCP socket. */ if (sock->type != SOCK_STREAM) return 0; /* Checks for minimal header length to safely read sa_family. */ if (addrlen < offsetofend(typeof(*address), sa_family)) return -EINVAL; switch (address->sa_family) { case AF_UNSPEC: case AF_INET: if (addrlen < sizeof(struct sockaddr_in)) return -EINVAL; port = ((struct sockaddr_in *)address)->sin_port; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: if (addrlen < SIN6_LEN_RFC2133) return -EINVAL; port = ((struct sockaddr_in6 *)address)->sin6_port; break; #endif /* IS_ENABLED(CONFIG_IPV6) */ default: return 0; } /* Specific AF_UNSPEC handling. */ if (address->sa_family == AF_UNSPEC) { /* * Connecting to an address with AF_UNSPEC dissolves the TCP * association, which have the same effect as closing the * connection while retaining the socket object (i.e., the file * descriptor). As for dropping privileges, closing * connections is always allowed. * * For a TCP access control system, this request is legitimate. * Let the network stack handle potential inconsistencies and * return -EINVAL if needed. */ if (access_request == LANDLOCK_ACCESS_NET_CONNECT_TCP) return 0; /* * For compatibility reason, accept AF_UNSPEC for bind * accesses (mapped to AF_INET) only if the address is * INADDR_ANY (cf. __inet_bind). Checking the address is * required to not wrongfully return -EACCES instead of * -EAFNOSUPPORT. * * We could return 0 and let the network stack handle these * checks, but it is safer to return a proper error and test * consistency thanks to kselftest. */ if (access_request == LANDLOCK_ACCESS_NET_BIND_TCP) { /* addrlen has already been checked for AF_UNSPEC. */ const struct sockaddr_in *const sockaddr = (struct sockaddr_in *)address; if (sock->sk->__sk_common.skc_family != AF_INET) return -EINVAL; if (sockaddr->sin_addr.s_addr != htonl(INADDR_ANY)) return -EAFNOSUPPORT; } } else { /* * Checks sa_family consistency to not wrongfully return * -EACCES instead of -EINVAL. Valid sa_family changes are * only (from AF_INET or AF_INET6) to AF_UNSPEC. * * We could return 0 and let the network stack handle this * check, but it is safer to return a proper error and test * consistency thanks to kselftest. */ if (address->sa_family != sock->sk->__sk_common.skc_family) return -EINVAL; } id.key.data = (__force uintptr_t)port; BUILD_BUG_ON(sizeof(port) > sizeof(id.key.data)); rule = landlock_find_rule(dom, id); access_request = landlock_init_layer_masks( dom, access_request, &layer_masks, LANDLOCK_KEY_NET_PORT); if (landlock_unmask_layers(rule, access_request, &layer_masks, ARRAY_SIZE(layer_masks))) return 0; return -EACCES; } static int hook_socket_bind(struct socket *const sock, struct sockaddr *const address, const int addrlen) { return current_check_access_socket(sock, address, addrlen, LANDLOCK_ACCESS_NET_BIND_TCP); } static int hook_socket_connect(struct socket *const sock, struct sockaddr *const address, const int addrlen) { return current_check_access_socket(sock, address, addrlen, LANDLOCK_ACCESS_NET_CONNECT_TCP); } static struct security_hook_list landlock_hooks[] __ro_after_init = { LSM_HOOK_INIT(socket_bind, hook_socket_bind), LSM_HOOK_INIT(socket_connect, hook_socket_connect), }; __init void landlock_add_net_hooks(void) { security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), &landlock_lsmid); }
3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 // SPDX-License-Identifier: GPL-2.0+ /* * MCT (Magic Control Technology Corp.) USB RS232 Converter Driver * * Copyright (C) 2000 Wolfgang Grandegger (wolfgang@ces.ch) * * This program is largely derived from the Belkin USB Serial Adapter Driver * (see belkin_sa.[ch]). All of the information about the device was acquired * by using SniffUSB on Windows98. For technical details see mct_u232.h. * * William G. Greathouse and Greg Kroah-Hartman provided great help on how to * do the reverse engineering and how to write a USB serial device driver. * * TO BE DONE, TO BE CHECKED: * DTR/RTS signal handling may be incomplete or incorrect. I have mainly * implemented what I have seen with SniffUSB or found in belkin_sa.c. * For further TODOs check also belkin_sa.c. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <asm/unaligned.h> #include <linux/usb.h> #include <linux/usb/serial.h> #include <linux/serial.h> #include "mct_u232.h" #define DRIVER_AUTHOR "Wolfgang Grandegger <wolfgang@ces.ch>" #define DRIVER_DESC "Magic Control Technology USB-RS232 converter driver" /* * Function prototypes */ static int mct_u232_port_probe(struct usb_serial_port *port); static void mct_u232_port_remove(struct usb_serial_port *remove); static int mct_u232_open(struct tty_struct *tty, struct usb_serial_port *port); static void mct_u232_close(struct usb_serial_port *port); static void mct_u232_dtr_rts(struct usb_serial_port *port, int on); static void mct_u232_read_int_callback(struct urb *urb); static void mct_u232_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios); static int mct_u232_break_ctl(struct tty_struct *tty, int break_state); static int mct_u232_tiocmget(struct tty_struct *tty); static int mct_u232_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear); static void mct_u232_throttle(struct tty_struct *tty); static void mct_u232_unthrottle(struct tty_struct *tty); /* * All of the device info needed for the MCT USB-RS232 converter. */ static const struct usb_device_id id_table[] = { { USB_DEVICE(MCT_U232_VID, MCT_U232_PID) }, { USB_DEVICE(MCT_U232_VID, MCT_U232_SITECOM_PID) }, { USB_DEVICE(MCT_U232_VID, MCT_U232_DU_H3SP_PID) }, { USB_DEVICE(MCT_U232_BELKIN_F5U109_VID, MCT_U232_BELKIN_F5U109_PID) }, { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, id_table); static struct usb_serial_driver mct_u232_device = { .driver = { .owner = THIS_MODULE, .name = "mct_u232", }, .description = "MCT U232", .id_table = id_table, .num_ports = 1, .open = mct_u232_open, .close = mct_u232_close, .dtr_rts = mct_u232_dtr_rts, .throttle = mct_u232_throttle, .unthrottle = mct_u232_unthrottle, .read_int_callback = mct_u232_read_int_callback, .set_termios = mct_u232_set_termios, .break_ctl = mct_u232_break_ctl, .tiocmget = mct_u232_tiocmget, .tiocmset = mct_u232_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .port_probe = mct_u232_port_probe, .port_remove = mct_u232_port_remove, .get_icount = usb_serial_generic_get_icount, }; static struct usb_serial_driver * const serial_drivers[] = { &mct_u232_device, NULL }; struct mct_u232_private { struct urb *read_urb; spinlock_t lock; unsigned int control_state; /* Modem Line Setting (TIOCM) */ unsigned char last_lcr; /* Line Control Register */ unsigned char last_lsr; /* Line Status Register */ unsigned char last_msr; /* Modem Status Register */ unsigned int rx_flags; /* Throttling flags */ }; #define THROTTLED 0x01 /* * Handle vendor specific USB requests */ #define WDR_TIMEOUT 5000 /* default urb timeout */ /* * Later day 2.6.0-test kernels have new baud rates like B230400 which * we do not know how to support. We ignore them for the moment. */ static int mct_u232_calculate_baud_rate(struct usb_serial *serial, speed_t value, speed_t *result) { *result = value; if (le16_to_cpu(serial->dev->descriptor.idProduct) == MCT_U232_SITECOM_PID || le16_to_cpu(serial->dev->descriptor.idProduct) == MCT_U232_BELKIN_F5U109_PID) { switch (value) { case 300: return 0x01; case 600: return 0x02; /* this one not tested */ case 1200: return 0x03; case 2400: return 0x04; case 4800: return 0x06; case 9600: return 0x08; case 19200: return 0x09; case 38400: return 0x0a; case 57600: return 0x0b; case 115200: return 0x0c; default: *result = 9600; return 0x08; } } else { /* FIXME: Can we use any divider - should we do divider = 115200/value; real baud = 115200/divider */ switch (value) { case 300: break; case 600: break; case 1200: break; case 2400: break; case 4800: break; case 9600: break; case 19200: break; case 38400: break; case 57600: break; case 115200: break; default: value = 9600; *result = 9600; } return 115200/value; } } static int mct_u232_set_baud_rate(struct tty_struct *tty, struct usb_serial *serial, struct usb_serial_port *port, speed_t value) { unsigned int divisor; int rc; unsigned char *buf; unsigned char cts_enable_byte = 0; speed_t speed; buf = kmalloc(MCT_U232_MAX_SIZE, GFP_KERNEL); if (buf == NULL) return -ENOMEM; divisor = mct_u232_calculate_baud_rate(serial, value, &speed); put_unaligned_le32(divisor, buf); rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), MCT_U232_SET_BAUD_RATE_REQUEST, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_BAUD_RATE_SIZE, WDR_TIMEOUT); if (rc < 0) /*FIXME: What value speed results */ dev_err(&port->dev, "Set BAUD RATE %d failed (error = %d)\n", value, rc); else tty_encode_baud_rate(tty, speed, speed); dev_dbg(&port->dev, "set_baud_rate: value: 0x%x, divisor: 0x%x\n", value, divisor); /* Mimic the MCT-supplied Windows driver (version 1.21P.0104), which always sends two extra USB 'device request' messages after the 'baud rate change' message. The actual functionality of the request codes in these messages is not fully understood but these particular codes are never seen in any operation besides a baud rate change. Both of these messages send a single byte of data. In the first message, the value of this byte is always zero. The second message has been determined experimentally to control whether data will be transmitted to a device which is not asserting the 'CTS' signal. If the second message's data byte is zero, data will be transmitted even if 'CTS' is not asserted (i.e. no hardware flow control). if the second message's data byte is nonzero (a value of 1 is used by this driver), data will not be transmitted to a device which is not asserting 'CTS'. */ buf[0] = 0; rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), MCT_U232_SET_UNKNOWN1_REQUEST, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_UNKNOWN1_SIZE, WDR_TIMEOUT); if (rc < 0) dev_err(&port->dev, "Sending USB device request code %d " "failed (error = %d)\n", MCT_U232_SET_UNKNOWN1_REQUEST, rc); if (port && C_CRTSCTS(tty)) cts_enable_byte = 1; dev_dbg(&port->dev, "set_baud_rate: send second control message, data = %02X\n", cts_enable_byte); buf[0] = cts_enable_byte; rc = usb_control_msg(serial->dev, usb_sndctrlpipe(serial->dev, 0), MCT_U232_SET_CTS_REQUEST, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_CTS_SIZE, WDR_TIMEOUT); if (rc < 0) dev_err(&port->dev, "Sending USB device request code %d " "failed (error = %d)\n", MCT_U232_SET_CTS_REQUEST, rc); kfree(buf); return rc; } /* mct_u232_set_baud_rate */ static int mct_u232_set_line_ctrl(struct usb_serial_port *port, unsigned char lcr) { int rc; unsigned char *buf; buf = kmalloc(MCT_U232_MAX_SIZE, GFP_KERNEL); if (buf == NULL) return -ENOMEM; buf[0] = lcr; rc = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), MCT_U232_SET_LINE_CTRL_REQUEST, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_LINE_CTRL_SIZE, WDR_TIMEOUT); if (rc < 0) dev_err(&port->dev, "Set LINE CTRL 0x%x failed (error = %d)\n", lcr, rc); dev_dbg(&port->dev, "set_line_ctrl: 0x%x\n", lcr); kfree(buf); return rc; } /* mct_u232_set_line_ctrl */ static int mct_u232_set_modem_ctrl(struct usb_serial_port *port, unsigned int control_state) { int rc; unsigned char mcr; unsigned char *buf; buf = kmalloc(MCT_U232_MAX_SIZE, GFP_KERNEL); if (buf == NULL) return -ENOMEM; mcr = MCT_U232_MCR_NONE; if (control_state & TIOCM_DTR) mcr |= MCT_U232_MCR_DTR; if (control_state & TIOCM_RTS) mcr |= MCT_U232_MCR_RTS; buf[0] = mcr; rc = usb_control_msg(port->serial->dev, usb_sndctrlpipe(port->serial->dev, 0), MCT_U232_SET_MODEM_CTRL_REQUEST, MCT_U232_SET_REQUEST_TYPE, 0, 0, buf, MCT_U232_SET_MODEM_CTRL_SIZE, WDR_TIMEOUT); kfree(buf); dev_dbg(&port->dev, "set_modem_ctrl: state=0x%x ==> mcr=0x%x\n", control_state, mcr); if (rc < 0) { dev_err(&port->dev, "Set MODEM CTRL 0x%x failed (error = %d)\n", mcr, rc); return rc; } return 0; } /* mct_u232_set_modem_ctrl */ static int mct_u232_get_modem_stat(struct usb_serial_port *port, unsigned char *msr) { int rc; unsigned char *buf; buf = kmalloc(MCT_U232_MAX_SIZE, GFP_KERNEL); if (buf == NULL) { *msr = 0; return -ENOMEM; } rc = usb_control_msg(port->serial->dev, usb_rcvctrlpipe(port->serial->dev, 0), MCT_U232_GET_MODEM_STAT_REQUEST, MCT_U232_GET_REQUEST_TYPE, 0, 0, buf, MCT_U232_GET_MODEM_STAT_SIZE, WDR_TIMEOUT); if (rc < MCT_U232_GET_MODEM_STAT_SIZE) { dev_err(&port->dev, "Get MODEM STATus failed (error = %d)\n", rc); if (rc >= 0) rc = -EIO; *msr = 0; } else { *msr = buf[0]; } dev_dbg(&port->dev, "get_modem_stat: 0x%x\n", *msr); kfree(buf); return rc; } /* mct_u232_get_modem_stat */ static void mct_u232_msr_to_icount(struct async_icount *icount, unsigned char msr) { /* Translate Control Line states */ if (msr & MCT_U232_MSR_DDSR) icount->dsr++; if (msr & MCT_U232_MSR_DCTS) icount->cts++; if (msr & MCT_U232_MSR_DRI) icount->rng++; if (msr & MCT_U232_MSR_DCD) icount->dcd++; } /* mct_u232_msr_to_icount */ static void mct_u232_msr_to_state(struct usb_serial_port *port, unsigned int *control_state, unsigned char msr) { /* Translate Control Line states */ if (msr & MCT_U232_MSR_DSR) *control_state |= TIOCM_DSR; else *control_state &= ~TIOCM_DSR; if (msr & MCT_U232_MSR_CTS) *control_state |= TIOCM_CTS; else *control_state &= ~TIOCM_CTS; if (msr & MCT_U232_MSR_RI) *control_state |= TIOCM_RI; else *control_state &= ~TIOCM_RI; if (msr & MCT_U232_MSR_CD) *control_state |= TIOCM_CD; else *control_state &= ~TIOCM_CD; dev_dbg(&port->dev, "msr_to_state: msr=0x%x ==> state=0x%x\n", msr, *control_state); } /* mct_u232_msr_to_state */ /* * Driver's tty interface functions */ static int mct_u232_port_probe(struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct mct_u232_private *priv; /* check first to simplify error handling */ if (!serial->port[1] || !serial->port[1]->interrupt_in_urb) { dev_err(&port->dev, "expected endpoint missing\n"); return -ENODEV; } priv = kzalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; /* Use second interrupt-in endpoint for reading. */ priv->read_urb = serial->port[1]->interrupt_in_urb; priv->read_urb->context = port; spin_lock_init(&priv->lock); usb_set_serial_port_data(port, priv); return 0; } static void mct_u232_port_remove(struct usb_serial_port *port) { struct mct_u232_private *priv; priv = usb_get_serial_port_data(port); kfree(priv); } static int mct_u232_open(struct tty_struct *tty, struct usb_serial_port *port) { struct usb_serial *serial = port->serial; struct mct_u232_private *priv = usb_get_serial_port_data(port); int retval = 0; unsigned int control_state; unsigned long flags; unsigned char last_lcr; unsigned char last_msr; /* Compensate for a hardware bug: although the Sitecom U232-P25 * device reports a maximum output packet size of 32 bytes, * it seems to be able to accept only 16 bytes (and that's what * SniffUSB says too...) */ if (le16_to_cpu(serial->dev->descriptor.idProduct) == MCT_U232_SITECOM_PID) port->bulk_out_size = 16; /* Do a defined restart: the normal serial device seems to * always turn on DTR and RTS here, so do the same. I'm not * sure if this is really necessary. But it should not harm * either. */ spin_lock_irqsave(&priv->lock, flags); if (tty && C_BAUD(tty)) priv->control_state = TIOCM_DTR | TIOCM_RTS; else priv->control_state = 0; priv->last_lcr = (MCT_U232_DATA_BITS_8 | MCT_U232_PARITY_NONE | MCT_U232_STOP_BITS_1); control_state = priv->control_state; last_lcr = priv->last_lcr; spin_unlock_irqrestore(&priv->lock, flags); mct_u232_set_modem_ctrl(port, control_state); mct_u232_set_line_ctrl(port, last_lcr); /* Read modem status and update control state */ mct_u232_get_modem_stat(port, &last_msr); spin_lock_irqsave(&priv->lock, flags); priv->last_msr = last_msr; mct_u232_msr_to_state(port, &priv->control_state, priv->last_msr); spin_unlock_irqrestore(&priv->lock, flags); retval = usb_submit_urb(priv->read_urb, GFP_KERNEL); if (retval) { dev_err(&port->dev, "usb_submit_urb(read) failed pipe 0x%x err %d\n", port->read_urb->pipe, retval); goto error; } retval = usb_submit_urb(port->interrupt_in_urb, GFP_KERNEL); if (retval) { usb_kill_urb(priv->read_urb); dev_err(&port->dev, "usb_submit_urb(read int) failed pipe 0x%x err %d", port->interrupt_in_urb->pipe, retval); goto error; } return 0; error: return retval; } /* mct_u232_open */ static void mct_u232_dtr_rts(struct usb_serial_port *port, int on) { unsigned int control_state; struct mct_u232_private *priv = usb_get_serial_port_data(port); spin_lock_irq(&priv->lock); if (on) priv->control_state |= TIOCM_DTR | TIOCM_RTS; else priv->control_state &= ~(TIOCM_DTR | TIOCM_RTS); control_state = priv->control_state; spin_unlock_irq(&priv->lock); mct_u232_set_modem_ctrl(port, control_state); } static void mct_u232_close(struct usb_serial_port *port) { struct mct_u232_private *priv = usb_get_serial_port_data(port); usb_kill_urb(priv->read_urb); usb_kill_urb(port->interrupt_in_urb); usb_serial_generic_close(port); } /* mct_u232_close */ static void mct_u232_read_int_callback(struct urb *urb) { struct usb_serial_port *port = urb->context; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned char *data = urb->transfer_buffer; int retval; int status = urb->status; unsigned long flags; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&port->dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(&port->dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } usb_serial_debug_data(&port->dev, __func__, urb->actual_length, data); /* * Work-a-round: handle the 'usual' bulk-in pipe here */ if (urb->transfer_buffer_length > 2) { if (urb->actual_length) { tty_insert_flip_string(&port->port, data, urb->actual_length); tty_flip_buffer_push(&port->port); } goto exit; } /* * The interrupt-in pipe signals exceptional conditions (modem line * signal changes and errors). data[0] holds MSR, data[1] holds LSR. */ spin_lock_irqsave(&priv->lock, flags); priv->last_msr = data[MCT_U232_MSR_INDEX]; /* Record Control Line states */ mct_u232_msr_to_state(port, &priv->control_state, priv->last_msr); mct_u232_msr_to_icount(&port->icount, priv->last_msr); #if 0 /* Not yet handled. See belkin_sa.c for further information */ /* Now to report any errors */ priv->last_lsr = data[MCT_U232_LSR_INDEX]; /* * fill in the flip buffer here, but I do not know the relation * to the current/next receive buffer or characters. I need * to look in to this before committing any code. */ if (priv->last_lsr & MCT_U232_LSR_ERR) { tty = tty_port_tty_get(&port->port); /* Overrun Error */ if (priv->last_lsr & MCT_U232_LSR_OE) { } /* Parity Error */ if (priv->last_lsr & MCT_U232_LSR_PE) { } /* Framing Error */ if (priv->last_lsr & MCT_U232_LSR_FE) { } /* Break Indicator */ if (priv->last_lsr & MCT_U232_LSR_BI) { } tty_kref_put(tty); } #endif wake_up_interruptible(&port->port.delta_msr_wait); spin_unlock_irqrestore(&priv->lock, flags); exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(&port->dev, "%s - usb_submit_urb failed with result %d\n", __func__, retval); } /* mct_u232_read_int_callback */ static void mct_u232_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct usb_serial *serial = port->serial; struct mct_u232_private *priv = usb_get_serial_port_data(port); struct ktermios *termios = &tty->termios; unsigned int cflag = termios->c_cflag; unsigned int old_cflag = old_termios->c_cflag; unsigned long flags; unsigned int control_state; unsigned char last_lcr; /* get a local copy of the current port settings */ spin_lock_irqsave(&priv->lock, flags); control_state = priv->control_state; spin_unlock_irqrestore(&priv->lock, flags); last_lcr = 0; /* * Update baud rate. * Do not attempt to cache old rates and skip settings, * disconnects screw such tricks up completely. * Premature optimization is the root of all evil. */ /* reassert DTR and RTS on transition from B0 */ if ((old_cflag & CBAUD) == B0) { dev_dbg(&port->dev, "%s: baud was B0\n", __func__); control_state |= TIOCM_DTR | TIOCM_RTS; mct_u232_set_modem_ctrl(port, control_state); } mct_u232_set_baud_rate(tty, serial, port, tty_get_baud_rate(tty)); if ((cflag & CBAUD) == B0) { dev_dbg(&port->dev, "%s: baud is B0\n", __func__); /* Drop RTS and DTR */ control_state &= ~(TIOCM_DTR | TIOCM_RTS); mct_u232_set_modem_ctrl(port, control_state); } /* * Update line control register (LCR) */ /* set the parity */ if (cflag & PARENB) last_lcr |= (cflag & PARODD) ? MCT_U232_PARITY_ODD : MCT_U232_PARITY_EVEN; else last_lcr |= MCT_U232_PARITY_NONE; /* set the number of data bits */ switch (cflag & CSIZE) { case CS5: last_lcr |= MCT_U232_DATA_BITS_5; break; case CS6: last_lcr |= MCT_U232_DATA_BITS_6; break; case CS7: last_lcr |= MCT_U232_DATA_BITS_7; break; case CS8: last_lcr |= MCT_U232_DATA_BITS_8; break; default: dev_err(&port->dev, "CSIZE was not CS5-CS8, using default of 8\n"); last_lcr |= MCT_U232_DATA_BITS_8; break; } termios->c_cflag &= ~CMSPAR; /* set the number of stop bits */ last_lcr |= (cflag & CSTOPB) ? MCT_U232_STOP_BITS_2 : MCT_U232_STOP_BITS_1; mct_u232_set_line_ctrl(port, last_lcr); /* save off the modified port settings */ spin_lock_irqsave(&priv->lock, flags); priv->control_state = control_state; priv->last_lcr = last_lcr; spin_unlock_irqrestore(&priv->lock, flags); } /* mct_u232_set_termios */ static int mct_u232_break_ctl(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned char lcr; unsigned long flags; spin_lock_irqsave(&priv->lock, flags); lcr = priv->last_lcr; if (break_state) lcr |= MCT_U232_SET_BREAK; spin_unlock_irqrestore(&priv->lock, flags); return mct_u232_set_line_ctrl(port, lcr); } /* mct_u232_break_ctl */ static int mct_u232_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned int control_state; unsigned long flags; spin_lock_irqsave(&priv->lock, flags); control_state = priv->control_state; spin_unlock_irqrestore(&priv->lock, flags); return control_state; } static int mct_u232_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned int control_state; unsigned long flags; spin_lock_irqsave(&priv->lock, flags); control_state = priv->control_state; if (set & TIOCM_RTS) control_state |= TIOCM_RTS; if (set & TIOCM_DTR) control_state |= TIOCM_DTR; if (clear & TIOCM_RTS) control_state &= ~TIOCM_RTS; if (clear & TIOCM_DTR) control_state &= ~TIOCM_DTR; priv->control_state = control_state; spin_unlock_irqrestore(&priv->lock, flags); return mct_u232_set_modem_ctrl(port, control_state); } static void mct_u232_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned int control_state; spin_lock_irq(&priv->lock); priv->rx_flags |= THROTTLED; if (C_CRTSCTS(tty)) { priv->control_state &= ~TIOCM_RTS; control_state = priv->control_state; spin_unlock_irq(&priv->lock); mct_u232_set_modem_ctrl(port, control_state); } else { spin_unlock_irq(&priv->lock); } } static void mct_u232_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct mct_u232_private *priv = usb_get_serial_port_data(port); unsigned int control_state; spin_lock_irq(&priv->lock); if ((priv->rx_flags & THROTTLED) && C_CRTSCTS(tty)) { priv->rx_flags &= ~THROTTLED; priv->control_state |= TIOCM_RTS; control_state = priv->control_state; spin_unlock_irq(&priv->lock); mct_u232_set_modem_ctrl(port, control_state); } else { spin_unlock_irq(&priv->lock); } } module_usb_serial_driver(serial_drivers, id_table); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL");
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Symmetric key ciphers. * * Copyright (c) 2007-2015 Herbert Xu <herbert@gondor.apana.org.au> */ #ifndef _CRYPTO_SKCIPHER_H #define _CRYPTO_SKCIPHER_H #include <linux/atomic.h> #include <linux/container_of.h> #include <linux/crypto.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/types.h> /* Set this bit if the lskcipher operation is a continuation. */ #define CRYPTO_LSKCIPHER_FLAG_CONT 0x00000001 /* Set this bit if the lskcipher operation is final. */ #define CRYPTO_LSKCIPHER_FLAG_FINAL 0x00000002 /* The bit CRYPTO_TFM_REQ_MAY_SLEEP can also be set if needed. */ /* Set this bit if the skcipher operation is a continuation. */ #define CRYPTO_SKCIPHER_REQ_CONT 0x00000001 /* Set this bit if the skcipher operation is not final. */ #define CRYPTO_SKCIPHER_REQ_NOTFINAL 0x00000002 struct scatterlist; /** * struct skcipher_request - Symmetric key cipher request * @cryptlen: Number of bytes to encrypt or decrypt * @iv: Initialisation Vector * @src: Source SG list * @dst: Destination SG list * @base: Underlying async request * @__ctx: Start of private context data */ struct skcipher_request { unsigned int cryptlen; u8 *iv; struct scatterlist *src; struct scatterlist *dst; struct crypto_async_request base; void *__ctx[] CRYPTO_MINALIGN_ATTR; }; struct crypto_skcipher { unsigned int reqsize; struct crypto_tfm base; }; struct crypto_sync_skcipher { struct crypto_skcipher base; }; struct crypto_lskcipher { struct crypto_tfm base; }; /* * struct skcipher_alg_common - common properties of skcipher_alg * @min_keysize: Minimum key size supported by the transformation. This is the * smallest key length supported by this transformation algorithm. * This must be set to one of the pre-defined values as this is * not hardware specific. Possible values for this field can be * found via git grep "_MIN_KEY_SIZE" include/crypto/ * @max_keysize: Maximum key size supported by the transformation. This is the * largest key length supported by this transformation algorithm. * This must be set to one of the pre-defined values as this is * not hardware specific. Possible values for this field can be * found via git grep "_MAX_KEY_SIZE" include/crypto/ * @ivsize: IV size applicable for transformation. The consumer must provide an * IV of exactly that size to perform the encrypt or decrypt operation. * @chunksize: Equal to the block size except for stream ciphers such as * CTR where it is set to the underlying block size. * @statesize: Size of the internal state for the algorithm. * @base: Definition of a generic crypto algorithm. */ #define SKCIPHER_ALG_COMMON { \ unsigned int min_keysize; \ unsigned int max_keysize; \ unsigned int ivsize; \ unsigned int chunksize; \ unsigned int statesize; \ \ struct crypto_alg base; \ } struct skcipher_alg_common SKCIPHER_ALG_COMMON; /** * struct skcipher_alg - symmetric key cipher definition * @setkey: Set key for the transformation. This function is used to either * program a supplied key into the hardware or store the key in the * transformation context for programming it later. Note that this * function does modify the transformation context. This function can * be called multiple times during the existence of the transformation * object, so one must make sure the key is properly reprogrammed into * the hardware. This function is also responsible for checking the key * length for validity. In case a software fallback was put in place in * the @cra_init call, this function might need to use the fallback if * the algorithm doesn't support all of the key sizes. * @encrypt: Encrypt a scatterlist of blocks. This function is used to encrypt * the supplied scatterlist containing the blocks of data. The crypto * API consumer is responsible for aligning the entries of the * scatterlist properly and making sure the chunks are correctly * sized. In case a software fallback was put in place in the * @cra_init call, this function might need to use the fallback if * the algorithm doesn't support all of the key sizes. In case the * key was stored in transformation context, the key might need to be * re-programmed into the hardware in this function. This function * shall not modify the transformation context, as this function may * be called in parallel with the same transformation object. * @decrypt: Decrypt a single block. This is a reverse counterpart to @encrypt * and the conditions are exactly the same. * @export: Export partial state of the transformation. This function dumps the * entire state of the ongoing transformation into a provided block of * data so it can be @import 'ed back later on. This is useful in case * you want to save partial result of the transformation after * processing certain amount of data and reload this partial result * multiple times later on for multiple re-use. No data processing * happens at this point. * @import: Import partial state of the transformation. This function loads the * entire state of the ongoing transformation from a provided block of * data so the transformation can continue from this point onward. No * data processing happens at this point. * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. In case the * cryptographic hardware has some special requirements which need to * be handled by software, this function shall check for the precise * requirement of the transformation and put any software fallbacks * in place. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * @walksize: Equal to the chunk size except in cases where the algorithm is * considerably more efficient if it can operate on multiple chunks * in parallel. Should be a multiple of chunksize. * @co: see struct skcipher_alg_common * * All fields except @ivsize are mandatory and must be filled. */ struct skcipher_alg { int (*setkey)(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); int (*encrypt)(struct skcipher_request *req); int (*decrypt)(struct skcipher_request *req); int (*export)(struct skcipher_request *req, void *out); int (*import)(struct skcipher_request *req, const void *in); int (*init)(struct crypto_skcipher *tfm); void (*exit)(struct crypto_skcipher *tfm); unsigned int walksize; union { struct SKCIPHER_ALG_COMMON; struct skcipher_alg_common co; }; }; /** * struct lskcipher_alg - linear symmetric key cipher definition * @setkey: Set key for the transformation. This function is used to either * program a supplied key into the hardware or store the key in the * transformation context for programming it later. Note that this * function does modify the transformation context. This function can * be called multiple times during the existence of the transformation * object, so one must make sure the key is properly reprogrammed into * the hardware. This function is also responsible for checking the key * length for validity. In case a software fallback was put in place in * the @cra_init call, this function might need to use the fallback if * the algorithm doesn't support all of the key sizes. * @encrypt: Encrypt a number of bytes. This function is used to encrypt * the supplied data. This function shall not modify * the transformation context, as this function may be called * in parallel with the same transformation object. Data * may be left over if length is not a multiple of blocks * and there is more to come (final == false). The number of * left-over bytes should be returned in case of success. * The siv field shall be as long as ivsize + statesize with * the IV placed at the front. The state will be used by the * algorithm internally. * @decrypt: Decrypt a number of bytes. This is a reverse counterpart to * @encrypt and the conditions are exactly the same. * @init: Initialize the cryptographic transformation object. This function * is used to initialize the cryptographic transformation object. * This function is called only once at the instantiation time, right * after the transformation context was allocated. * @exit: Deinitialize the cryptographic transformation object. This is a * counterpart to @init, used to remove various changes set in * @init. * @co: see struct skcipher_alg_common */ struct lskcipher_alg { int (*setkey)(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen); int (*encrypt)(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv, u32 flags); int (*decrypt)(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv, u32 flags); int (*init)(struct crypto_lskcipher *tfm); void (*exit)(struct crypto_lskcipher *tfm); struct skcipher_alg_common co; }; #define MAX_SYNC_SKCIPHER_REQSIZE 384 /* * This performs a type-check against the "tfm" argument to make sure * all users have the correct skcipher tfm for doing on-stack requests. */ #define SYNC_SKCIPHER_REQUEST_ON_STACK(name, tfm) \ char __##name##_desc[sizeof(struct skcipher_request) + \ MAX_SYNC_SKCIPHER_REQSIZE + \ (!(sizeof((struct crypto_sync_skcipher *)1 == \ (typeof(tfm))1))) \ ] CRYPTO_MINALIGN_ATTR; \ struct skcipher_request *name = (void *)__##name##_desc /** * DOC: Symmetric Key Cipher API * * Symmetric key cipher API is used with the ciphers of type * CRYPTO_ALG_TYPE_SKCIPHER (listed as type "skcipher" in /proc/crypto). * * Asynchronous cipher operations imply that the function invocation for a * cipher request returns immediately before the completion of the operation. * The cipher request is scheduled as a separate kernel thread and therefore * load-balanced on the different CPUs via the process scheduler. To allow * the kernel crypto API to inform the caller about the completion of a cipher * request, the caller must provide a callback function. That function is * invoked with the cipher handle when the request completes. * * To support the asynchronous operation, additional information than just the * cipher handle must be supplied to the kernel crypto API. That additional * information is given by filling in the skcipher_request data structure. * * For the symmetric key cipher API, the state is maintained with the tfm * cipher handle. A single tfm can be used across multiple calls and in * parallel. For asynchronous block cipher calls, context data supplied and * only used by the caller can be referenced the request data structure in * addition to the IV used for the cipher request. The maintenance of such * state information would be important for a crypto driver implementer to * have, because when calling the callback function upon completion of the * cipher operation, that callback function may need some information about * which operation just finished if it invoked multiple in parallel. This * state information is unused by the kernel crypto API. */ static inline struct crypto_skcipher *__crypto_skcipher_cast( struct crypto_tfm *tfm) { return container_of(tfm, struct crypto_skcipher, base); } /** * crypto_alloc_skcipher() - allocate symmetric key cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * skcipher cipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for an skcipher. The returned struct * crypto_skcipher is the cipher handle that is required for any subsequent * API invocation for that skcipher. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_skcipher *crypto_alloc_skcipher(const char *alg_name, u32 type, u32 mask); struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(const char *alg_name, u32 type, u32 mask); /** * crypto_alloc_lskcipher() - allocate linear symmetric key cipher handle * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * lskcipher * @type: specifies the type of the cipher * @mask: specifies the mask for the cipher * * Allocate a cipher handle for an lskcipher. The returned struct * crypto_lskcipher is the cipher handle that is required for any subsequent * API invocation for that lskcipher. * * Return: allocated cipher handle in case of success; IS_ERR() is true in case * of an error, PTR_ERR() returns the error code. */ struct crypto_lskcipher *crypto_alloc_lskcipher(const char *alg_name, u32 type, u32 mask); static inline struct crypto_tfm *crypto_skcipher_tfm( struct crypto_skcipher *tfm) { return &tfm->base; } static inline struct crypto_tfm *crypto_lskcipher_tfm( struct crypto_lskcipher *tfm) { return &tfm->base; } /** * crypto_free_skcipher() - zeroize and free cipher handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_skcipher(struct crypto_skcipher *tfm) { crypto_destroy_tfm(tfm, crypto_skcipher_tfm(tfm)); } static inline void crypto_free_sync_skcipher(struct crypto_sync_skcipher *tfm) { crypto_free_skcipher(&tfm->base); } /** * crypto_free_lskcipher() - zeroize and free cipher handle * @tfm: cipher handle to be freed * * If @tfm is a NULL or error pointer, this function does nothing. */ static inline void crypto_free_lskcipher(struct crypto_lskcipher *tfm) { crypto_destroy_tfm(tfm, crypto_lskcipher_tfm(tfm)); } /** * crypto_has_skcipher() - Search for the availability of an skcipher. * @alg_name: is the cra_name / name or cra_driver_name / driver name of the * skcipher * @type: specifies the type of the skcipher * @mask: specifies the mask for the skcipher * * Return: true when the skcipher is known to the kernel crypto API; false * otherwise */ int crypto_has_skcipher(const char *alg_name, u32 type, u32 mask); static inline const char *crypto_skcipher_driver_name( struct crypto_skcipher *tfm) { return crypto_tfm_alg_driver_name(crypto_skcipher_tfm(tfm)); } static inline const char *crypto_lskcipher_driver_name( struct crypto_lskcipher *tfm) { return crypto_tfm_alg_driver_name(crypto_lskcipher_tfm(tfm)); } static inline struct skcipher_alg_common *crypto_skcipher_alg_common( struct crypto_skcipher *tfm) { return container_of(crypto_skcipher_tfm(tfm)->__crt_alg, struct skcipher_alg_common, base); } static inline struct skcipher_alg *crypto_skcipher_alg( struct crypto_skcipher *tfm) { return container_of(crypto_skcipher_tfm(tfm)->__crt_alg, struct skcipher_alg, base); } static inline struct lskcipher_alg *crypto_lskcipher_alg( struct crypto_lskcipher *tfm) { return container_of(crypto_lskcipher_tfm(tfm)->__crt_alg, struct lskcipher_alg, co.base); } /** * crypto_skcipher_ivsize() - obtain IV size * @tfm: cipher handle * * The size of the IV for the skcipher referenced by the cipher handle is * returned. This IV size may be zero if the cipher does not need an IV. * * Return: IV size in bytes */ static inline unsigned int crypto_skcipher_ivsize(struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->ivsize; } static inline unsigned int crypto_sync_skcipher_ivsize( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_ivsize(&tfm->base); } /** * crypto_lskcipher_ivsize() - obtain IV size * @tfm: cipher handle * * The size of the IV for the lskcipher referenced by the cipher handle is * returned. This IV size may be zero if the cipher does not need an IV. * * Return: IV size in bytes */ static inline unsigned int crypto_lskcipher_ivsize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.ivsize; } /** * crypto_skcipher_blocksize() - obtain block size of cipher * @tfm: cipher handle * * The block size for the skcipher referenced with the cipher handle is * returned. The caller may use that information to allocate appropriate * memory for the data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_skcipher_blocksize( struct crypto_skcipher *tfm) { return crypto_tfm_alg_blocksize(crypto_skcipher_tfm(tfm)); } /** * crypto_lskcipher_blocksize() - obtain block size of cipher * @tfm: cipher handle * * The block size for the lskcipher referenced with the cipher handle is * returned. The caller may use that information to allocate appropriate * memory for the data returned by the encryption or decryption operation * * Return: block size of cipher */ static inline unsigned int crypto_lskcipher_blocksize( struct crypto_lskcipher *tfm) { return crypto_tfm_alg_blocksize(crypto_lskcipher_tfm(tfm)); } /** * crypto_skcipher_chunksize() - obtain chunk size * @tfm: cipher handle * * The block size is set to one for ciphers such as CTR. However, * you still need to provide incremental updates in multiples of * the underlying block size as the IV does not have sub-block * granularity. This is known in this API as the chunk size. * * Return: chunk size in bytes */ static inline unsigned int crypto_skcipher_chunksize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->chunksize; } /** * crypto_lskcipher_chunksize() - obtain chunk size * @tfm: cipher handle * * The block size is set to one for ciphers such as CTR. However, * you still need to provide incremental updates in multiples of * the underlying block size as the IV does not have sub-block * granularity. This is known in this API as the chunk size. * * Return: chunk size in bytes */ static inline unsigned int crypto_lskcipher_chunksize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.chunksize; } /** * crypto_skcipher_statesize() - obtain state size * @tfm: cipher handle * * Some algorithms cannot be chained with the IV alone. They carry * internal state which must be replicated if data is to be processed * incrementally. The size of that state can be obtained with this * function. * * Return: state size in bytes */ static inline unsigned int crypto_skcipher_statesize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->statesize; } /** * crypto_lskcipher_statesize() - obtain state size * @tfm: cipher handle * * Some algorithms cannot be chained with the IV alone. They carry * internal state which must be replicated if data is to be processed * incrementally. The size of that state can be obtained with this * function. * * Return: state size in bytes */ static inline unsigned int crypto_lskcipher_statesize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.statesize; } static inline unsigned int crypto_sync_skcipher_blocksize( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_blocksize(&tfm->base); } static inline unsigned int crypto_skcipher_alignmask( struct crypto_skcipher *tfm) { return crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)); } static inline unsigned int crypto_lskcipher_alignmask( struct crypto_lskcipher *tfm) { return crypto_tfm_alg_alignmask(crypto_lskcipher_tfm(tfm)); } static inline u32 crypto_skcipher_get_flags(struct crypto_skcipher *tfm) { return crypto_tfm_get_flags(crypto_skcipher_tfm(tfm)); } static inline void crypto_skcipher_set_flags(struct crypto_skcipher *tfm, u32 flags) { crypto_tfm_set_flags(crypto_skcipher_tfm(tfm), flags); } static inline void crypto_skcipher_clear_flags(struct crypto_skcipher *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_skcipher_tfm(tfm), flags); } static inline u32 crypto_sync_skcipher_get_flags( struct crypto_sync_skcipher *tfm) { return crypto_skcipher_get_flags(&tfm->base); } static inline void crypto_sync_skcipher_set_flags( struct crypto_sync_skcipher *tfm, u32 flags) { crypto_skcipher_set_flags(&tfm->base, flags); } static inline void crypto_sync_skcipher_clear_flags( struct crypto_sync_skcipher *tfm, u32 flags) { crypto_skcipher_clear_flags(&tfm->base, flags); } static inline u32 crypto_lskcipher_get_flags(struct crypto_lskcipher *tfm) { return crypto_tfm_get_flags(crypto_lskcipher_tfm(tfm)); } static inline void crypto_lskcipher_set_flags(struct crypto_lskcipher *tfm, u32 flags) { crypto_tfm_set_flags(crypto_lskcipher_tfm(tfm), flags); } static inline void crypto_lskcipher_clear_flags(struct crypto_lskcipher *tfm, u32 flags) { crypto_tfm_clear_flags(crypto_lskcipher_tfm(tfm), flags); } /** * crypto_skcipher_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the skcipher referenced by the cipher * handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen); static inline int crypto_sync_skcipher_setkey(struct crypto_sync_skcipher *tfm, const u8 *key, unsigned int keylen) { return crypto_skcipher_setkey(&tfm->base, key, keylen); } /** * crypto_lskcipher_setkey() - set key for cipher * @tfm: cipher handle * @key: buffer holding the key * @keylen: length of the key in bytes * * The caller provided key is set for the lskcipher referenced by the cipher * handle. * * Note, the key length determines the cipher type. Many block ciphers implement * different cipher modes depending on the key size, such as AES-128 vs AES-192 * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 * is performed. * * Return: 0 if the setting of the key was successful; < 0 if an error occurred */ int crypto_lskcipher_setkey(struct crypto_lskcipher *tfm, const u8 *key, unsigned int keylen); static inline unsigned int crypto_skcipher_min_keysize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->min_keysize; } static inline unsigned int crypto_skcipher_max_keysize( struct crypto_skcipher *tfm) { return crypto_skcipher_alg_common(tfm)->max_keysize; } static inline unsigned int crypto_lskcipher_min_keysize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.min_keysize; } static inline unsigned int crypto_lskcipher_max_keysize( struct crypto_lskcipher *tfm) { return crypto_lskcipher_alg(tfm)->co.max_keysize; } /** * crypto_skcipher_reqtfm() - obtain cipher handle from request * @req: skcipher_request out of which the cipher handle is to be obtained * * Return the crypto_skcipher handle when furnishing an skcipher_request * data structure. * * Return: crypto_skcipher handle */ static inline struct crypto_skcipher *crypto_skcipher_reqtfm( struct skcipher_request *req) { return __crypto_skcipher_cast(req->base.tfm); } static inline struct crypto_sync_skcipher *crypto_sync_skcipher_reqtfm( struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); return container_of(tfm, struct crypto_sync_skcipher, base); } /** * crypto_skcipher_encrypt() - encrypt plaintext * @req: reference to the skcipher_request handle that holds all information * needed to perform the cipher operation * * Encrypt plaintext data using the skcipher_request handle. That data * structure and how it is filled with data is discussed with the * skcipher_request_* functions. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_encrypt(struct skcipher_request *req); /** * crypto_skcipher_decrypt() - decrypt ciphertext * @req: reference to the skcipher_request handle that holds all information * needed to perform the cipher operation * * Decrypt ciphertext data using the skcipher_request handle. That data * structure and how it is filled with data is discussed with the * skcipher_request_* functions. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_decrypt(struct skcipher_request *req); /** * crypto_skcipher_export() - export partial state * @req: reference to the skcipher_request handle that holds all information * needed to perform the operation * @out: output buffer of sufficient size that can hold the state * * Export partial state of the transformation. This function dumps the * entire state of the ongoing transformation into a provided block of * data so it can be @import 'ed back later on. This is useful in case * you want to save partial result of the transformation after * processing certain amount of data and reload this partial result * multiple times later on for multiple re-use. No data processing * happens at this point. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_export(struct skcipher_request *req, void *out); /** * crypto_skcipher_import() - import partial state * @req: reference to the skcipher_request handle that holds all information * needed to perform the operation * @in: buffer holding the state * * Import partial state of the transformation. This function loads the * entire state of the ongoing transformation from a provided block of * data so the transformation can continue from this point onward. No * data processing happens at this point. * * Return: 0 if the cipher operation was successful; < 0 if an error occurred */ int crypto_skcipher_import(struct skcipher_request *req, const void *in); /** * crypto_lskcipher_encrypt() - encrypt plaintext * @tfm: lskcipher handle * @src: source buffer * @dst: destination buffer * @len: number of bytes to process * @siv: IV + state for the cipher operation. The length of the IV must * comply with the IV size defined by crypto_lskcipher_ivsize. The * IV is then followed with a buffer with the length as specified by * crypto_lskcipher_statesize. * Encrypt plaintext data using the lskcipher handle. * * Return: >=0 if the cipher operation was successful, if positive * then this many bytes have been left unprocessed; * < 0 if an error occurred */ int crypto_lskcipher_encrypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv); /** * crypto_lskcipher_decrypt() - decrypt ciphertext * @tfm: lskcipher handle * @src: source buffer * @dst: destination buffer * @len: number of bytes to process * @siv: IV + state for the cipher operation. The length of the IV must * comply with the IV size defined by crypto_lskcipher_ivsize. The * IV is then followed with a buffer with the length as specified by * crypto_lskcipher_statesize. * * Decrypt ciphertext data using the lskcipher handle. * * Return: >=0 if the cipher operation was successful, if positive * then this many bytes have been left unprocessed; * < 0 if an error occurred */ int crypto_lskcipher_decrypt(struct crypto_lskcipher *tfm, const u8 *src, u8 *dst, unsigned len, u8 *siv); /** * DOC: Symmetric Key Cipher Request Handle * * The skcipher_request data structure contains all pointers to data * required for the symmetric key cipher operation. This includes the cipher * handle (which can be used by multiple skcipher_request instances), pointer * to plaintext and ciphertext, asynchronous callback function, etc. It acts * as a handle to the skcipher_request_* API calls in a similar way as * skcipher handle to the crypto_skcipher_* API calls. */ /** * crypto_skcipher_reqsize() - obtain size of the request data structure * @tfm: cipher handle * * Return: number of bytes */ static inline unsigned int crypto_skcipher_reqsize(struct crypto_skcipher *tfm) { return tfm->reqsize; } /** * skcipher_request_set_tfm() - update cipher handle reference in request * @req: request handle to be modified * @tfm: cipher handle that shall be added to the request handle * * Allow the caller to replace the existing skcipher handle in the request * data structure with a different one. */ static inline void skcipher_request_set_tfm(struct skcipher_request *req, struct crypto_skcipher *tfm) { req->base.tfm = crypto_skcipher_tfm(tfm); } static inline void skcipher_request_set_sync_tfm(struct skcipher_request *req, struct crypto_sync_skcipher *tfm) { skcipher_request_set_tfm(req, &tfm->base); } static inline struct skcipher_request *skcipher_request_cast( struct crypto_async_request *req) { return container_of(req, struct skcipher_request, base); } /** * skcipher_request_alloc() - allocate request data structure * @tfm: cipher handle to be registered with the request * @gfp: memory allocation flag that is handed to kmalloc by the API call. * * Allocate the request data structure that must be used with the skcipher * encrypt and decrypt API calls. During the allocation, the provided skcipher * handle is registered in the request data structure. * * Return: allocated request handle in case of success, or NULL if out of memory */ static inline struct skcipher_request *skcipher_request_alloc_noprof( struct crypto_skcipher *tfm, gfp_t gfp) { struct skcipher_request *req; req = kmalloc_noprof(sizeof(struct skcipher_request) + crypto_skcipher_reqsize(tfm), gfp); if (likely(req)) skcipher_request_set_tfm(req, tfm); return req; } #define skcipher_request_alloc(...) alloc_hooks(skcipher_request_alloc_noprof(__VA_ARGS__)) /** * skcipher_request_free() - zeroize and free request data structure * @req: request data structure cipher handle to be freed */ static inline void skcipher_request_free(struct skcipher_request *req) { kfree_sensitive(req); } static inline void skcipher_request_zero(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); memzero_explicit(req, sizeof(*req) + crypto_skcipher_reqsize(tfm)); } /** * skcipher_request_set_callback() - set asynchronous callback function * @req: request handle * @flags: specify zero or an ORing of the flags * CRYPTO_TFM_REQ_MAY_BACKLOG the request queue may back log and * increase the wait queue beyond the initial maximum size; * CRYPTO_TFM_REQ_MAY_SLEEP the request processing may sleep * @compl: callback function pointer to be registered with the request handle * @data: The data pointer refers to memory that is not used by the kernel * crypto API, but provided to the callback function for it to use. Here, * the caller can provide a reference to memory the callback function can * operate on. As the callback function is invoked asynchronously to the * related functionality, it may need to access data structures of the * related functionality which can be referenced using this pointer. The * callback function can access the memory via the "data" field in the * crypto_async_request data structure provided to the callback function. * * This function allows setting the callback function that is triggered once the * cipher operation completes. * * The callback function is registered with the skcipher_request handle and * must comply with the following template:: * * void callback_function(struct crypto_async_request *req, int error) */ static inline void skcipher_request_set_callback(struct skcipher_request *req, u32 flags, crypto_completion_t compl, void *data) { req->base.complete = compl; req->base.data = data; req->base.flags = flags; } /** * skcipher_request_set_crypt() - set data buffers * @req: request handle * @src: source scatter / gather list * @dst: destination scatter / gather list * @cryptlen: number of bytes to process from @src * @iv: IV for the cipher operation which must comply with the IV size defined * by crypto_skcipher_ivsize * * This function allows setting of the source data and destination data * scatter / gather lists. * * For encryption, the source is treated as the plaintext and the * destination is the ciphertext. For a decryption operation, the use is * reversed - the source is the ciphertext and the destination is the plaintext. */ static inline void skcipher_request_set_crypt( struct skcipher_request *req, struct scatterlist *src, struct scatterlist *dst, unsigned int cryptlen, void *iv) { req->src = src; req->dst = dst; req->cryptlen = cryptlen; req->iv = iv; } #endif /* _CRYPTO_SKCIPHER_H */
25 11 14 14 14 14 14 14 3 3 1 1 1 20 20 15 5 137 137 137 136 137 2 1 1 15 15 14 9 4 11 2 13 1 1 2 9 3 8 16 16 16 136 136 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 // SPDX-License-Identifier: GPL-2.0-only /* * vxcan.c - Virtual CAN Tunnel for cross namespace communication * * This code is derived from drivers/net/can/vcan.c for the virtual CAN * specific parts and from drivers/net/veth.c to implement the netlink API * for network interface pairs in a common and established way. * * Copyright (c) 2017 Oliver Hartkopp <socketcan@hartkopp.net> */ #include <linux/ethtool.h> #include <linux/module.h> #include <linux/init.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/can.h> #include <linux/can/dev.h> #include <linux/can/skb.h> #include <linux/can/vxcan.h> #include <linux/can/can-ml.h> #include <linux/slab.h> #include <net/rtnetlink.h> #define DRV_NAME "vxcan" MODULE_DESCRIPTION("Virtual CAN Tunnel"); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Oliver Hartkopp <socketcan@hartkopp.net>"); MODULE_ALIAS_RTNL_LINK(DRV_NAME); struct vxcan_priv { struct net_device __rcu *peer; }; static netdev_tx_t vxcan_xmit(struct sk_buff *oskb, struct net_device *dev) { struct vxcan_priv *priv = netdev_priv(dev); struct net_device *peer; struct net_device_stats *peerstats, *srcstats = &dev->stats; struct sk_buff *skb; unsigned int len; if (can_dropped_invalid_skb(dev, oskb)) return NETDEV_TX_OK; rcu_read_lock(); peer = rcu_dereference(priv->peer); if (unlikely(!peer)) { kfree_skb(oskb); dev->stats.tx_dropped++; goto out_unlock; } skb_tx_timestamp(oskb); skb = skb_clone(oskb, GFP_ATOMIC); if (skb) { consume_skb(oskb); } else { kfree_skb(oskb); goto out_unlock; } /* reset CAN GW hop counter */ skb->csum_start = 0; skb->pkt_type = PACKET_BROADCAST; skb->dev = peer; skb->ip_summed = CHECKSUM_UNNECESSARY; len = can_skb_get_data_len(skb); if (netif_rx(skb) == NET_RX_SUCCESS) { srcstats->tx_packets++; srcstats->tx_bytes += len; peerstats = &peer->stats; peerstats->rx_packets++; peerstats->rx_bytes += len; } out_unlock: rcu_read_unlock(); return NETDEV_TX_OK; } static int vxcan_open(struct net_device *dev) { struct vxcan_priv *priv = netdev_priv(dev); struct net_device *peer = rtnl_dereference(priv->peer); if (!peer) return -ENOTCONN; if (peer->flags & IFF_UP) { netif_carrier_on(dev); netif_carrier_on(peer); } return 0; } static int vxcan_close(struct net_device *dev) { struct vxcan_priv *priv = netdev_priv(dev); struct net_device *peer = rtnl_dereference(priv->peer); netif_carrier_off(dev); if (peer) netif_carrier_off(peer); return 0; } static int vxcan_get_iflink(const struct net_device *dev) { struct vxcan_priv *priv = netdev_priv(dev); struct net_device *peer; int iflink; rcu_read_lock(); peer = rcu_dereference(priv->peer); iflink = peer ? READ_ONCE(peer->ifindex) : 0; rcu_read_unlock(); return iflink; } static int vxcan_change_mtu(struct net_device *dev, int new_mtu) { /* Do not allow changing the MTU while running */ if (dev->flags & IFF_UP) return -EBUSY; if (new_mtu != CAN_MTU && new_mtu != CANFD_MTU && !can_is_canxl_dev_mtu(new_mtu)) return -EINVAL; WRITE_ONCE(dev->mtu, new_mtu); return 0; } static const struct net_device_ops vxcan_netdev_ops = { .ndo_open = vxcan_open, .ndo_stop = vxcan_close, .ndo_start_xmit = vxcan_xmit, .ndo_get_iflink = vxcan_get_iflink, .ndo_change_mtu = vxcan_change_mtu, }; static const struct ethtool_ops vxcan_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, }; static void vxcan_setup(struct net_device *dev) { struct can_ml_priv *can_ml; dev->type = ARPHRD_CAN; dev->mtu = CANFD_MTU; dev->hard_header_len = 0; dev->addr_len = 0; dev->tx_queue_len = 0; dev->flags = IFF_NOARP; dev->netdev_ops = &vxcan_netdev_ops; dev->ethtool_ops = &vxcan_ethtool_ops; dev->needs_free_netdev = true; can_ml = netdev_priv(dev) + ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN); can_set_ml_priv(dev, can_ml); } /* forward declaration for rtnl_create_link() */ static struct rtnl_link_ops vxcan_link_ops; static int vxcan_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct vxcan_priv *priv; struct net_device *peer; struct net *peer_net; struct nlattr *peer_tb[IFLA_MAX + 1], **tbp = tb; char ifname[IFNAMSIZ]; unsigned char name_assign_type; struct ifinfomsg *ifmp = NULL; int err; /* register peer device */ if (data && data[VXCAN_INFO_PEER]) { struct nlattr *nla_peer; nla_peer = data[VXCAN_INFO_PEER]; ifmp = nla_data(nla_peer); err = rtnl_nla_parse_ifinfomsg(peer_tb, nla_peer, extack); if (err < 0) return err; tbp = peer_tb; } if (ifmp && tbp[IFLA_IFNAME]) { nla_strscpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ); name_assign_type = NET_NAME_USER; } else { snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d"); name_assign_type = NET_NAME_ENUM; } peer_net = rtnl_link_get_net(net, tbp); if (IS_ERR(peer_net)) return PTR_ERR(peer_net); peer = rtnl_create_link(peer_net, ifname, name_assign_type, &vxcan_link_ops, tbp, extack); if (IS_ERR(peer)) { put_net(peer_net); return PTR_ERR(peer); } if (ifmp && dev->ifindex) peer->ifindex = ifmp->ifi_index; err = register_netdevice(peer); put_net(peer_net); peer_net = NULL; if (err < 0) { free_netdev(peer); return err; } netif_carrier_off(peer); err = rtnl_configure_link(peer, ifmp, 0, NULL); if (err < 0) goto unregister_network_device; /* register first device */ if (tb[IFLA_IFNAME]) nla_strscpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ); else snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d"); err = register_netdevice(dev); if (err < 0) goto unregister_network_device; netif_carrier_off(dev); /* cross link the device pair */ priv = netdev_priv(dev); rcu_assign_pointer(priv->peer, peer); priv = netdev_priv(peer); rcu_assign_pointer(priv->peer, dev); return 0; unregister_network_device: unregister_netdevice(peer); return err; } static void vxcan_dellink(struct net_device *dev, struct list_head *head) { struct vxcan_priv *priv; struct net_device *peer; priv = netdev_priv(dev); peer = rtnl_dereference(priv->peer); /* Note : dellink() is called from default_device_exit_batch(), * before a rcu_synchronize() point. The devices are guaranteed * not being freed before one RCU grace period. */ RCU_INIT_POINTER(priv->peer, NULL); unregister_netdevice_queue(dev, head); if (peer) { priv = netdev_priv(peer); RCU_INIT_POINTER(priv->peer, NULL); unregister_netdevice_queue(peer, head); } } static const struct nla_policy vxcan_policy[VXCAN_INFO_MAX + 1] = { [VXCAN_INFO_PEER] = { .len = sizeof(struct ifinfomsg) }, }; static struct net *vxcan_get_link_net(const struct net_device *dev) { struct vxcan_priv *priv = netdev_priv(dev); struct net_device *peer = rtnl_dereference(priv->peer); return peer ? dev_net(peer) : dev_net(dev); } static struct rtnl_link_ops vxcan_link_ops = { .kind = DRV_NAME, .priv_size = ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN) + sizeof(struct can_ml_priv), .setup = vxcan_setup, .newlink = vxcan_newlink, .dellink = vxcan_dellink, .policy = vxcan_policy, .maxtype = VXCAN_INFO_MAX, .get_link_net = vxcan_get_link_net, }; static __init int vxcan_init(void) { pr_info("vxcan: Virtual CAN Tunnel driver\n"); return rtnl_link_register(&vxcan_link_ops); } static __exit void vxcan_exit(void) { rtnl_link_unregister(&vxcan_link_ops); } module_init(vxcan_init); module_exit(vxcan_exit);
7 1 7 6 7 16 16 15 13 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 // SPDX-License-Identifier: GPL-2.0-only #include "netlink.h" #include "common.h" #include "bitset.h" struct debug_req_info { struct ethnl_req_info base; }; struct debug_reply_data { struct ethnl_reply_data base; u32 msg_mask; }; #define DEBUG_REPDATA(__reply_base) \ container_of(__reply_base, struct debug_reply_data, base) const struct nla_policy ethnl_debug_get_policy[] = { [ETHTOOL_A_DEBUG_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; static int debug_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct debug_reply_data *data = DEBUG_REPDATA(reply_base); struct net_device *dev = reply_base->dev; int ret; if (!dev->ethtool_ops->get_msglevel) return -EOPNOTSUPP; ret = ethnl_ops_begin(dev); if (ret < 0) return ret; data->msg_mask = dev->ethtool_ops->get_msglevel(dev); ethnl_ops_complete(dev); return 0; } static int debug_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct debug_reply_data *data = DEBUG_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; return ethnl_bitset32_size(&data->msg_mask, NULL, NETIF_MSG_CLASS_COUNT, netif_msg_class_names, compact); } static int debug_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct debug_reply_data *data = DEBUG_REPDATA(reply_base); bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; return ethnl_put_bitset32(skb, ETHTOOL_A_DEBUG_MSGMASK, &data->msg_mask, NULL, NETIF_MSG_CLASS_COUNT, netif_msg_class_names, compact); } /* DEBUG_SET */ const struct nla_policy ethnl_debug_set_policy[] = { [ETHTOOL_A_DEBUG_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), [ETHTOOL_A_DEBUG_MSGMASK] = { .type = NLA_NESTED }, }; static int ethnl_set_debug_validate(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_ops *ops = req_info->dev->ethtool_ops; return ops->get_msglevel && ops->set_msglevel ? 1 : -EOPNOTSUPP; } static int ethnl_set_debug(struct ethnl_req_info *req_info, struct genl_info *info) { struct net_device *dev = req_info->dev; struct nlattr **tb = info->attrs; bool mod = false; u32 msg_mask; int ret; msg_mask = dev->ethtool_ops->get_msglevel(dev); ret = ethnl_update_bitset32(&msg_mask, NETIF_MSG_CLASS_COUNT, tb[ETHTOOL_A_DEBUG_MSGMASK], netif_msg_class_names, info->extack, &mod); if (ret < 0 || !mod) return ret; dev->ethtool_ops->set_msglevel(dev, msg_mask); return 1; } const struct ethnl_request_ops ethnl_debug_request_ops = { .request_cmd = ETHTOOL_MSG_DEBUG_GET, .reply_cmd = ETHTOOL_MSG_DEBUG_GET_REPLY, .hdr_attr = ETHTOOL_A_DEBUG_HEADER, .req_info_size = sizeof(struct debug_req_info), .reply_data_size = sizeof(struct debug_reply_data), .prepare_data = debug_prepare_data, .reply_size = debug_reply_size, .fill_reply = debug_fill_reply, .set_validate = ethnl_set_debug_validate, .set = ethnl_set_debug, .set_ntf_cmd = ETHTOOL_MSG_DEBUG_NTF, };
849 21 92 12 7 847 50 473 825 203 161 5 5 95 1 53 41 24 719 27 220 935 199 7 57 9 3 3 7 229 38 1 61 4 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 /* SPDX-License-Identifier: GPL-2.0 */ #if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KVM_H #include <linux/tracepoint.h> #include <asm/vmx.h> #include <asm/svm.h> #include <asm/clocksource.h> #include <asm/pvclock-abi.h> #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm /* * Tracepoint for guest mode entry. */ TRACE_EVENT(kvm_entry, TP_PROTO(struct kvm_vcpu *vcpu, bool force_immediate_exit), TP_ARGS(vcpu, force_immediate_exit), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned long, rip ) __field( bool, immediate_exit ) ), TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; __entry->rip = kvm_rip_read(vcpu); __entry->immediate_exit = force_immediate_exit; ), TP_printk("vcpu %u, rip 0x%lx%s", __entry->vcpu_id, __entry->rip, __entry->immediate_exit ? "[immediate exit]" : "") ); /* * Tracepoint for hypercall. */ TRACE_EVENT(kvm_hypercall, TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3), TP_ARGS(nr, a0, a1, a2, a3), TP_STRUCT__entry( __field( unsigned long, nr ) __field( unsigned long, a0 ) __field( unsigned long, a1 ) __field( unsigned long, a2 ) __field( unsigned long, a3 ) ), TP_fast_assign( __entry->nr = nr; __entry->a0 = a0; __entry->a1 = a1; __entry->a2 = a2; __entry->a3 = a3; ), TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx", __entry->nr, __entry->a0, __entry->a1, __entry->a2, __entry->a3) ); /* * Tracepoint for hypercall. */ TRACE_EVENT(kvm_hv_hypercall, TP_PROTO(__u16 code, bool fast, __u16 var_cnt, __u16 rep_cnt, __u16 rep_idx, __u64 ingpa, __u64 outgpa), TP_ARGS(code, fast, var_cnt, rep_cnt, rep_idx, ingpa, outgpa), TP_STRUCT__entry( __field( __u16, rep_cnt ) __field( __u16, rep_idx ) __field( __u64, ingpa ) __field( __u64, outgpa ) __field( __u16, code ) __field( __u16, var_cnt ) __field( bool, fast ) ), TP_fast_assign( __entry->rep_cnt = rep_cnt; __entry->rep_idx = rep_idx; __entry->ingpa = ingpa; __entry->outgpa = outgpa; __entry->code = code; __entry->var_cnt = var_cnt; __entry->fast = fast; ), TP_printk("code 0x%x %s var_cnt 0x%x rep_cnt 0x%x idx 0x%x in 0x%llx out 0x%llx", __entry->code, __entry->fast ? "fast" : "slow", __entry->var_cnt, __entry->rep_cnt, __entry->rep_idx, __entry->ingpa, __entry->outgpa) ); TRACE_EVENT(kvm_hv_hypercall_done, TP_PROTO(u64 result), TP_ARGS(result), TP_STRUCT__entry( __field(__u64, result) ), TP_fast_assign( __entry->result = result; ), TP_printk("result 0x%llx", __entry->result) ); /* * Tracepoint for Xen hypercall. */ TRACE_EVENT(kvm_xen_hypercall, TP_PROTO(u8 cpl, unsigned long nr, unsigned long a0, unsigned long a1, unsigned long a2, unsigned long a3, unsigned long a4, unsigned long a5), TP_ARGS(cpl, nr, a0, a1, a2, a3, a4, a5), TP_STRUCT__entry( __field(u8, cpl) __field(unsigned long, nr) __field(unsigned long, a0) __field(unsigned long, a1) __field(unsigned long, a2) __field(unsigned long, a3) __field(unsigned long, a4) __field(unsigned long, a5) ), TP_fast_assign( __entry->cpl = cpl; __entry->nr = nr; __entry->a0 = a0; __entry->a1 = a1; __entry->a2 = a2; __entry->a3 = a3; __entry->a4 = a4; __entry->a4 = a5; ), TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx", __entry->cpl, __entry->nr, __entry->a0, __entry->a1, __entry->a2, __entry->a3, __entry->a4, __entry->a5) ); /* * Tracepoint for PIO. */ #define KVM_PIO_IN 0 #define KVM_PIO_OUT 1 TRACE_EVENT(kvm_pio, TP_PROTO(unsigned int rw, unsigned int port, unsigned int size, unsigned int count, const void *data), TP_ARGS(rw, port, size, count, data), TP_STRUCT__entry( __field( unsigned int, rw ) __field( unsigned int, port ) __field( unsigned int, size ) __field( unsigned int, count ) __field( unsigned int, val ) ), TP_fast_assign( __entry->rw = rw; __entry->port = port; __entry->size = size; __entry->count = count; if (size == 1) __entry->val = *(unsigned char *)data; else if (size == 2) __entry->val = *(unsigned short *)data; else __entry->val = *(unsigned int *)data; ), TP_printk("pio_%s at 0x%x size %d count %d val 0x%x %s", __entry->rw ? "write" : "read", __entry->port, __entry->size, __entry->count, __entry->val, __entry->count > 1 ? "(...)" : "") ); /* * Tracepoint for fast mmio. */ TRACE_EVENT(kvm_fast_mmio, TP_PROTO(u64 gpa), TP_ARGS(gpa), TP_STRUCT__entry( __field(u64, gpa) ), TP_fast_assign( __entry->gpa = gpa; ), TP_printk("fast mmio at gpa 0x%llx", __entry->gpa) ); /* * Tracepoint for cpuid. */ TRACE_EVENT(kvm_cpuid, TP_PROTO(unsigned int function, unsigned int index, unsigned long rax, unsigned long rbx, unsigned long rcx, unsigned long rdx, bool found, bool used_max_basic), TP_ARGS(function, index, rax, rbx, rcx, rdx, found, used_max_basic), TP_STRUCT__entry( __field( unsigned int, function ) __field( unsigned int, index ) __field( unsigned long, rax ) __field( unsigned long, rbx ) __field( unsigned long, rcx ) __field( unsigned long, rdx ) __field( bool, found ) __field( bool, used_max_basic ) ), TP_fast_assign( __entry->function = function; __entry->index = index; __entry->rax = rax; __entry->rbx = rbx; __entry->rcx = rcx; __entry->rdx = rdx; __entry->found = found; __entry->used_max_basic = used_max_basic; ), TP_printk("func %x idx %x rax %lx rbx %lx rcx %lx rdx %lx, cpuid entry %s%s", __entry->function, __entry->index, __entry->rax, __entry->rbx, __entry->rcx, __entry->rdx, __entry->found ? "found" : "not found", __entry->used_max_basic ? ", used max basic" : "") ); #define AREG(x) { APIC_##x, "APIC_" #x } #define kvm_trace_symbol_apic \ AREG(ID), AREG(LVR), AREG(TASKPRI), AREG(ARBPRI), AREG(PROCPRI), \ AREG(EOI), AREG(RRR), AREG(LDR), AREG(DFR), AREG(SPIV), AREG(ISR), \ AREG(TMR), AREG(IRR), AREG(ESR), AREG(ICR), AREG(ICR2), AREG(LVTT), \ AREG(LVTTHMR), AREG(LVTPC), AREG(LVT0), AREG(LVT1), AREG(LVTERR), \ AREG(TMICT), AREG(TMCCT), AREG(TDCR), AREG(SELF_IPI), AREG(EFEAT), \ AREG(ECTRL) /* * Tracepoint for apic access. */ TRACE_EVENT(kvm_apic, TP_PROTO(unsigned int rw, unsigned int reg, u64 val), TP_ARGS(rw, reg, val), TP_STRUCT__entry( __field( unsigned int, rw ) __field( unsigned int, reg ) __field( u64, val ) ), TP_fast_assign( __entry->rw = rw; __entry->reg = reg; __entry->val = val; ), TP_printk("apic_%s %s = 0x%llx", __entry->rw ? "write" : "read", __print_symbolic(__entry->reg, kvm_trace_symbol_apic), __entry->val) ); #define trace_kvm_apic_read(reg, val) trace_kvm_apic(0, reg, val) #define trace_kvm_apic_write(reg, val) trace_kvm_apic(1, reg, val) #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 #define kvm_print_exit_reason(exit_reason, isa) \ (isa == KVM_ISA_VMX) ? \ __print_symbolic(exit_reason & 0xffff, VMX_EXIT_REASONS) : \ __print_symbolic(exit_reason, SVM_EXIT_REASONS), \ (isa == KVM_ISA_VMX && exit_reason & ~0xffff) ? " " : "", \ (isa == KVM_ISA_VMX) ? \ __print_flags(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : "" #define TRACE_EVENT_KVM_EXIT(name) \ TRACE_EVENT(name, \ TP_PROTO(struct kvm_vcpu *vcpu, u32 isa), \ TP_ARGS(vcpu, isa), \ \ TP_STRUCT__entry( \ __field( unsigned int, exit_reason ) \ __field( unsigned long, guest_rip ) \ __field( u32, isa ) \ __field( u64, info1 ) \ __field( u64, info2 ) \ __field( u32, intr_info ) \ __field( u32, error_code ) \ __field( unsigned int, vcpu_id ) \ ), \ \ TP_fast_assign( \ __entry->guest_rip = kvm_rip_read(vcpu); \ __entry->isa = isa; \ __entry->vcpu_id = vcpu->vcpu_id; \ static_call(kvm_x86_get_exit_info)(vcpu, \ &__entry->exit_reason, \ &__entry->info1, \ &__entry->info2, \ &__entry->intr_info, \ &__entry->error_code); \ ), \ \ TP_printk("vcpu %u reason %s%s%s rip 0x%lx info1 0x%016llx " \ "info2 0x%016llx intr_info 0x%08x error_code 0x%08x", \ __entry->vcpu_id, \ kvm_print_exit_reason(__entry->exit_reason, __entry->isa), \ __entry->guest_rip, __entry->info1, __entry->info2, \ __entry->intr_info, __entry->error_code) \ ) /* * Tracepoint for kvm guest exit: */ TRACE_EVENT_KVM_EXIT(kvm_exit); /* * Tracepoint for kvm interrupt injection: */ TRACE_EVENT(kvm_inj_virq, TP_PROTO(unsigned int vector, bool soft, bool reinjected), TP_ARGS(vector, soft, reinjected), TP_STRUCT__entry( __field( unsigned int, vector ) __field( bool, soft ) __field( bool, reinjected ) ), TP_fast_assign( __entry->vector = vector; __entry->soft = soft; __entry->reinjected = reinjected; ), TP_printk("%s 0x%x%s", __entry->soft ? "Soft/INTn" : "IRQ", __entry->vector, __entry->reinjected ? " [reinjected]" : "") ); #define EXS(x) { x##_VECTOR, "#" #x } #define kvm_trace_sym_exc \ EXS(DE), EXS(DB), EXS(BP), EXS(OF), EXS(BR), EXS(UD), EXS(NM), \ EXS(DF), EXS(TS), EXS(NP), EXS(SS), EXS(GP), EXS(PF), \ EXS(MF), EXS(AC), EXS(MC) /* * Tracepoint for kvm interrupt injection: */ TRACE_EVENT(kvm_inj_exception, TP_PROTO(unsigned exception, bool has_error, unsigned error_code, bool reinjected), TP_ARGS(exception, has_error, error_code, reinjected), TP_STRUCT__entry( __field( u8, exception ) __field( u8, has_error ) __field( u32, error_code ) __field( bool, reinjected ) ), TP_fast_assign( __entry->exception = exception; __entry->has_error = has_error; __entry->error_code = error_code; __entry->reinjected = reinjected; ), TP_printk("%s%s%s%s%s", __print_symbolic(__entry->exception, kvm_trace_sym_exc), !__entry->has_error ? "" : " (", !__entry->has_error ? "" : __print_symbolic(__entry->error_code, { }), !__entry->has_error ? "" : ")", __entry->reinjected ? " [reinjected]" : "") ); /* * Tracepoint for page fault. */ TRACE_EVENT(kvm_page_fault, TP_PROTO(struct kvm_vcpu *vcpu, u64 fault_address, u64 error_code), TP_ARGS(vcpu, fault_address, error_code), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned long, guest_rip ) __field( u64, fault_address ) __field( u64, error_code ) ), TP_fast_assign( __entry->vcpu_id = vcpu->vcpu_id; __entry->guest_rip = kvm_rip_read(vcpu); __entry->fault_address = fault_address; __entry->error_code = error_code; ), TP_printk("vcpu %u rip 0x%lx address 0x%016llx error_code 0x%llx", __entry->vcpu_id, __entry->guest_rip, __entry->fault_address, __entry->error_code) ); /* * Tracepoint for guest MSR access. */ TRACE_EVENT(kvm_msr, TP_PROTO(unsigned write, u32 ecx, u64 data, bool exception), TP_ARGS(write, ecx, data, exception), TP_STRUCT__entry( __field( unsigned, write ) __field( u32, ecx ) __field( u64, data ) __field( u8, exception ) ), TP_fast_assign( __entry->write = write; __entry->ecx = ecx; __entry->data = data; __entry->exception = exception; ), TP_printk("msr_%s %x = 0x%llx%s", __entry->write ? "write" : "read", __entry->ecx, __entry->data, __entry->exception ? " (#GP)" : "") ); #define trace_kvm_msr_read(ecx, data) trace_kvm_msr(0, ecx, data, false) #define trace_kvm_msr_write(ecx, data) trace_kvm_msr(1, ecx, data, false) #define trace_kvm_msr_read_ex(ecx) trace_kvm_msr(0, ecx, 0, true) #define trace_kvm_msr_write_ex(ecx, data) trace_kvm_msr(1, ecx, data, true) /* * Tracepoint for guest CR access. */ TRACE_EVENT(kvm_cr, TP_PROTO(unsigned int rw, unsigned int cr, unsigned long val), TP_ARGS(rw, cr, val), TP_STRUCT__entry( __field( unsigned int, rw ) __field( unsigned int, cr ) __field( unsigned long, val ) ), TP_fast_assign( __entry->rw = rw; __entry->cr = cr; __entry->val = val; ), TP_printk("cr_%s %x = 0x%lx", __entry->rw ? "write" : "read", __entry->cr, __entry->val) ); #define trace_kvm_cr_read(cr, val) trace_kvm_cr(0, cr, val) #define trace_kvm_cr_write(cr, val) trace_kvm_cr(1, cr, val) TRACE_EVENT(kvm_pic_set_irq, TP_PROTO(__u8 chip, __u8 pin, __u8 elcr, __u8 imr, bool coalesced), TP_ARGS(chip, pin, elcr, imr, coalesced), TP_STRUCT__entry( __field( __u8, chip ) __field( __u8, pin ) __field( __u8, elcr ) __field( __u8, imr ) __field( bool, coalesced ) ), TP_fast_assign( __entry->chip = chip; __entry->pin = pin; __entry->elcr = elcr; __entry->imr = imr; __entry->coalesced = coalesced; ), TP_printk("chip %u pin %u (%s%s)%s", __entry->chip, __entry->pin, (__entry->elcr & (1 << __entry->pin)) ? "level":"edge", (__entry->imr & (1 << __entry->pin)) ? "|masked":"", __entry->coalesced ? " (coalesced)" : "") ); #define kvm_apic_dst_shorthand \ {0x0, "dst"}, \ {0x1, "self"}, \ {0x2, "all"}, \ {0x3, "all-but-self"} TRACE_EVENT(kvm_apic_ipi, TP_PROTO(__u32 icr_low, __u32 dest_id), TP_ARGS(icr_low, dest_id), TP_STRUCT__entry( __field( __u32, icr_low ) __field( __u32, dest_id ) ), TP_fast_assign( __entry->icr_low = icr_low; __entry->dest_id = dest_id; ), TP_printk("dst %x vec %u (%s|%s|%s|%s|%s)", __entry->dest_id, (u8)__entry->icr_low, __print_symbolic((__entry->icr_low >> 8 & 0x7), kvm_deliver_mode), (__entry->icr_low & (1<<11)) ? "logical" : "physical", (__entry->icr_low & (1<<14)) ? "assert" : "de-assert", (__entry->icr_low & (1<<15)) ? "level" : "edge", __print_symbolic((__entry->icr_low >> 18 & 0x3), kvm_apic_dst_shorthand)) ); TRACE_EVENT(kvm_apic_accept_irq, TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) __field( __u16, tm ) __field( __u8, vec ) ), TP_fast_assign( __entry->apicid = apicid; __entry->dm = dm; __entry->tm = tm; __entry->vec = vec; ), TP_printk("apicid %x vec %u (%s|%s)", __entry->apicid, __entry->vec, __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), __entry->tm ? "level" : "edge") ); TRACE_EVENT(kvm_eoi, TP_PROTO(struct kvm_lapic *apic, int vector), TP_ARGS(apic, vector), TP_STRUCT__entry( __field( __u32, apicid ) __field( int, vector ) ), TP_fast_assign( __entry->apicid = apic->vcpu->vcpu_id; __entry->vector = vector; ), TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) ); TRACE_EVENT(kvm_pv_eoi, TP_PROTO(struct kvm_lapic *apic, int vector), TP_ARGS(apic, vector), TP_STRUCT__entry( __field( __u32, apicid ) __field( int, vector ) ), TP_fast_assign( __entry->apicid = apic->vcpu->vcpu_id; __entry->vector = vector; ), TP_printk("apicid %x vector %d", __entry->apicid, __entry->vector) ); /* * Tracepoint for nested VMRUN */ TRACE_EVENT(kvm_nested_vmenter, TP_PROTO(__u64 rip, __u64 vmcb, __u64 nested_rip, __u32 int_ctl, __u32 event_inj, bool tdp_enabled, __u64 guest_tdp_pgd, __u64 guest_cr3, __u32 isa), TP_ARGS(rip, vmcb, nested_rip, int_ctl, event_inj, tdp_enabled, guest_tdp_pgd, guest_cr3, isa), TP_STRUCT__entry( __field( __u64, rip ) __field( __u64, vmcb ) __field( __u64, nested_rip ) __field( __u32, int_ctl ) __field( __u32, event_inj ) __field( bool, tdp_enabled ) __field( __u64, guest_pgd ) __field( __u32, isa ) ), TP_fast_assign( __entry->rip = rip; __entry->vmcb = vmcb; __entry->nested_rip = nested_rip; __entry->int_ctl = int_ctl; __entry->event_inj = event_inj; __entry->tdp_enabled = tdp_enabled; __entry->guest_pgd = tdp_enabled ? guest_tdp_pgd : guest_cr3; __entry->isa = isa; ), TP_printk("rip: 0x%016llx %s: 0x%016llx nested_rip: 0x%016llx " "int_ctl: 0x%08x event_inj: 0x%08x nested_%s=%s %s: 0x%016llx", __entry->rip, __entry->isa == KVM_ISA_VMX ? "vmcs" : "vmcb", __entry->vmcb, __entry->nested_rip, __entry->int_ctl, __entry->event_inj, __entry->isa == KVM_ISA_VMX ? "ept" : "npt", __entry->tdp_enabled ? "y" : "n", !__entry->tdp_enabled ? "guest_cr3" : __entry->isa == KVM_ISA_VMX ? "nested_eptp" : "nested_cr3", __entry->guest_pgd) ); TRACE_EVENT(kvm_nested_intercepts, TP_PROTO(__u16 cr_read, __u16 cr_write, __u32 exceptions, __u32 intercept1, __u32 intercept2, __u32 intercept3), TP_ARGS(cr_read, cr_write, exceptions, intercept1, intercept2, intercept3), TP_STRUCT__entry( __field( __u16, cr_read ) __field( __u16, cr_write ) __field( __u32, exceptions ) __field( __u32, intercept1 ) __field( __u32, intercept2 ) __field( __u32, intercept3 ) ), TP_fast_assign( __entry->cr_read = cr_read; __entry->cr_write = cr_write; __entry->exceptions = exceptions; __entry->intercept1 = intercept1; __entry->intercept2 = intercept2; __entry->intercept3 = intercept3; ), TP_printk("cr_read: %04x cr_write: %04x excp: %08x " "intercepts: %08x %08x %08x", __entry->cr_read, __entry->cr_write, __entry->exceptions, __entry->intercept1, __entry->intercept2, __entry->intercept3) ); /* * Tracepoint for #VMEXIT while nested */ TRACE_EVENT_KVM_EXIT(kvm_nested_vmexit); /* * Tracepoint for #VMEXIT reinjected to the guest */ TRACE_EVENT(kvm_nested_vmexit_inject, TP_PROTO(__u32 exit_code, __u64 exit_info1, __u64 exit_info2, __u32 exit_int_info, __u32 exit_int_info_err, __u32 isa), TP_ARGS(exit_code, exit_info1, exit_info2, exit_int_info, exit_int_info_err, isa), TP_STRUCT__entry( __field( __u32, exit_code ) __field( __u64, exit_info1 ) __field( __u64, exit_info2 ) __field( __u32, exit_int_info ) __field( __u32, exit_int_info_err ) __field( __u32, isa ) ), TP_fast_assign( __entry->exit_code = exit_code; __entry->exit_info1 = exit_info1; __entry->exit_info2 = exit_info2; __entry->exit_int_info = exit_int_info; __entry->exit_int_info_err = exit_int_info_err; __entry->isa = isa; ), TP_printk("reason: %s%s%s ext_inf1: 0x%016llx " "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", kvm_print_exit_reason(__entry->exit_code, __entry->isa), __entry->exit_info1, __entry->exit_info2, __entry->exit_int_info, __entry->exit_int_info_err) ); /* * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_nested_intr_vmexit, TP_PROTO(__u64 rip), TP_ARGS(rip), TP_STRUCT__entry( __field( __u64, rip ) ), TP_fast_assign( __entry->rip = rip ), TP_printk("rip: 0x%016llx", __entry->rip) ); /* * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_invlpga, TP_PROTO(__u64 rip, unsigned int asid, u64 address), TP_ARGS(rip, asid, address), TP_STRUCT__entry( __field( __u64, rip ) __field( unsigned int, asid ) __field( __u64, address ) ), TP_fast_assign( __entry->rip = rip; __entry->asid = asid; __entry->address = address; ), TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", __entry->rip, __entry->asid, __entry->address) ); /* * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_skinit, TP_PROTO(__u64 rip, __u32 slb), TP_ARGS(rip, slb), TP_STRUCT__entry( __field( __u64, rip ) __field( __u32, slb ) ), TP_fast_assign( __entry->rip = rip; __entry->slb = slb; ), TP_printk("rip: 0x%016llx slb: 0x%08x", __entry->rip, __entry->slb) ); #define KVM_EMUL_INSN_F_CR0_PE (1 << 0) #define KVM_EMUL_INSN_F_EFL_VM (1 << 1) #define KVM_EMUL_INSN_F_CS_D (1 << 2) #define KVM_EMUL_INSN_F_CS_L (1 << 3) #define kvm_trace_symbol_emul_flags \ { 0, "real" }, \ { KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_EFL_VM, "vm16" }, \ { KVM_EMUL_INSN_F_CR0_PE, "prot16" }, \ { KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_D, "prot32" }, \ { KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_L, "prot64" } #define kei_decode_mode(mode) ({ \ u8 flags = 0xff; \ switch (mode) { \ case X86EMUL_MODE_REAL: \ flags = 0; \ break; \ case X86EMUL_MODE_VM86: \ flags = KVM_EMUL_INSN_F_EFL_VM; \ break; \ case X86EMUL_MODE_PROT16: \ flags = KVM_EMUL_INSN_F_CR0_PE; \ break; \ case X86EMUL_MODE_PROT32: \ flags = KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_D; \ break; \ case X86EMUL_MODE_PROT64: \ flags = KVM_EMUL_INSN_F_CR0_PE \ | KVM_EMUL_INSN_F_CS_L; \ break; \ } \ flags; \ }) TRACE_EVENT(kvm_emulate_insn, TP_PROTO(struct kvm_vcpu *vcpu, __u8 failed), TP_ARGS(vcpu, failed), TP_STRUCT__entry( __field( __u64, rip ) __field( __u32, csbase ) __field( __u8, len ) __array( __u8, insn, 15 ) __field( __u8, flags ) __field( __u8, failed ) ), TP_fast_assign( __entry->csbase = static_call(kvm_x86_get_segment_base)(vcpu, VCPU_SREG_CS); __entry->len = vcpu->arch.emulate_ctxt->fetch.ptr - vcpu->arch.emulate_ctxt->fetch.data; __entry->rip = vcpu->arch.emulate_ctxt->_eip - __entry->len; memcpy(__entry->insn, vcpu->arch.emulate_ctxt->fetch.data, 15); __entry->flags = kei_decode_mode(vcpu->arch.emulate_ctxt->mode); __entry->failed = failed; ), TP_printk("%x:%llx:%s (%s)%s", __entry->csbase, __entry->rip, __print_hex(__entry->insn, __entry->len), __print_symbolic(__entry->flags, kvm_trace_symbol_emul_flags), __entry->failed ? " failed" : "" ) ); #define trace_kvm_emulate_insn_start(vcpu) trace_kvm_emulate_insn(vcpu, 0) #define trace_kvm_emulate_insn_failed(vcpu) trace_kvm_emulate_insn(vcpu, 1) TRACE_EVENT( vcpu_match_mmio, TP_PROTO(gva_t gva, gpa_t gpa, bool write, bool gpa_match), TP_ARGS(gva, gpa, write, gpa_match), TP_STRUCT__entry( __field(gva_t, gva) __field(gpa_t, gpa) __field(bool, write) __field(bool, gpa_match) ), TP_fast_assign( __entry->gva = gva; __entry->gpa = gpa; __entry->write = write; __entry->gpa_match = gpa_match ), TP_printk("gva %#lx gpa %#llx %s %s", __entry->gva, __entry->gpa, __entry->write ? "Write" : "Read", __entry->gpa_match ? "GPA" : "GVA") ); TRACE_EVENT(kvm_write_tsc_offset, TP_PROTO(unsigned int vcpu_id, __u64 previous_tsc_offset, __u64 next_tsc_offset), TP_ARGS(vcpu_id, previous_tsc_offset, next_tsc_offset), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( __u64, previous_tsc_offset ) __field( __u64, next_tsc_offset ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->previous_tsc_offset = previous_tsc_offset; __entry->next_tsc_offset = next_tsc_offset; ), TP_printk("vcpu=%u prev=%llu next=%llu", __entry->vcpu_id, __entry->previous_tsc_offset, __entry->next_tsc_offset) ); #ifdef CONFIG_X86_64 #define host_clocks \ {VDSO_CLOCKMODE_NONE, "none"}, \ {VDSO_CLOCKMODE_TSC, "tsc"} \ TRACE_EVENT(kvm_update_master_clock, TP_PROTO(bool use_master_clock, unsigned int host_clock, bool offset_matched), TP_ARGS(use_master_clock, host_clock, offset_matched), TP_STRUCT__entry( __field( bool, use_master_clock ) __field( unsigned int, host_clock ) __field( bool, offset_matched ) ), TP_fast_assign( __entry->use_master_clock = use_master_clock; __entry->host_clock = host_clock; __entry->offset_matched = offset_matched; ), TP_printk("masterclock %d hostclock %s offsetmatched %u", __entry->use_master_clock, __print_symbolic(__entry->host_clock, host_clocks), __entry->offset_matched) ); TRACE_EVENT(kvm_track_tsc, TP_PROTO(unsigned int vcpu_id, unsigned int nr_matched, unsigned int online_vcpus, bool use_master_clock, unsigned int host_clock), TP_ARGS(vcpu_id, nr_matched, online_vcpus, use_master_clock, host_clock), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned int, nr_vcpus_matched_tsc ) __field( unsigned int, online_vcpus ) __field( bool, use_master_clock ) __field( unsigned int, host_clock ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->nr_vcpus_matched_tsc = nr_matched; __entry->online_vcpus = online_vcpus; __entry->use_master_clock = use_master_clock; __entry->host_clock = host_clock; ), TP_printk("vcpu_id %u masterclock %u offsetmatched %u nr_online %u" " hostclock %s", __entry->vcpu_id, __entry->use_master_clock, __entry->nr_vcpus_matched_tsc, __entry->online_vcpus, __print_symbolic(__entry->host_clock, host_clocks)) ); #endif /* CONFIG_X86_64 */ /* * Tracepoint for PML full VMEXIT. */ TRACE_EVENT(kvm_pml_full, TP_PROTO(unsigned int vcpu_id), TP_ARGS(vcpu_id), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; ), TP_printk("vcpu %d: PML full", __entry->vcpu_id) ); TRACE_EVENT(kvm_ple_window_update, TP_PROTO(unsigned int vcpu_id, unsigned int new, unsigned int old), TP_ARGS(vcpu_id, new, old), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( unsigned int, new ) __field( unsigned int, old ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->new = new; __entry->old = old; ), TP_printk("vcpu %u old %u new %u (%s)", __entry->vcpu_id, __entry->old, __entry->new, __entry->old < __entry->new ? "growed" : "shrinked") ); TRACE_EVENT(kvm_pvclock_update, TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock), TP_ARGS(vcpu_id, pvclock), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( __u32, version ) __field( __u64, tsc_timestamp ) __field( __u64, system_time ) __field( __u32, tsc_to_system_mul ) __field( __s8, tsc_shift ) __field( __u8, flags ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->version = pvclock->version; __entry->tsc_timestamp = pvclock->tsc_timestamp; __entry->system_time = pvclock->system_time; __entry->tsc_to_system_mul = pvclock->tsc_to_system_mul; __entry->tsc_shift = pvclock->tsc_shift; __entry->flags = pvclock->flags; ), TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, " "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, " "flags 0x%x }", __entry->vcpu_id, __entry->version, __entry->tsc_timestamp, __entry->system_time, __entry->tsc_to_system_mul, __entry->tsc_shift, __entry->flags) ); TRACE_EVENT(kvm_wait_lapic_expire, TP_PROTO(unsigned int vcpu_id, s64 delta), TP_ARGS(vcpu_id, delta), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( s64, delta ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->delta = delta; ), TP_printk("vcpu %u: delta %lld (%s)", __entry->vcpu_id, __entry->delta, __entry->delta < 0 ? "early" : "late") ); TRACE_EVENT(kvm_smm_transition, TP_PROTO(unsigned int vcpu_id, u64 smbase, bool entering), TP_ARGS(vcpu_id, smbase, entering), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( u64, smbase ) __field( bool, entering ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->smbase = smbase; __entry->entering = entering; ), TP_printk("vcpu %u: %s SMM, smbase 0x%llx", __entry->vcpu_id, __entry->entering ? "entering" : "leaving", __entry->smbase) ); /* * Tracepoint for VT-d posted-interrupts and AMD-Vi Guest Virtual APIC. */ TRACE_EVENT(kvm_pi_irte_update, TP_PROTO(unsigned int host_irq, unsigned int vcpu_id, unsigned int gsi, unsigned int gvec, u64 pi_desc_addr, bool set), TP_ARGS(host_irq, vcpu_id, gsi, gvec, pi_desc_addr, set), TP_STRUCT__entry( __field( unsigned int, host_irq ) __field( unsigned int, vcpu_id ) __field( unsigned int, gsi ) __field( unsigned int, gvec ) __field( u64, pi_desc_addr ) __field( bool, set ) ), TP_fast_assign( __entry->host_irq = host_irq; __entry->vcpu_id = vcpu_id; __entry->gsi = gsi; __entry->gvec = gvec; __entry->pi_desc_addr = pi_desc_addr; __entry->set = set; ), TP_printk("PI is %s for irq %u, vcpu %u, gsi: 0x%x, " "gvec: 0x%x, pi_desc_addr: 0x%llx", __entry->set ? "enabled and being updated" : "disabled", __entry->host_irq, __entry->vcpu_id, __entry->gsi, __entry->gvec, __entry->pi_desc_addr) ); /* * Tracepoint for kvm_hv_notify_acked_sint. */ TRACE_EVENT(kvm_hv_notify_acked_sint, TP_PROTO(int vcpu_id, u32 sint), TP_ARGS(vcpu_id, sint), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, sint) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->sint = sint; ), TP_printk("vcpu_id %d sint %u", __entry->vcpu_id, __entry->sint) ); /* * Tracepoint for synic_set_irq. */ TRACE_EVENT(kvm_hv_synic_set_irq, TP_PROTO(int vcpu_id, u32 sint, int vector, int ret), TP_ARGS(vcpu_id, sint, vector, ret), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, sint) __field(int, vector) __field(int, ret) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->sint = sint; __entry->vector = vector; __entry->ret = ret; ), TP_printk("vcpu_id %d sint %u vector %d ret %d", __entry->vcpu_id, __entry->sint, __entry->vector, __entry->ret) ); /* * Tracepoint for kvm_hv_synic_send_eoi. */ TRACE_EVENT(kvm_hv_synic_send_eoi, TP_PROTO(int vcpu_id, int vector), TP_ARGS(vcpu_id, vector), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, sint) __field(int, vector) __field(int, ret) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->vector = vector; ), TP_printk("vcpu_id %d vector %d", __entry->vcpu_id, __entry->vector) ); /* * Tracepoint for synic_set_msr. */ TRACE_EVENT(kvm_hv_synic_set_msr, TP_PROTO(int vcpu_id, u32 msr, u64 data, bool host), TP_ARGS(vcpu_id, msr, data, host), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, msr) __field(u64, data) __field(bool, host) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->msr = msr; __entry->data = data; __entry->host = host ), TP_printk("vcpu_id %d msr 0x%x data 0x%llx host %d", __entry->vcpu_id, __entry->msr, __entry->data, __entry->host) ); /* * Tracepoint for stimer_set_config. */ TRACE_EVENT(kvm_hv_stimer_set_config, TP_PROTO(int vcpu_id, int timer_index, u64 config, bool host), TP_ARGS(vcpu_id, timer_index, config, host), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, config) __field(bool, host) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->config = config; __entry->host = host; ), TP_printk("vcpu_id %d timer %d config 0x%llx host %d", __entry->vcpu_id, __entry->timer_index, __entry->config, __entry->host) ); /* * Tracepoint for stimer_set_count. */ TRACE_EVENT(kvm_hv_stimer_set_count, TP_PROTO(int vcpu_id, int timer_index, u64 count, bool host), TP_ARGS(vcpu_id, timer_index, count, host), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, count) __field(bool, host) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->count = count; __entry->host = host; ), TP_printk("vcpu_id %d timer %d count %llu host %d", __entry->vcpu_id, __entry->timer_index, __entry->count, __entry->host) ); /* * Tracepoint for stimer_start(periodic timer case). */ TRACE_EVENT(kvm_hv_stimer_start_periodic, TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 exp_time), TP_ARGS(vcpu_id, timer_index, time_now, exp_time), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, time_now) __field(u64, exp_time) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->time_now = time_now; __entry->exp_time = exp_time; ), TP_printk("vcpu_id %d timer %d time_now %llu exp_time %llu", __entry->vcpu_id, __entry->timer_index, __entry->time_now, __entry->exp_time) ); /* * Tracepoint for stimer_start(one-shot timer case). */ TRACE_EVENT(kvm_hv_stimer_start_one_shot, TP_PROTO(int vcpu_id, int timer_index, u64 time_now, u64 count), TP_ARGS(vcpu_id, timer_index, time_now, count), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(u64, time_now) __field(u64, count) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->time_now = time_now; __entry->count = count; ), TP_printk("vcpu_id %d timer %d time_now %llu count %llu", __entry->vcpu_id, __entry->timer_index, __entry->time_now, __entry->count) ); /* * Tracepoint for stimer_timer_callback. */ TRACE_EVENT(kvm_hv_stimer_callback, TP_PROTO(int vcpu_id, int timer_index), TP_ARGS(vcpu_id, timer_index), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; ), TP_printk("vcpu_id %d timer %d", __entry->vcpu_id, __entry->timer_index) ); /* * Tracepoint for stimer_expiration. */ TRACE_EVENT(kvm_hv_stimer_expiration, TP_PROTO(int vcpu_id, int timer_index, int direct, int msg_send_result), TP_ARGS(vcpu_id, timer_index, direct, msg_send_result), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) __field(int, direct) __field(int, msg_send_result) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; __entry->direct = direct; __entry->msg_send_result = msg_send_result; ), TP_printk("vcpu_id %d timer %d direct %d send result %d", __entry->vcpu_id, __entry->timer_index, __entry->direct, __entry->msg_send_result) ); /* * Tracepoint for stimer_cleanup. */ TRACE_EVENT(kvm_hv_stimer_cleanup, TP_PROTO(int vcpu_id, int timer_index), TP_ARGS(vcpu_id, timer_index), TP_STRUCT__entry( __field(int, vcpu_id) __field(int, timer_index) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->timer_index = timer_index; ), TP_printk("vcpu_id %d timer %d", __entry->vcpu_id, __entry->timer_index) ); TRACE_EVENT(kvm_apicv_inhibit_changed, TP_PROTO(int reason, bool set, unsigned long inhibits), TP_ARGS(reason, set, inhibits), TP_STRUCT__entry( __field(int, reason) __field(bool, set) __field(unsigned long, inhibits) ), TP_fast_assign( __entry->reason = reason; __entry->set = set; __entry->inhibits = inhibits; ), TP_printk("%s reason=%u, inhibits=0x%lx", __entry->set ? "set" : "cleared", __entry->reason, __entry->inhibits) ); TRACE_EVENT(kvm_apicv_accept_irq, TP_PROTO(__u32 apicid, __u16 dm, __u16 tm, __u8 vec), TP_ARGS(apicid, dm, tm, vec), TP_STRUCT__entry( __field( __u32, apicid ) __field( __u16, dm ) __field( __u16, tm ) __field( __u8, vec ) ), TP_fast_assign( __entry->apicid = apicid; __entry->dm = dm; __entry->tm = tm; __entry->vec = vec; ), TP_printk("apicid %x vec %u (%s|%s)", __entry->apicid, __entry->vec, __print_symbolic((__entry->dm >> 8 & 0x7), kvm_deliver_mode), __entry->tm ? "level" : "edge") ); /* * Tracepoint for AMD AVIC */ TRACE_EVENT(kvm_avic_incomplete_ipi, TP_PROTO(u32 vcpu, u32 icrh, u32 icrl, u32 id, u32 index), TP_ARGS(vcpu, icrh, icrl, id, index), TP_STRUCT__entry( __field(u32, vcpu) __field(u32, icrh) __field(u32, icrl) __field(u32, id) __field(u32, index) ), TP_fast_assign( __entry->vcpu = vcpu; __entry->icrh = icrh; __entry->icrl = icrl; __entry->id = id; __entry->index = index; ), TP_printk("vcpu=%u, icrh:icrl=%#010x:%08x, id=%u, index=%u", __entry->vcpu, __entry->icrh, __entry->icrl, __entry->id, __entry->index) ); TRACE_EVENT(kvm_avic_unaccelerated_access, TP_PROTO(u32 vcpu, u32 offset, bool ft, bool rw, u32 vec), TP_ARGS(vcpu, offset, ft, rw, vec), TP_STRUCT__entry( __field(u32, vcpu) __field(u32, offset) __field(bool, ft) __field(bool, rw) __field(u32, vec) ), TP_fast_assign( __entry->vcpu = vcpu; __entry->offset = offset; __entry->ft = ft; __entry->rw = rw; __entry->vec = vec; ), TP_printk("vcpu=%u, offset=%#x(%s), %s, %s, vec=%#x", __entry->vcpu, __entry->offset, __print_symbolic(__entry->offset, kvm_trace_symbol_apic), __entry->ft ? "trap" : "fault", __entry->rw ? "write" : "read", __entry->vec) ); TRACE_EVENT(kvm_avic_ga_log, TP_PROTO(u32 vmid, u32 vcpuid), TP_ARGS(vmid, vcpuid), TP_STRUCT__entry( __field(u32, vmid) __field(u32, vcpuid) ), TP_fast_assign( __entry->vmid = vmid; __entry->vcpuid = vcpuid; ), TP_printk("vmid=%u, vcpuid=%u", __entry->vmid, __entry->vcpuid) ); TRACE_EVENT(kvm_avic_kick_vcpu_slowpath, TP_PROTO(u32 icrh, u32 icrl, u32 index), TP_ARGS(icrh, icrl, index), TP_STRUCT__entry( __field(u32, icrh) __field(u32, icrl) __field(u32, index) ), TP_fast_assign( __entry->icrh = icrh; __entry->icrl = icrl; __entry->index = index; ), TP_printk("icrh:icrl=%#08x:%08x, index=%u", __entry->icrh, __entry->icrl, __entry->index) ); TRACE_EVENT(kvm_avic_doorbell, TP_PROTO(u32 vcpuid, u32 apicid), TP_ARGS(vcpuid, apicid), TP_STRUCT__entry( __field(u32, vcpuid) __field(u32, apicid) ), TP_fast_assign( __entry->vcpuid = vcpuid; __entry->apicid = apicid; ), TP_printk("vcpuid=%u, apicid=%u", __entry->vcpuid, __entry->apicid) ); TRACE_EVENT(kvm_hv_timer_state, TP_PROTO(unsigned int vcpu_id, unsigned int hv_timer_in_use), TP_ARGS(vcpu_id, hv_timer_in_use), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(unsigned int, hv_timer_in_use) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->hv_timer_in_use = hv_timer_in_use; ), TP_printk("vcpu_id %x hv_timer %x", __entry->vcpu_id, __entry->hv_timer_in_use) ); /* * Tracepoint for kvm_hv_flush_tlb. */ TRACE_EVENT(kvm_hv_flush_tlb, TP_PROTO(u64 processor_mask, u64 address_space, u64 flags, bool guest_mode), TP_ARGS(processor_mask, address_space, flags, guest_mode), TP_STRUCT__entry( __field(u64, processor_mask) __field(u64, address_space) __field(u64, flags) __field(bool, guest_mode) ), TP_fast_assign( __entry->processor_mask = processor_mask; __entry->address_space = address_space; __entry->flags = flags; __entry->guest_mode = guest_mode; ), TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx %s", __entry->processor_mask, __entry->address_space, __entry->flags, __entry->guest_mode ? "(L2)" : "") ); /* * Tracepoint for kvm_hv_flush_tlb_ex. */ TRACE_EVENT(kvm_hv_flush_tlb_ex, TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags, bool guest_mode), TP_ARGS(valid_bank_mask, format, address_space, flags, guest_mode), TP_STRUCT__entry( __field(u64, valid_bank_mask) __field(u64, format) __field(u64, address_space) __field(u64, flags) __field(bool, guest_mode) ), TP_fast_assign( __entry->valid_bank_mask = valid_bank_mask; __entry->format = format; __entry->address_space = address_space; __entry->flags = flags; __entry->guest_mode = guest_mode; ), TP_printk("valid_bank_mask 0x%llx format 0x%llx " "address_space 0x%llx flags 0x%llx %s", __entry->valid_bank_mask, __entry->format, __entry->address_space, __entry->flags, __entry->guest_mode ? "(L2)" : "") ); /* * Tracepoints for kvm_hv_send_ipi. */ TRACE_EVENT(kvm_hv_send_ipi, TP_PROTO(u32 vector, u64 processor_mask), TP_ARGS(vector, processor_mask), TP_STRUCT__entry( __field(u32, vector) __field(u64, processor_mask) ), TP_fast_assign( __entry->vector = vector; __entry->processor_mask = processor_mask; ), TP_printk("vector %x processor_mask 0x%llx", __entry->vector, __entry->processor_mask) ); TRACE_EVENT(kvm_hv_send_ipi_ex, TP_PROTO(u32 vector, u64 format, u64 valid_bank_mask), TP_ARGS(vector, format, valid_bank_mask), TP_STRUCT__entry( __field(u32, vector) __field(u64, format) __field(u64, valid_bank_mask) ), TP_fast_assign( __entry->vector = vector; __entry->format = format; __entry->valid_bank_mask = valid_bank_mask; ), TP_printk("vector %x format %llx valid_bank_mask 0x%llx", __entry->vector, __entry->format, __entry->valid_bank_mask) ); TRACE_EVENT(kvm_pv_tlb_flush, TP_PROTO(unsigned int vcpu_id, bool need_flush_tlb), TP_ARGS(vcpu_id, need_flush_tlb), TP_STRUCT__entry( __field( unsigned int, vcpu_id ) __field( bool, need_flush_tlb ) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->need_flush_tlb = need_flush_tlb; ), TP_printk("vcpu %u need_flush_tlb %s", __entry->vcpu_id, __entry->need_flush_tlb ? "true" : "false") ); /* * Tracepoint for failed nested VMX VM-Enter. */ TRACE_EVENT(kvm_nested_vmenter_failed, TP_PROTO(const char *msg, u32 err), TP_ARGS(msg, err), TP_STRUCT__entry( __string(msg, msg) __field(u32, err) ), TP_fast_assign( __assign_str(msg); __entry->err = err; ), TP_printk("%s%s", __get_str(msg), !__entry->err ? "" : __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS)) ); /* * Tracepoint for syndbg_set_msr. */ TRACE_EVENT(kvm_hv_syndbg_set_msr, TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data), TP_ARGS(vcpu_id, vp_index, msr, data), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, vp_index) __field(u32, msr) __field(u64, data) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->vp_index = vp_index; __entry->msr = msr; __entry->data = data; ), TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx", __entry->vcpu_id, __entry->vp_index, __entry->msr, __entry->data) ); /* * Tracepoint for syndbg_get_msr. */ TRACE_EVENT(kvm_hv_syndbg_get_msr, TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data), TP_ARGS(vcpu_id, vp_index, msr, data), TP_STRUCT__entry( __field(int, vcpu_id) __field(u32, vp_index) __field(u32, msr) __field(u64, data) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->vp_index = vp_index; __entry->msr = msr; __entry->data = data; ), TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx", __entry->vcpu_id, __entry->vp_index, __entry->msr, __entry->data) ); /* * Tracepoint for the start of VMGEXIT processing */ TRACE_EVENT(kvm_vmgexit_enter, TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb), TP_ARGS(vcpu_id, ghcb), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(u64, exit_reason) __field(u64, info1) __field(u64, info2) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->exit_reason = ghcb->save.sw_exit_code; __entry->info1 = ghcb->save.sw_exit_info_1; __entry->info2 = ghcb->save.sw_exit_info_2; ), TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx", __entry->vcpu_id, __entry->exit_reason, __entry->info1, __entry->info2) ); /* * Tracepoint for the end of VMGEXIT processing */ TRACE_EVENT(kvm_vmgexit_exit, TP_PROTO(unsigned int vcpu_id, struct ghcb *ghcb), TP_ARGS(vcpu_id, ghcb), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(u64, exit_reason) __field(u64, info1) __field(u64, info2) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->exit_reason = ghcb->save.sw_exit_code; __entry->info1 = ghcb->save.sw_exit_info_1; __entry->info2 = ghcb->save.sw_exit_info_2; ), TP_printk("vcpu %u, exit_reason %llx, exit_info1 %llx, exit_info2 %llx", __entry->vcpu_id, __entry->exit_reason, __entry->info1, __entry->info2) ); /* * Tracepoint for the start of VMGEXIT MSR procotol processing */ TRACE_EVENT(kvm_vmgexit_msr_protocol_enter, TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa), TP_ARGS(vcpu_id, ghcb_gpa), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(u64, ghcb_gpa) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->ghcb_gpa = ghcb_gpa; ), TP_printk("vcpu %u, ghcb_gpa %016llx", __entry->vcpu_id, __entry->ghcb_gpa) ); /* * Tracepoint for the end of VMGEXIT MSR procotol processing */ TRACE_EVENT(kvm_vmgexit_msr_protocol_exit, TP_PROTO(unsigned int vcpu_id, u64 ghcb_gpa, int result), TP_ARGS(vcpu_id, ghcb_gpa, result), TP_STRUCT__entry( __field(unsigned int, vcpu_id) __field(u64, ghcb_gpa) __field(int, result) ), TP_fast_assign( __entry->vcpu_id = vcpu_id; __entry->ghcb_gpa = ghcb_gpa; __entry->result = result; ), TP_printk("vcpu %u, ghcb_gpa %016llx, result %d", __entry->vcpu_id, __entry->ghcb_gpa, __entry->result) ); #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH #define TRACE_INCLUDE_PATH ../../arch/x86/kvm #undef TRACE_INCLUDE_FILE #define TRACE_INCLUDE_FILE trace /* This part must be outside protection */ #include <trace/define_trace.h>
9 9 6 6 6 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 // SPDX-License-Identifier: GPL-2.0-only /* * lowlevel.c * * PURPOSE * Low Level Device Routines for the UDF filesystem * * COPYRIGHT * (C) 1999-2001 Ben Fennema * * HISTORY * * 03/26/99 blf Created. */ #include "udfdecl.h" #include <linux/blkdev.h> #include <linux/cdrom.h> #include <linux/uaccess.h> #include "udf_sb.h" unsigned int udf_get_last_session(struct super_block *sb) { struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk); struct cdrom_multisession ms_info; if (!cdi) { udf_debug("CDROMMULTISESSION not supported.\n"); return 0; } ms_info.addr_format = CDROM_LBA; if (cdrom_multisession(cdi, &ms_info) == 0) { udf_debug("XA disk: %s, vol_desc_start=%d\n", ms_info.xa_flag ? "yes" : "no", ms_info.addr.lba); if (ms_info.xa_flag) /* necessary for a valid ms_info.addr */ return ms_info.addr.lba; } return 0; } udf_pblk_t udf_get_last_block(struct super_block *sb) { struct cdrom_device_info *cdi = disk_to_cdi(sb->s_bdev->bd_disk); unsigned long lblock = 0; /* * The cdrom layer call failed or returned obviously bogus value? * Try using the device size... */ if (!cdi || cdrom_get_last_written(cdi, &lblock) || lblock == 0) { if (sb_bdev_nr_blocks(sb) > ~(udf_pblk_t)0) return 0; lblock = sb_bdev_nr_blocks(sb); } if (lblock) return lblock - 1; return 0; }
162 126 1 3 88 84 5 62 163 128 2 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_DCCP_H #define _LINUX_DCCP_H #include <linux/in.h> #include <linux/interrupt.h> #include <linux/ktime.h> #include <linux/list.h> #include <linux/uio.h> #include <linux/workqueue.h> #include <net/inet_connection_sock.h> #include <net/inet_sock.h> #include <net/inet_timewait_sock.h> #include <net/tcp_states.h> #include <uapi/linux/dccp.h> enum dccp_state { DCCP_OPEN = TCP_ESTABLISHED, DCCP_REQUESTING = TCP_SYN_SENT, DCCP_LISTEN = TCP_LISTEN, DCCP_RESPOND = TCP_SYN_RECV, /* * States involved in closing a DCCP connection: * 1) ACTIVE_CLOSEREQ is entered by a server sending a CloseReq. * * 2) CLOSING can have three different meanings (RFC 4340, 8.3): * a. Client has performed active-close, has sent a Close to the server * from state OPEN or PARTOPEN, and is waiting for the final Reset * (in this case, SOCK_DONE == 1). * b. Client is asked to perform passive-close, by receiving a CloseReq * in (PART)OPEN state. It sends a Close and waits for final Reset * (in this case, SOCK_DONE == 0). * c. Server performs an active-close as in (a), keeps TIMEWAIT state. * * 3) The following intermediate states are employed to give passively * closing nodes a chance to process their unread data: * - PASSIVE_CLOSE (from OPEN => CLOSED) and * - PASSIVE_CLOSEREQ (from (PART)OPEN to CLOSING; case (b) above). */ DCCP_ACTIVE_CLOSEREQ = TCP_FIN_WAIT1, DCCP_PASSIVE_CLOSE = TCP_CLOSE_WAIT, /* any node receiving a Close */ DCCP_CLOSING = TCP_CLOSING, DCCP_TIME_WAIT = TCP_TIME_WAIT, DCCP_CLOSED = TCP_CLOSE, DCCP_NEW_SYN_RECV = TCP_NEW_SYN_RECV, DCCP_PARTOPEN = TCP_MAX_STATES, DCCP_PASSIVE_CLOSEREQ, /* clients receiving CloseReq */ DCCP_MAX_STATES }; enum { DCCPF_OPEN = TCPF_ESTABLISHED, DCCPF_REQUESTING = TCPF_SYN_SENT, DCCPF_LISTEN = TCPF_LISTEN, DCCPF_RESPOND = TCPF_SYN_RECV, DCCPF_ACTIVE_CLOSEREQ = TCPF_FIN_WAIT1, DCCPF_CLOSING = TCPF_CLOSING, DCCPF_TIME_WAIT = TCPF_TIME_WAIT, DCCPF_CLOSED = TCPF_CLOSE, DCCPF_NEW_SYN_RECV = TCPF_NEW_SYN_RECV, DCCPF_PARTOPEN = (1 << DCCP_PARTOPEN), }; static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb) { return (struct dccp_hdr *)skb_transport_header(skb); } static inline struct dccp_hdr *dccp_zeroed_hdr(struct sk_buff *skb, int headlen) { skb_push(skb, headlen); skb_reset_transport_header(skb); return memset(skb_transport_header(skb), 0, headlen); } static inline struct dccp_hdr_ext *dccp_hdrx(const struct dccp_hdr *dh) { return (struct dccp_hdr_ext *)((unsigned char *)dh + sizeof(*dh)); } static inline unsigned int __dccp_basic_hdr_len(const struct dccp_hdr *dh) { return sizeof(*dh) + (dh->dccph_x ? sizeof(struct dccp_hdr_ext) : 0); } static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); return __dccp_basic_hdr_len(dh); } static inline __u64 dccp_hdr_seq(const struct dccp_hdr *dh) { __u64 seq_nr = ntohs(dh->dccph_seq); if (dh->dccph_x != 0) seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(dh)->dccph_seq_low); else seq_nr += (u32)dh->dccph_seq2 << 16; return seq_nr; } static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb) { return (struct dccp_hdr_request *)(skb_transport_header(skb) + dccp_basic_hdr_len(skb)); } static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb) { return (struct dccp_hdr_ack_bits *)(skb_transport_header(skb) + dccp_basic_hdr_len(skb)); } static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb) { const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb); return ((u64)ntohs(dhack->dccph_ack_nr_high) << 32) + ntohl(dhack->dccph_ack_nr_low); } static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb) { return (struct dccp_hdr_response *)(skb_transport_header(skb) + dccp_basic_hdr_len(skb)); } static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb) { return (struct dccp_hdr_reset *)(skb_transport_header(skb) + dccp_basic_hdr_len(skb)); } static inline unsigned int __dccp_hdr_len(const struct dccp_hdr *dh) { return __dccp_basic_hdr_len(dh) + dccp_packet_hdr_len(dh->dccph_type); } static inline unsigned int dccp_hdr_len(const struct sk_buff *skb) { return __dccp_hdr_len(dccp_hdr(skb)); } /** * struct dccp_request_sock - represent DCCP-specific connection request * @dreq_inet_rsk: structure inherited from * @dreq_iss: initial sequence number, sent on the first Response (RFC 4340, 7.1) * @dreq_gss: greatest sequence number sent (for retransmitted Responses) * @dreq_isr: initial sequence number received in the first Request * @dreq_gsr: greatest sequence number received (for retransmitted Request(s)) * @dreq_service: service code present on the Request (there is just one) * @dreq_featneg: feature negotiation options for this connection * The following two fields are analogous to the ones in dccp_sock: * @dreq_timestamp_echo: last received timestamp to echo (13.1) * @dreq_timestamp_echo: the time of receiving the last @dreq_timestamp_echo */ struct dccp_request_sock { struct inet_request_sock dreq_inet_rsk; __u64 dreq_iss; __u64 dreq_gss; __u64 dreq_isr; __u64 dreq_gsr; __be32 dreq_service; spinlock_t dreq_lock; struct list_head dreq_featneg; __u32 dreq_timestamp_echo; __u32 dreq_timestamp_time; }; static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req) { return (struct dccp_request_sock *)req; } extern struct inet_timewait_death_row dccp_death_row; extern int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, struct sk_buff *skb); struct dccp_options_received { u64 dccpor_ndp:48; u32 dccpor_timestamp; u32 dccpor_timestamp_echo; u32 dccpor_elapsed_time; }; struct ccid; enum dccp_role { DCCP_ROLE_UNDEFINED, DCCP_ROLE_LISTEN, DCCP_ROLE_CLIENT, DCCP_ROLE_SERVER, }; struct dccp_service_list { __u32 dccpsl_nr; __be32 dccpsl_list[]; }; #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) #define DCCP_SERVICE_CODE_IS_ABSENT 0 static inline bool dccp_list_has_service(const struct dccp_service_list *sl, const __be32 service) { if (likely(sl != NULL)) { u32 i = sl->dccpsl_nr; while (i--) if (sl->dccpsl_list[i] == service) return true; } return false; } struct dccp_ackvec; /** * struct dccp_sock - DCCP socket state * * @dccps_swl - sequence number window low * @dccps_swh - sequence number window high * @dccps_awl - acknowledgement number window low * @dccps_awh - acknowledgement number window high * @dccps_iss - initial sequence number sent * @dccps_isr - initial sequence number received * @dccps_osr - first OPEN sequence number received * @dccps_gss - greatest sequence number sent * @dccps_gsr - greatest valid sequence number received * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss * @dccps_service - first (passive sock) or unique (active sock) service code * @dccps_service_list - second .. last service code on passive socket * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option * @dccps_timestamp_time - time of receiving latest @dccps_timestamp_echo * @dccps_l_ack_ratio - feature-local Ack Ratio * @dccps_r_ack_ratio - feature-remote Ack Ratio * @dccps_l_seq_win - local Sequence Window (influences ack number validity) * @dccps_r_seq_win - remote Sequence Window (influences seq number validity) * @dccps_pcslen - sender partial checksum coverage (via sockopt) * @dccps_pcrlen - receiver partial checksum coverage (via sockopt) * @dccps_send_ndp_count - local Send NDP Count feature (7.7.2) * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_mss_cache - current value of MSS (path MTU minus header sizes) * @dccps_rate_last - timestamp for rate-limiting DCCP-Sync (RFC 4340, 7.5.4) * @dccps_featneg - tracks feature-negotiation state (mostly during handshake) * @dccps_hc_rx_ackvec - rx half connection ack vector * @dccps_hc_rx_ccid - CCID used for the receiver (or receiving half-connection) * @dccps_hc_tx_ccid - CCID used for the sender (or sending half-connection) * @dccps_options_received - parsed set of retrieved options * @dccps_qpolicy - TX dequeueing policy, one of %dccp_packet_dequeueing_policy * @dccps_tx_qlen - maximum length of the TX queue * @dccps_role - role of this sock, one of %dccp_role * @dccps_hc_rx_insert_options - receiver wants to add options when acking * @dccps_hc_tx_insert_options - sender wants to add options when sending * @dccps_server_timewait - server holds timewait state on close (RFC 4340, 8.3) * @dccps_sync_scheduled - flag which signals "send out-of-band message soon" * @dccps_xmitlet - tasklet scheduled by the TX CCID to dequeue data packets * @dccps_xmit_timer - used by the TX CCID to delay sending (rate-based pacing) * @dccps_syn_rtt - RTT sample from Request/Response exchange (in usecs) */ struct dccp_sock { /* inet_connection_sock has to be the first member of dccp_sock */ struct inet_connection_sock dccps_inet_connection; #define dccps_syn_rtt dccps_inet_connection.icsk_ack.lrcvtime __u64 dccps_swl; __u64 dccps_swh; __u64 dccps_awl; __u64 dccps_awh; __u64 dccps_iss; __u64 dccps_isr; __u64 dccps_osr; __u64 dccps_gss; __u64 dccps_gsr; __u64 dccps_gar; __be32 dccps_service; __u32 dccps_mss_cache; struct dccp_service_list *dccps_service_list; __u32 dccps_timestamp_echo; __u32 dccps_timestamp_time; __u16 dccps_l_ack_ratio; __u16 dccps_r_ack_ratio; __u64 dccps_l_seq_win:48; __u64 dccps_r_seq_win:48; __u8 dccps_pcslen:4; __u8 dccps_pcrlen:4; __u8 dccps_send_ndp_count:1; __u64 dccps_ndp_count:48; unsigned long dccps_rate_last; struct list_head dccps_featneg; struct dccp_ackvec *dccps_hc_rx_ackvec; struct ccid *dccps_hc_rx_ccid; struct ccid *dccps_hc_tx_ccid; struct dccp_options_received dccps_options_received; __u8 dccps_qpolicy; __u32 dccps_tx_qlen; enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; __u8 dccps_server_timewait:1; __u8 dccps_sync_scheduled:1; struct tasklet_struct dccps_xmitlet; struct timer_list dccps_xmit_timer; }; #define dccp_sk(ptr) container_of_const(ptr, struct dccp_sock, \ dccps_inet_connection.icsk_inet.sk) static inline const char *dccp_role(const struct sock *sk) { switch (dccp_sk(sk)->dccps_role) { case DCCP_ROLE_UNDEFINED: return "undefined"; case DCCP_ROLE_LISTEN: return "listen"; case DCCP_ROLE_SERVER: return "server"; case DCCP_ROLE_CLIENT: return "client"; } return NULL; } extern void dccp_syn_ack_timeout(const struct request_sock *req); #endif /* _LINUX_DCCP_H */
2 22 6 6 22 22 22 22 22 22 22 6 22 22 22 10 1 10 10 11 12 22 22 22 22 22 22 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 26 18 10 10 26 10 10 2 10 4 10 10 12 12 12 12 1 12 10 10 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ #include <linux/mm.h> #include <linux/llist.h> #include <linux/bpf.h> #include <linux/irq_work.h> #include <linux/bpf_mem_alloc.h> #include <linux/memcontrol.h> #include <asm/local.h> /* Any context (including NMI) BPF specific memory allocator. * * Tracing BPF programs can attach to kprobe and fentry. Hence they * run in unknown context where calling plain kmalloc() might not be safe. * * Front-end kmalloc() with per-cpu per-bucket cache of free elements. * Refill this cache asynchronously from irq_work. * * CPU_0 buckets * 16 32 64 96 128 196 256 512 1024 2048 4096 * ... * CPU_N buckets * 16 32 64 96 128 196 256 512 1024 2048 4096 * * The buckets are prefilled at the start. * BPF programs always run with migration disabled. * It's safe to allocate from cache of the current cpu with irqs disabled. * Free-ing is always done into bucket of the current cpu as well. * irq_work trims extra free elements from buckets with kfree * and refills them with kmalloc, so global kmalloc logic takes care * of freeing objects allocated by one cpu and freed on another. * * Every allocated objected is padded with extra 8 bytes that contains * struct llist_node. */ #define LLIST_NODE_SZ sizeof(struct llist_node) /* similar to kmalloc, but sizeof == 8 bucket is gone */ static u8 size_index[24] __ro_after_init = { 3, /* 8 */ 3, /* 16 */ 4, /* 24 */ 4, /* 32 */ 5, /* 40 */ 5, /* 48 */ 5, /* 56 */ 5, /* 64 */ 1, /* 72 */ 1, /* 80 */ 1, /* 88 */ 1, /* 96 */ 6, /* 104 */ 6, /* 112 */ 6, /* 120 */ 6, /* 128 */ 2, /* 136 */ 2, /* 144 */ 2, /* 152 */ 2, /* 160 */ 2, /* 168 */ 2, /* 176 */ 2, /* 184 */ 2 /* 192 */ }; static int bpf_mem_cache_idx(size_t size) { if (!size || size > 4096) return -1; if (size <= 192) return size_index[(size - 1) / 8] - 1; return fls(size - 1) - 2; } #define NUM_CACHES 11 struct bpf_mem_cache { /* per-cpu list of free objects of size 'unit_size'. * All accesses are done with interrupts disabled and 'active' counter * protection with __llist_add() and __llist_del_first(). */ struct llist_head free_llist; local_t active; /* Operations on the free_list from unit_alloc/unit_free/bpf_mem_refill * are sequenced by per-cpu 'active' counter. But unit_free() cannot * fail. When 'active' is busy the unit_free() will add an object to * free_llist_extra. */ struct llist_head free_llist_extra; struct irq_work refill_work; struct obj_cgroup *objcg; int unit_size; /* count of objects in free_llist */ int free_cnt; int low_watermark, high_watermark, batch; int percpu_size; bool draining; struct bpf_mem_cache *tgt; /* list of objects to be freed after RCU GP */ struct llist_head free_by_rcu; struct llist_node *free_by_rcu_tail; struct llist_head waiting_for_gp; struct llist_node *waiting_for_gp_tail; struct rcu_head rcu; atomic_t call_rcu_in_progress; struct llist_head free_llist_extra_rcu; /* list of objects to be freed after RCU tasks trace GP */ struct llist_head free_by_rcu_ttrace; struct llist_head waiting_for_gp_ttrace; struct rcu_head rcu_ttrace; atomic_t call_rcu_ttrace_in_progress; }; struct bpf_mem_caches { struct bpf_mem_cache cache[NUM_CACHES]; }; static const u16 sizes[NUM_CACHES] = {96, 192, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096}; static struct llist_node notrace *__llist_del_first(struct llist_head *head) { struct llist_node *entry, *next; entry = head->first; if (!entry) return NULL; next = entry->next; head->first = next; return entry; } static void *__alloc(struct bpf_mem_cache *c, int node, gfp_t flags) { if (c->percpu_size) { void **obj = kmalloc_node(c->percpu_size, flags, node); void *pptr = __alloc_percpu_gfp(c->unit_size, 8, flags); if (!obj || !pptr) { free_percpu(pptr); kfree(obj); return NULL; } obj[1] = pptr; return obj; } return kmalloc_node(c->unit_size, flags | __GFP_ZERO, node); } static struct mem_cgroup *get_memcg(const struct bpf_mem_cache *c) { #ifdef CONFIG_MEMCG_KMEM if (c->objcg) return get_mem_cgroup_from_objcg(c->objcg); #endif #ifdef CONFIG_MEMCG return root_mem_cgroup; #else return NULL; #endif } static void inc_active(struct bpf_mem_cache *c, unsigned long *flags) { if (IS_ENABLED(CONFIG_PREEMPT_RT)) /* In RT irq_work runs in per-cpu kthread, so disable * interrupts to avoid preemption and interrupts and * reduce the chance of bpf prog executing on this cpu * when active counter is busy. */ local_irq_save(*flags); /* alloc_bulk runs from irq_work which will not preempt a bpf * program that does unit_alloc/unit_free since IRQs are * disabled there. There is no race to increment 'active' * counter. It protects free_llist from corruption in case NMI * bpf prog preempted this loop. */ WARN_ON_ONCE(local_inc_return(&c->active) != 1); } static void dec_active(struct bpf_mem_cache *c, unsigned long *flags) { local_dec(&c->active); if (IS_ENABLED(CONFIG_PREEMPT_RT)) local_irq_restore(*flags); } static void add_obj_to_free_list(struct bpf_mem_cache *c, void *obj) { unsigned long flags; inc_active(c, &flags); __llist_add(obj, &c->free_llist); c->free_cnt++; dec_active(c, &flags); } /* Mostly runs from irq_work except __init phase. */ static void alloc_bulk(struct bpf_mem_cache *c, int cnt, int node, bool atomic) { struct mem_cgroup *memcg = NULL, *old_memcg; gfp_t gfp; void *obj; int i; gfp = __GFP_NOWARN | __GFP_ACCOUNT; gfp |= atomic ? GFP_NOWAIT : GFP_KERNEL; for (i = 0; i < cnt; i++) { /* * For every 'c' llist_del_first(&c->free_by_rcu_ttrace); is * done only by one CPU == current CPU. Other CPUs might * llist_add() and llist_del_all() in parallel. */ obj = llist_del_first(&c->free_by_rcu_ttrace); if (!obj) break; add_obj_to_free_list(c, obj); } if (i >= cnt) return; for (; i < cnt; i++) { obj = llist_del_first(&c->waiting_for_gp_ttrace); if (!obj) break; add_obj_to_free_list(c, obj); } if (i >= cnt) return; memcg = get_memcg(c); old_memcg = set_active_memcg(memcg); for (; i < cnt; i++) { /* Allocate, but don't deplete atomic reserves that typical * GFP_ATOMIC would do. irq_work runs on this cpu and kmalloc * will allocate from the current numa node which is what we * want here. */ obj = __alloc(c, node, gfp); if (!obj) break; add_obj_to_free_list(c, obj); } set_active_memcg(old_memcg); mem_cgroup_put(memcg); } static void free_one(void *obj, bool percpu) { if (percpu) { free_percpu(((void **)obj)[1]); kfree(obj); return; } kfree(obj); } static int free_all(struct llist_node *llnode, bool percpu) { struct llist_node *pos, *t; int cnt = 0; llist_for_each_safe(pos, t, llnode) { free_one(pos, percpu); cnt++; } return cnt; } static void __free_rcu(struct rcu_head *head) { struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu_ttrace); free_all(llist_del_all(&c->waiting_for_gp_ttrace), !!c->percpu_size); atomic_set(&c->call_rcu_ttrace_in_progress, 0); } static void __free_rcu_tasks_trace(struct rcu_head *head) { /* If RCU Tasks Trace grace period implies RCU grace period, * there is no need to invoke call_rcu(). */ if (rcu_trace_implies_rcu_gp()) __free_rcu(head); else call_rcu(head, __free_rcu); } static void enque_to_free(struct bpf_mem_cache *c, void *obj) { struct llist_node *llnode = obj; /* bpf_mem_cache is a per-cpu object. Freeing happens in irq_work. * Nothing races to add to free_by_rcu_ttrace list. */ llist_add(llnode, &c->free_by_rcu_ttrace); } static void do_call_rcu_ttrace(struct bpf_mem_cache *c) { struct llist_node *llnode, *t; if (atomic_xchg(&c->call_rcu_ttrace_in_progress, 1)) { if (unlikely(READ_ONCE(c->draining))) { llnode = llist_del_all(&c->free_by_rcu_ttrace); free_all(llnode, !!c->percpu_size); } return; } WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp_ttrace)); llist_for_each_safe(llnode, t, llist_del_all(&c->free_by_rcu_ttrace)) llist_add(llnode, &c->waiting_for_gp_ttrace); if (unlikely(READ_ONCE(c->draining))) { __free_rcu(&c->rcu_ttrace); return; } /* Use call_rcu_tasks_trace() to wait for sleepable progs to finish. * If RCU Tasks Trace grace period implies RCU grace period, free * these elements directly, else use call_rcu() to wait for normal * progs to finish and finally do free_one() on each element. */ call_rcu_tasks_trace(&c->rcu_ttrace, __free_rcu_tasks_trace); } static void free_bulk(struct bpf_mem_cache *c) { struct bpf_mem_cache *tgt = c->tgt; struct llist_node *llnode, *t; unsigned long flags; int cnt; WARN_ON_ONCE(tgt->unit_size != c->unit_size); WARN_ON_ONCE(tgt->percpu_size != c->percpu_size); do { inc_active(c, &flags); llnode = __llist_del_first(&c->free_llist); if (llnode) cnt = --c->free_cnt; else cnt = 0; dec_active(c, &flags); if (llnode) enque_to_free(tgt, llnode); } while (cnt > (c->high_watermark + c->low_watermark) / 2); /* and drain free_llist_extra */ llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra)) enque_to_free(tgt, llnode); do_call_rcu_ttrace(tgt); } static void __free_by_rcu(struct rcu_head *head) { struct bpf_mem_cache *c = container_of(head, struct bpf_mem_cache, rcu); struct bpf_mem_cache *tgt = c->tgt; struct llist_node *llnode; WARN_ON_ONCE(tgt->unit_size != c->unit_size); WARN_ON_ONCE(tgt->percpu_size != c->percpu_size); llnode = llist_del_all(&c->waiting_for_gp); if (!llnode) goto out; llist_add_batch(llnode, c->waiting_for_gp_tail, &tgt->free_by_rcu_ttrace); /* Objects went through regular RCU GP. Send them to RCU tasks trace */ do_call_rcu_ttrace(tgt); out: atomic_set(&c->call_rcu_in_progress, 0); } static void check_free_by_rcu(struct bpf_mem_cache *c) { struct llist_node *llnode, *t; unsigned long flags; /* drain free_llist_extra_rcu */ if (unlikely(!llist_empty(&c->free_llist_extra_rcu))) { inc_active(c, &flags); llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra_rcu)) if (__llist_add(llnode, &c->free_by_rcu)) c->free_by_rcu_tail = llnode; dec_active(c, &flags); } if (llist_empty(&c->free_by_rcu)) return; if (atomic_xchg(&c->call_rcu_in_progress, 1)) { /* * Instead of kmalloc-ing new rcu_head and triggering 10k * call_rcu() to hit rcutree.qhimark and force RCU to notice * the overload just ask RCU to hurry up. There could be many * objects in free_by_rcu list. * This hint reduces memory consumption for an artificial * benchmark from 2 Gbyte to 150 Mbyte. */ rcu_request_urgent_qs_task(current); return; } WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp)); inc_active(c, &flags); WRITE_ONCE(c->waiting_for_gp.first, __llist_del_all(&c->free_by_rcu)); c->waiting_for_gp_tail = c->free_by_rcu_tail; dec_active(c, &flags); if (unlikely(READ_ONCE(c->draining))) { free_all(llist_del_all(&c->waiting_for_gp), !!c->percpu_size); atomic_set(&c->call_rcu_in_progress, 0); } else { call_rcu_hurry(&c->rcu, __free_by_rcu); } } static void bpf_mem_refill(struct irq_work *work) { struct bpf_mem_cache *c = container_of(work, struct bpf_mem_cache, refill_work); int cnt; /* Racy access to free_cnt. It doesn't need to be 100% accurate */ cnt = c->free_cnt; if (cnt < c->low_watermark) /* irq_work runs on this cpu and kmalloc will allocate * from the current numa node which is what we want here. */ alloc_bulk(c, c->batch, NUMA_NO_NODE, true); else if (cnt > c->high_watermark) free_bulk(c); check_free_by_rcu(c); } static void notrace irq_work_raise(struct bpf_mem_cache *c) { irq_work_queue(&c->refill_work); } /* For typical bpf map case that uses bpf_mem_cache_alloc and single bucket * the freelist cache will be elem_size * 64 (or less) on each cpu. * * For bpf programs that don't have statically known allocation sizes and * assuming (low_mark + high_mark) / 2 as an average number of elements per * bucket and all buckets are used the total amount of memory in freelists * on each cpu will be: * 64*16 + 64*32 + 64*64 + 64*96 + 64*128 + 64*196 + 64*256 + 32*512 + 16*1024 + 8*2048 + 4*4096 * == ~ 116 Kbyte using below heuristic. * Initialized, but unused bpf allocator (not bpf map specific one) will * consume ~ 11 Kbyte per cpu. * Typical case will be between 11K and 116K closer to 11K. * bpf progs can and should share bpf_mem_cache when possible. * * Percpu allocation is typically rare. To avoid potential unnecessary large * memory consumption, set low_mark = 1 and high_mark = 3, resulting in c->batch = 1. */ static void init_refill_work(struct bpf_mem_cache *c) { init_irq_work(&c->refill_work, bpf_mem_refill); if (c->percpu_size) { c->low_watermark = 1; c->high_watermark = 3; } else if (c->unit_size <= 256) { c->low_watermark = 32; c->high_watermark = 96; } else { /* When page_size == 4k, order-0 cache will have low_mark == 2 * and high_mark == 6 with batch alloc of 3 individual pages at * a time. * 8k allocs and above low == 1, high == 3, batch == 1. */ c->low_watermark = max(32 * 256 / c->unit_size, 1); c->high_watermark = max(96 * 256 / c->unit_size, 3); } c->batch = max((c->high_watermark - c->low_watermark) / 4 * 3, 1); } static void prefill_mem_cache(struct bpf_mem_cache *c, int cpu) { int cnt = 1; /* To avoid consuming memory, for non-percpu allocation, assume that * 1st run of bpf prog won't be doing more than 4 map_update_elem from * irq disabled region if unit size is less than or equal to 256. * For all other cases, let us just do one allocation. */ if (!c->percpu_size && c->unit_size <= 256) cnt = 4; alloc_bulk(c, cnt, cpu_to_node(cpu), false); } /* When size != 0 bpf_mem_cache for each cpu. * This is typical bpf hash map use case when all elements have equal size. * * When size == 0 allocate 11 bpf_mem_cache-s for each cpu, then rely on * kmalloc/kfree. Max allocation size is 4096 in this case. * This is bpf_dynptr and bpf_kptr use case. */ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size, bool percpu) { struct bpf_mem_caches *cc, __percpu *pcc; struct bpf_mem_cache *c, __percpu *pc; struct obj_cgroup *objcg = NULL; int cpu, i, unit_size, percpu_size = 0; if (percpu && size == 0) return -EINVAL; /* room for llist_node and per-cpu pointer */ if (percpu) percpu_size = LLIST_NODE_SZ + sizeof(void *); ma->percpu = percpu; if (size) { pc = __alloc_percpu_gfp(sizeof(*pc), 8, GFP_KERNEL); if (!pc) return -ENOMEM; if (!percpu) size += LLIST_NODE_SZ; /* room for llist_node */ unit_size = size; #ifdef CONFIG_MEMCG_KMEM if (memcg_bpf_enabled()) objcg = get_obj_cgroup_from_current(); #endif ma->objcg = objcg; for_each_possible_cpu(cpu) { c = per_cpu_ptr(pc, cpu); c->unit_size = unit_size; c->objcg = objcg; c->percpu_size = percpu_size; c->tgt = c; init_refill_work(c); prefill_mem_cache(c, cpu); } ma->cache = pc; return 0; } pcc = __alloc_percpu_gfp(sizeof(*cc), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; #ifdef CONFIG_MEMCG_KMEM objcg = get_obj_cgroup_from_current(); #endif ma->objcg = objcg; for_each_possible_cpu(cpu) { cc = per_cpu_ptr(pcc, cpu); for (i = 0; i < NUM_CACHES; i++) { c = &cc->cache[i]; c->unit_size = sizes[i]; c->objcg = objcg; c->percpu_size = percpu_size; c->tgt = c; init_refill_work(c); prefill_mem_cache(c, cpu); } } ma->caches = pcc; return 0; } int bpf_mem_alloc_percpu_init(struct bpf_mem_alloc *ma, struct obj_cgroup *objcg) { struct bpf_mem_caches __percpu *pcc; pcc = __alloc_percpu_gfp(sizeof(struct bpf_mem_caches), 8, GFP_KERNEL); if (!pcc) return -ENOMEM; ma->caches = pcc; ma->objcg = objcg; ma->percpu = true; return 0; } int bpf_mem_alloc_percpu_unit_init(struct bpf_mem_alloc *ma, int size) { struct bpf_mem_caches *cc, __percpu *pcc; int cpu, i, unit_size, percpu_size; struct obj_cgroup *objcg; struct bpf_mem_cache *c; i = bpf_mem_cache_idx(size); if (i < 0) return -EINVAL; /* room for llist_node and per-cpu pointer */ percpu_size = LLIST_NODE_SZ + sizeof(void *); unit_size = sizes[i]; objcg = ma->objcg; pcc = ma->caches; for_each_possible_cpu(cpu) { cc = per_cpu_ptr(pcc, cpu); c = &cc->cache[i]; if (c->unit_size) break; c->unit_size = unit_size; c->objcg = objcg; c->percpu_size = percpu_size; c->tgt = c; init_refill_work(c); prefill_mem_cache(c, cpu); } return 0; } static void drain_mem_cache(struct bpf_mem_cache *c) { bool percpu = !!c->percpu_size; /* No progs are using this bpf_mem_cache, but htab_map_free() called * bpf_mem_cache_free() for all remaining elements and they can be in * free_by_rcu_ttrace or in waiting_for_gp_ttrace lists, so drain those lists now. * * Except for waiting_for_gp_ttrace list, there are no concurrent operations * on these lists, so it is safe to use __llist_del_all(). */ free_all(llist_del_all(&c->free_by_rcu_ttrace), percpu); free_all(llist_del_all(&c->waiting_for_gp_ttrace), percpu); free_all(__llist_del_all(&c->free_llist), percpu); free_all(__llist_del_all(&c->free_llist_extra), percpu); free_all(__llist_del_all(&c->free_by_rcu), percpu); free_all(__llist_del_all(&c->free_llist_extra_rcu), percpu); free_all(llist_del_all(&c->waiting_for_gp), percpu); } static void check_mem_cache(struct bpf_mem_cache *c) { WARN_ON_ONCE(!llist_empty(&c->free_by_rcu_ttrace)); WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp_ttrace)); WARN_ON_ONCE(!llist_empty(&c->free_llist)); WARN_ON_ONCE(!llist_empty(&c->free_llist_extra)); WARN_ON_ONCE(!llist_empty(&c->free_by_rcu)); WARN_ON_ONCE(!llist_empty(&c->free_llist_extra_rcu)); WARN_ON_ONCE(!llist_empty(&c->waiting_for_gp)); } static void check_leaked_objs(struct bpf_mem_alloc *ma) { struct bpf_mem_caches *cc; struct bpf_mem_cache *c; int cpu, i; if (ma->cache) { for_each_possible_cpu(cpu) { c = per_cpu_ptr(ma->cache, cpu); check_mem_cache(c); } } if (ma->caches) { for_each_possible_cpu(cpu) { cc = per_cpu_ptr(ma->caches, cpu); for (i = 0; i < NUM_CACHES; i++) { c = &cc->cache[i]; check_mem_cache(c); } } } } static void free_mem_alloc_no_barrier(struct bpf_mem_alloc *ma) { check_leaked_objs(ma); free_percpu(ma->cache); free_percpu(ma->caches); ma->cache = NULL; ma->caches = NULL; } static void free_mem_alloc(struct bpf_mem_alloc *ma) { /* waiting_for_gp[_ttrace] lists were drained, but RCU callbacks * might still execute. Wait for them. * * rcu_barrier_tasks_trace() doesn't imply synchronize_rcu_tasks_trace(), * but rcu_barrier_tasks_trace() and rcu_barrier() below are only used * to wait for the pending __free_rcu_tasks_trace() and __free_rcu(), * so if call_rcu(head, __free_rcu) is skipped due to * rcu_trace_implies_rcu_gp(), it will be OK to skip rcu_barrier() by * using rcu_trace_implies_rcu_gp() as well. */ rcu_barrier(); /* wait for __free_by_rcu */ rcu_barrier_tasks_trace(); /* wait for __free_rcu */ if (!rcu_trace_implies_rcu_gp()) rcu_barrier(); free_mem_alloc_no_barrier(ma); } static void free_mem_alloc_deferred(struct work_struct *work) { struct bpf_mem_alloc *ma = container_of(work, struct bpf_mem_alloc, work); free_mem_alloc(ma); kfree(ma); } static void destroy_mem_alloc(struct bpf_mem_alloc *ma, int rcu_in_progress) { struct bpf_mem_alloc *copy; if (!rcu_in_progress) { /* Fast path. No callbacks are pending, hence no need to do * rcu_barrier-s. */ free_mem_alloc_no_barrier(ma); return; } copy = kmemdup(ma, sizeof(*ma), GFP_KERNEL); if (!copy) { /* Slow path with inline barrier-s */ free_mem_alloc(ma); return; } /* Defer barriers into worker to let the rest of map memory to be freed */ memset(ma, 0, sizeof(*ma)); INIT_WORK(&copy->work, free_mem_alloc_deferred); queue_work(system_unbound_wq, &copy->work); } void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma) { struct bpf_mem_caches *cc; struct bpf_mem_cache *c; int cpu, i, rcu_in_progress; if (ma->cache) { rcu_in_progress = 0; for_each_possible_cpu(cpu) { c = per_cpu_ptr(ma->cache, cpu); WRITE_ONCE(c->draining, true); irq_work_sync(&c->refill_work); drain_mem_cache(c); rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress); rcu_in_progress += atomic_read(&c->call_rcu_in_progress); } obj_cgroup_put(ma->objcg); destroy_mem_alloc(ma, rcu_in_progress); } if (ma->caches) { rcu_in_progress = 0; for_each_possible_cpu(cpu) { cc = per_cpu_ptr(ma->caches, cpu); for (i = 0; i < NUM_CACHES; i++) { c = &cc->cache[i]; WRITE_ONCE(c->draining, true); irq_work_sync(&c->refill_work); drain_mem_cache(c); rcu_in_progress += atomic_read(&c->call_rcu_ttrace_in_progress); rcu_in_progress += atomic_read(&c->call_rcu_in_progress); } } obj_cgroup_put(ma->objcg); destroy_mem_alloc(ma, rcu_in_progress); } } /* notrace is necessary here and in other functions to make sure * bpf programs cannot attach to them and cause llist corruptions. */ static void notrace *unit_alloc(struct bpf_mem_cache *c) { struct llist_node *llnode = NULL; unsigned long flags; int cnt = 0; /* Disable irqs to prevent the following race for majority of prog types: * prog_A * bpf_mem_alloc * preemption or irq -> prog_B * bpf_mem_alloc * * but prog_B could be a perf_event NMI prog. * Use per-cpu 'active' counter to order free_list access between * unit_alloc/unit_free/bpf_mem_refill. */ local_irq_save(flags); if (local_inc_return(&c->active) == 1) { llnode = __llist_del_first(&c->free_llist); if (llnode) { cnt = --c->free_cnt; *(struct bpf_mem_cache **)llnode = c; } } local_dec(&c->active); WARN_ON(cnt < 0); if (cnt < c->low_watermark) irq_work_raise(c); /* Enable IRQ after the enqueue of irq work completes, so irq work * will run after IRQ is enabled and free_llist may be refilled by * irq work before other task preempts current task. */ local_irq_restore(flags); return llnode; } /* Though 'ptr' object could have been allocated on a different cpu * add it to the free_llist of the current cpu. * Let kfree() logic deal with it when it's later called from irq_work. */ static void notrace unit_free(struct bpf_mem_cache *c, void *ptr) { struct llist_node *llnode = ptr - LLIST_NODE_SZ; unsigned long flags; int cnt = 0; BUILD_BUG_ON(LLIST_NODE_SZ > 8); /* * Remember bpf_mem_cache that allocated this object. * The hint is not accurate. */ c->tgt = *(struct bpf_mem_cache **)llnode; local_irq_save(flags); if (local_inc_return(&c->active) == 1) { __llist_add(llnode, &c->free_llist); cnt = ++c->free_cnt; } else { /* unit_free() cannot fail. Therefore add an object to atomic * llist. free_bulk() will drain it. Though free_llist_extra is * a per-cpu list we have to use atomic llist_add here, since * it also can be interrupted by bpf nmi prog that does another * unit_free() into the same free_llist_extra. */ llist_add(llnode, &c->free_llist_extra); } local_dec(&c->active); if (cnt > c->high_watermark) /* free few objects from current cpu into global kmalloc pool */ irq_work_raise(c); /* Enable IRQ after irq_work_raise() completes, otherwise when current * task is preempted by task which does unit_alloc(), unit_alloc() may * return NULL unexpectedly because irq work is already pending but can * not been triggered and free_llist can not be refilled timely. */ local_irq_restore(flags); } static void notrace unit_free_rcu(struct bpf_mem_cache *c, void *ptr) { struct llist_node *llnode = ptr - LLIST_NODE_SZ; unsigned long flags; c->tgt = *(struct bpf_mem_cache **)llnode; local_irq_save(flags); if (local_inc_return(&c->active) == 1) { if (__llist_add(llnode, &c->free_by_rcu)) c->free_by_rcu_tail = llnode; } else { llist_add(llnode, &c->free_llist_extra_rcu); } local_dec(&c->active); if (!atomic_read(&c->call_rcu_in_progress)) irq_work_raise(c); local_irq_restore(flags); } /* Called from BPF program or from sys_bpf syscall. * In both cases migration is disabled. */ void notrace *bpf_mem_alloc(struct bpf_mem_alloc *ma, size_t size) { int idx; void *ret; if (!size) return NULL; if (!ma->percpu) size += LLIST_NODE_SZ; idx = bpf_mem_cache_idx(size); if (idx < 0) return NULL; ret = unit_alloc(this_cpu_ptr(ma->caches)->cache + idx); return !ret ? NULL : ret + LLIST_NODE_SZ; } void notrace bpf_mem_free(struct bpf_mem_alloc *ma, void *ptr) { struct bpf_mem_cache *c; int idx; if (!ptr) return; c = *(void **)(ptr - LLIST_NODE_SZ); idx = bpf_mem_cache_idx(c->unit_size); if (WARN_ON_ONCE(idx < 0)) return; unit_free(this_cpu_ptr(ma->caches)->cache + idx, ptr); } void notrace bpf_mem_free_rcu(struct bpf_mem_alloc *ma, void *ptr) { struct bpf_mem_cache *c; int idx; if (!ptr) return; c = *(void **)(ptr - LLIST_NODE_SZ); idx = bpf_mem_cache_idx(c->unit_size); if (WARN_ON_ONCE(idx < 0)) return; unit_free_rcu(this_cpu_ptr(ma->caches)->cache + idx, ptr); } void notrace *bpf_mem_cache_alloc(struct bpf_mem_alloc *ma) { void *ret; ret = unit_alloc(this_cpu_ptr(ma->cache)); return !ret ? NULL : ret + LLIST_NODE_SZ; } void notrace bpf_mem_cache_free(struct bpf_mem_alloc *ma, void *ptr) { if (!ptr) return; unit_free(this_cpu_ptr(ma->cache), ptr); } void notrace bpf_mem_cache_free_rcu(struct bpf_mem_alloc *ma, void *ptr) { if (!ptr) return; unit_free_rcu(this_cpu_ptr(ma->cache), ptr); } /* Directly does a kfree() without putting 'ptr' back to the free_llist * for reuse and without waiting for a rcu_tasks_trace gp. * The caller must first go through the rcu_tasks_trace gp for 'ptr' * before calling bpf_mem_cache_raw_free(). * It could be used when the rcu_tasks_trace callback does not have * a hold on the original bpf_mem_alloc object that allocated the * 'ptr'. This should only be used in the uncommon code path. * Otherwise, the bpf_mem_alloc's free_llist cannot be refilled * and may affect performance. */ void bpf_mem_cache_raw_free(void *ptr) { if (!ptr) return; kfree(ptr - LLIST_NODE_SZ); } /* When flags == GFP_KERNEL, it signals that the caller will not cause * deadlock when using kmalloc. bpf_mem_cache_alloc_flags() will use * kmalloc if the free_llist is empty. */ void notrace *bpf_mem_cache_alloc_flags(struct bpf_mem_alloc *ma, gfp_t flags) { struct bpf_mem_cache *c; void *ret; c = this_cpu_ptr(ma->cache); ret = unit_alloc(c); if (!ret && flags == GFP_KERNEL) { struct mem_cgroup *memcg, *old_memcg; memcg = get_memcg(c); old_memcg = set_active_memcg(memcg); ret = __alloc(c, NUMA_NO_NODE, GFP_KERNEL | __GFP_NOWARN | __GFP_ACCOUNT); if (ret) *(struct bpf_mem_cache **)ret = c; set_active_memcg(old_memcg); mem_cgroup_put(memcg); } return !ret ? NULL : ret + LLIST_NODE_SZ; }
13 13 4 7 8 8 8 8 2 2 8 8 8 13 13 11 2 12 12 12 13 13 13 12 14 2 12 1 1 1 9 8 8 3 5 5 1 4 5 7 7 1 1 2 5 7 1 6 6 5 1 2 6 6 2 3 1 13 1 1 11 10 2 9 9 6 6 3 1 2 16 16 2 3 1 1 3 1 1 2 4 1 3 1 2 5 2 1 1 1 13 14 1 5 2 1 1 1 3 31 31 5 26 14 14 1 12 1 14 14 7 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 /* RFCOMM implementation for Linux Bluetooth stack (BlueZ). Copyright (C) 2002 Maxim Krasnyansky <maxk@qualcomm.com> Copyright (C) 2002 Marcel Holtmann <marcel@holtmann.org> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* * RFCOMM sockets. */ #include <linux/compat.h> #include <linux/export.h> #include <linux/debugfs.h> #include <linux/sched/signal.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> #include <net/bluetooth/rfcomm.h> static const struct proto_ops rfcomm_sock_ops; static struct bt_sock_list rfcomm_sk_list = { .lock = __RW_LOCK_UNLOCKED(rfcomm_sk_list.lock) }; static void rfcomm_sock_close(struct sock *sk); static void rfcomm_sock_kill(struct sock *sk); /* ---- DLC callbacks ---- * * called under rfcomm_dlc_lock() */ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb) { struct sock *sk = d->owner; if (!sk) return; atomic_add(skb->len, &sk->sk_rmem_alloc); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk); if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) rfcomm_dlc_throttle(d); } static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err) { struct sock *sk = d->owner, *parent; if (!sk) return; BT_DBG("dlc %p state %ld err %d", d, d->state, err); lock_sock(sk); if (err) sk->sk_err = err; sk->sk_state = d->state; parent = bt_sk(sk)->parent; if (parent) { if (d->state == BT_CLOSED) { sock_set_flag(sk, SOCK_ZAPPED); bt_accept_unlink(sk); } parent->sk_data_ready(parent); } else { if (d->state == BT_CONNECTED) rfcomm_session_getaddr(d->session, &rfcomm_pi(sk)->src, NULL); sk->sk_state_change(sk); } release_sock(sk); if (parent && sock_flag(sk, SOCK_ZAPPED)) { /* We have to drop DLC lock here, otherwise * rfcomm_sock_destruct() will dead lock. */ rfcomm_dlc_unlock(d); rfcomm_sock_kill(sk); rfcomm_dlc_lock(d); } } /* ---- Socket functions ---- */ static struct sock *__rfcomm_get_listen_sock_by_addr(u8 channel, bdaddr_t *src) { struct sock *sk = NULL; sk_for_each(sk, &rfcomm_sk_list.head) { if (rfcomm_pi(sk)->channel != channel) continue; if (bacmp(&rfcomm_pi(sk)->src, src)) continue; if (sk->sk_state == BT_BOUND || sk->sk_state == BT_LISTEN) break; } return sk ? sk : NULL; } /* Find socket with channel and source bdaddr. * Returns closest match. */ static struct sock *rfcomm_get_sock_by_channel(int state, u8 channel, bdaddr_t *src) { struct sock *sk = NULL, *sk1 = NULL; read_lock(&rfcomm_sk_list.lock); sk_for_each(sk, &rfcomm_sk_list.head) { if (state && sk->sk_state != state) continue; if (rfcomm_pi(sk)->channel == channel) { /* Exact match. */ if (!bacmp(&rfcomm_pi(sk)->src, src)) break; /* Closest match */ if (!bacmp(&rfcomm_pi(sk)->src, BDADDR_ANY)) sk1 = sk; } } read_unlock(&rfcomm_sk_list.lock); return sk ? sk : sk1; } static void rfcomm_sock_destruct(struct sock *sk) { struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; BT_DBG("sk %p dlc %p", sk, d); skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); rfcomm_dlc_lock(d); rfcomm_pi(sk)->dlc = NULL; /* Detach DLC if it's owned by this socket */ if (d->owner == sk) d->owner = NULL; rfcomm_dlc_unlock(d); rfcomm_dlc_put(d); } static void rfcomm_sock_cleanup_listen(struct sock *parent) { struct sock *sk; BT_DBG("parent %p", parent); /* Close not yet accepted dlcs */ while ((sk = bt_accept_dequeue(parent, NULL))) { rfcomm_sock_close(sk); rfcomm_sock_kill(sk); } parent->sk_state = BT_CLOSED; sock_set_flag(parent, SOCK_ZAPPED); } /* Kill socket (only if zapped and orphan) * Must be called on unlocked socket. */ static void rfcomm_sock_kill(struct sock *sk) { if (!sock_flag(sk, SOCK_ZAPPED) || sk->sk_socket) return; BT_DBG("sk %p state %d refcnt %d", sk, sk->sk_state, refcount_read(&sk->sk_refcnt)); /* Kill poor orphan */ bt_sock_unlink(&rfcomm_sk_list, sk); sock_set_flag(sk, SOCK_DEAD); sock_put(sk); } static void __rfcomm_sock_close(struct sock *sk) { struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; BT_DBG("sk %p state %d socket %p", sk, sk->sk_state, sk->sk_socket); switch (sk->sk_state) { case BT_LISTEN: rfcomm_sock_cleanup_listen(sk); break; case BT_CONNECT: case BT_CONNECT2: case BT_CONFIG: case BT_CONNECTED: rfcomm_dlc_close(d, 0); fallthrough; default: sock_set_flag(sk, SOCK_ZAPPED); break; } } /* Close socket. * Must be called on unlocked socket. */ static void rfcomm_sock_close(struct sock *sk) { lock_sock(sk); __rfcomm_sock_close(sk); release_sock(sk); } static void rfcomm_sock_init(struct sock *sk, struct sock *parent) { struct rfcomm_pinfo *pi = rfcomm_pi(sk); BT_DBG("sk %p", sk); if (parent) { sk->sk_type = parent->sk_type; pi->dlc->defer_setup = test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags); pi->sec_level = rfcomm_pi(parent)->sec_level; pi->role_switch = rfcomm_pi(parent)->role_switch; security_sk_clone(parent, sk); } else { pi->dlc->defer_setup = 0; pi->sec_level = BT_SECURITY_LOW; pi->role_switch = 0; } pi->dlc->sec_level = pi->sec_level; pi->dlc->role_switch = pi->role_switch; } static struct proto rfcomm_proto = { .name = "RFCOMM", .owner = THIS_MODULE, .obj_size = sizeof(struct rfcomm_pinfo) }; static struct sock *rfcomm_sock_alloc(struct net *net, struct socket *sock, int proto, gfp_t prio, int kern) { struct rfcomm_dlc *d; struct sock *sk; sk = bt_sock_alloc(net, sock, &rfcomm_proto, proto, prio, kern); if (!sk) return NULL; d = rfcomm_dlc_alloc(prio); if (!d) { sk_free(sk); return NULL; } d->data_ready = rfcomm_sk_data_ready; d->state_change = rfcomm_sk_state_change; rfcomm_pi(sk)->dlc = d; d->owner = sk; sk->sk_destruct = rfcomm_sock_destruct; sk->sk_sndtimeo = RFCOMM_CONN_TIMEOUT; sk->sk_sndbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; sk->sk_rcvbuf = RFCOMM_MAX_CREDITS * RFCOMM_DEFAULT_MTU * 10; bt_sock_link(&rfcomm_sk_list, sk); BT_DBG("sk %p", sk); return sk; } static int rfcomm_sock_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; BT_DBG("sock %p", sock); sock->state = SS_UNCONNECTED; if (sock->type != SOCK_STREAM && sock->type != SOCK_RAW) return -ESOCKTNOSUPPORT; sock->ops = &rfcomm_sock_ops; sk = rfcomm_sock_alloc(net, sock, protocol, GFP_ATOMIC, kern); if (!sk) return -ENOMEM; rfcomm_sock_init(sk, NULL); return 0; } static int rfcomm_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_len) { struct sockaddr_rc sa; struct sock *sk = sock->sk; int len, err = 0; if (!addr || addr_len < offsetofend(struct sockaddr, sa_family) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; memset(&sa, 0, sizeof(sa)); len = min_t(unsigned int, sizeof(sa), addr_len); memcpy(&sa, addr, len); BT_DBG("sk %p %pMR", sk, &sa.rc_bdaddr); lock_sock(sk); if (sk->sk_state != BT_OPEN) { err = -EBADFD; goto done; } if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; } write_lock(&rfcomm_sk_list.lock); if (sa.rc_channel && __rfcomm_get_listen_sock_by_addr(sa.rc_channel, &sa.rc_bdaddr)) { err = -EADDRINUSE; } else { /* Save source address */ bacpy(&rfcomm_pi(sk)->src, &sa.rc_bdaddr); rfcomm_pi(sk)->channel = sa.rc_channel; sk->sk_state = BT_BOUND; } write_unlock(&rfcomm_sk_list.lock); done: release_sock(sk); return err; } static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int alen, int flags) { struct sockaddr_rc *sa = (struct sockaddr_rc *) addr; struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; int err = 0; BT_DBG("sk %p", sk); if (alen < sizeof(struct sockaddr_rc) || addr->sa_family != AF_BLUETOOTH) return -EINVAL; sock_hold(sk); lock_sock(sk); if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) { err = -EBADFD; goto done; } if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; } sk->sk_state = BT_CONNECT; bacpy(&rfcomm_pi(sk)->dst, &sa->rc_bdaddr); rfcomm_pi(sk)->channel = sa->rc_channel; d->sec_level = rfcomm_pi(sk)->sec_level; d->role_switch = rfcomm_pi(sk)->role_switch; /* Drop sock lock to avoid potential deadlock with the RFCOMM lock */ release_sock(sk); err = rfcomm_dlc_open(d, &rfcomm_pi(sk)->src, &sa->rc_bdaddr, sa->rc_channel); lock_sock(sk); if (!err && !sock_flag(sk, SOCK_ZAPPED)) err = bt_sock_wait_state(sk, BT_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); done: release_sock(sk); sock_put(sk); return err; } static int rfcomm_sock_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; int err = 0; BT_DBG("sk %p backlog %d", sk, backlog); lock_sock(sk); if (sk->sk_state != BT_BOUND) { err = -EBADFD; goto done; } if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; } if (!rfcomm_pi(sk)->channel) { bdaddr_t *src = &rfcomm_pi(sk)->src; u8 channel; err = -EINVAL; write_lock(&rfcomm_sk_list.lock); for (channel = 1; channel < 31; channel++) if (!__rfcomm_get_listen_sock_by_addr(channel, src)) { rfcomm_pi(sk)->channel = channel; err = 0; break; } write_unlock(&rfcomm_sk_list.lock); if (err < 0) goto done; } sk->sk_max_ack_backlog = backlog; sk->sk_ack_backlog = 0; sk->sk_state = BT_LISTEN; done: release_sock(sk); return err; } static int rfcomm_sock_accept(struct socket *sock, struct socket *newsock, struct proto_accept_arg *arg) { DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk, *nsk; long timeo; int err = 0; lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; goto done; } timeo = sock_rcvtimeo(sk, arg->flags & O_NONBLOCK); BT_DBG("sk %p timeo %ld", sk, timeo); /* Wait for an incoming connection. (wake-one). */ add_wait_queue_exclusive(sk_sleep(sk), &wait); while (1) { if (sk->sk_state != BT_LISTEN) { err = -EBADFD; break; } nsk = bt_accept_dequeue(sk, newsock); if (nsk) break; if (!timeo) { err = -EAGAIN; break; } if (signal_pending(current)) { err = sock_intr_errno(timeo); break; } release_sock(sk); timeo = wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); lock_sock_nested(sk, SINGLE_DEPTH_NESTING); } remove_wait_queue(sk_sleep(sk), &wait); if (err) goto done; newsock->state = SS_CONNECTED; BT_DBG("new socket %p", nsk); done: release_sock(sk); return err; } static int rfcomm_sock_getname(struct socket *sock, struct sockaddr *addr, int peer) { struct sockaddr_rc *sa = (struct sockaddr_rc *) addr; struct sock *sk = sock->sk; BT_DBG("sock %p, sk %p", sock, sk); if (peer && sk->sk_state != BT_CONNECTED && sk->sk_state != BT_CONNECT && sk->sk_state != BT_CONNECT2) return -ENOTCONN; memset(sa, 0, sizeof(*sa)); sa->rc_family = AF_BLUETOOTH; sa->rc_channel = rfcomm_pi(sk)->channel; if (peer) bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->dst); else bacpy(&sa->rc_bdaddr, &rfcomm_pi(sk)->src); return sizeof(struct sockaddr_rc); } static int rfcomm_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; struct sk_buff *skb; int sent; if (test_bit(RFCOMM_DEFER_SETUP, &d->flags)) return -ENOTCONN; if (msg->msg_flags & MSG_OOB) return -EOPNOTSUPP; if (sk->sk_shutdown & SEND_SHUTDOWN) return -EPIPE; BT_DBG("sock %p, sk %p", sock, sk); lock_sock(sk); sent = bt_sock_wait_ready(sk, msg->msg_flags); release_sock(sk); if (sent) return sent; skb = bt_skb_sendmmsg(sk, msg, len, d->mtu, RFCOMM_SKB_HEAD_RESERVE, RFCOMM_SKB_TAIL_RESERVE); if (IS_ERR(skb)) return PTR_ERR(skb); sent = rfcomm_dlc_send(d, skb); if (sent < 0) kfree_skb(skb); return sent; } static int rfcomm_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; struct rfcomm_dlc *d = rfcomm_pi(sk)->dlc; int len; if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); return 0; } len = bt_sock_stream_recvmsg(sock, msg, size, flags); lock_sock(sk); if (!(flags & MSG_PEEK) && len > 0) atomic_sub(len, &sk->sk_rmem_alloc); if (atomic_read(&sk->sk_rmem_alloc) <= (sk->sk_rcvbuf >> 2)) rfcomm_dlc_unthrottle(rfcomm_pi(sk)->dlc); release_sock(sk); return len; } static int rfcomm_sock_setsockopt_old(struct socket *sock, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; int err = 0; u32 opt; BT_DBG("sk %p", sk); lock_sock(sk); switch (optname) { case RFCOMM_LM: if (bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen)) { err = -EFAULT; break; } if (opt & RFCOMM_LM_FIPS) { err = -EINVAL; break; } if (opt & RFCOMM_LM_AUTH) rfcomm_pi(sk)->sec_level = BT_SECURITY_LOW; if (opt & RFCOMM_LM_ENCRYPT) rfcomm_pi(sk)->sec_level = BT_SECURITY_MEDIUM; if (opt & RFCOMM_LM_SECURE) rfcomm_pi(sk)->sec_level = BT_SECURITY_HIGH; rfcomm_pi(sk)->role_switch = (opt & RFCOMM_LM_MASTER); break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static int rfcomm_sock_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct sock *sk = sock->sk; struct bt_security sec; int err = 0; u32 opt; BT_DBG("sk %p", sk); if (level == SOL_RFCOMM) return rfcomm_sock_setsockopt_old(sock, optname, optval, optlen); if (level != SOL_BLUETOOTH) return -ENOPROTOOPT; lock_sock(sk); switch (optname) { case BT_SECURITY: if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; break; } sec.level = BT_SECURITY_LOW; err = bt_copy_from_sockptr(&sec, sizeof(sec), optval, optlen); if (err) break; if (sec.level > BT_SECURITY_HIGH) { err = -EINVAL; break; } rfcomm_pi(sk)->sec_level = sec.level; break; case BT_DEFER_SETUP: if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { err = -EINVAL; break; } err = bt_copy_from_sockptr(&opt, sizeof(opt), optval, optlen); if (err) break; if (opt) set_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); else clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags); break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static int rfcomm_sock_getsockopt_old(struct socket *sock, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct sock *l2cap_sk; struct l2cap_conn *conn; struct rfcomm_conninfo cinfo; int len, err = 0; u32 opt; BT_DBG("sk %p", sk); if (get_user(len, optlen)) return -EFAULT; lock_sock(sk); switch (optname) { case RFCOMM_LM: switch (rfcomm_pi(sk)->sec_level) { case BT_SECURITY_LOW: opt = RFCOMM_LM_AUTH; break; case BT_SECURITY_MEDIUM: opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT; break; case BT_SECURITY_HIGH: opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE; break; case BT_SECURITY_FIPS: opt = RFCOMM_LM_AUTH | RFCOMM_LM_ENCRYPT | RFCOMM_LM_SECURE | RFCOMM_LM_FIPS; break; default: opt = 0; break; } if (rfcomm_pi(sk)->role_switch) opt |= RFCOMM_LM_MASTER; if (put_user(opt, (u32 __user *) optval)) err = -EFAULT; break; case RFCOMM_CONNINFO: if (sk->sk_state != BT_CONNECTED && !rfcomm_pi(sk)->dlc->defer_setup) { err = -ENOTCONN; break; } l2cap_sk = rfcomm_pi(sk)->dlc->session->sock->sk; conn = l2cap_pi(l2cap_sk)->chan->conn; memset(&cinfo, 0, sizeof(cinfo)); cinfo.hci_handle = conn->hcon->handle; memcpy(cinfo.dev_class, conn->hcon->dev_class, 3); len = min_t(unsigned int, len, sizeof(cinfo)); if (copy_to_user(optval, (char *) &cinfo, len)) err = -EFAULT; break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static int rfcomm_sock_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen) { struct sock *sk = sock->sk; struct bt_security sec; int len, err = 0; BT_DBG("sk %p", sk); if (level == SOL_RFCOMM) return rfcomm_sock_getsockopt_old(sock, optname, optval, optlen); if (level != SOL_BLUETOOTH) return -ENOPROTOOPT; if (get_user(len, optlen)) return -EFAULT; lock_sock(sk); switch (optname) { case BT_SECURITY: if (sk->sk_type != SOCK_STREAM) { err = -EINVAL; break; } sec.level = rfcomm_pi(sk)->sec_level; sec.key_size = 0; len = min_t(unsigned int, len, sizeof(sec)); if (copy_to_user(optval, (char *) &sec, len)) err = -EFAULT; break; case BT_DEFER_SETUP: if (sk->sk_state != BT_BOUND && sk->sk_state != BT_LISTEN) { err = -EINVAL; break; } if (put_user(test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags), (u32 __user *) optval)) err = -EFAULT; break; default: err = -ENOPROTOOPT; break; } release_sock(sk); return err; } static int rfcomm_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk __maybe_unused = sock->sk; int err; BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); err = bt_sock_ioctl(sock, cmd, arg); if (err == -ENOIOCTLCMD) { #ifdef CONFIG_BT_RFCOMM_TTY lock_sock(sk); err = rfcomm_dev_ioctl(sk, cmd, (void __user *) arg); release_sock(sk); #else err = -EOPNOTSUPP; #endif } return err; } #ifdef CONFIG_COMPAT static int rfcomm_sock_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { return rfcomm_sock_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); } #endif static int rfcomm_sock_shutdown(struct socket *sock, int how) { struct sock *sk = sock->sk; int err = 0; BT_DBG("sock %p, sk %p", sock, sk); if (!sk) return 0; lock_sock(sk); if (!sk->sk_shutdown) { sk->sk_shutdown = SHUTDOWN_MASK; release_sock(sk); __rfcomm_sock_close(sk); lock_sock(sk); if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && !(current->flags & PF_EXITING)) err = bt_sock_wait_state(sk, BT_CLOSED, sk->sk_lingertime); } release_sock(sk); return err; } static int rfcomm_sock_release(struct socket *sock) { struct sock *sk = sock->sk; int err; BT_DBG("sock %p, sk %p", sock, sk); if (!sk) return 0; err = rfcomm_sock_shutdown(sock, 2); sock_orphan(sk); rfcomm_sock_kill(sk); return err; } /* ---- RFCOMM core layer callbacks ---- * * called under rfcomm_lock() */ int rfcomm_connect_ind(struct rfcomm_session *s, u8 channel, struct rfcomm_dlc **d) { struct sock *sk, *parent; bdaddr_t src, dst; int result = 0; BT_DBG("session %p channel %d", s, channel); rfcomm_session_getaddr(s, &src, &dst); /* Check if we have socket listening on channel */ parent = rfcomm_get_sock_by_channel(BT_LISTEN, channel, &src); if (!parent) return 0; lock_sock(parent); /* Check for backlog size */ if (sk_acceptq_is_full(parent)) { BT_DBG("backlog full %d", parent->sk_ack_backlog); goto done; } sk = rfcomm_sock_alloc(sock_net(parent), NULL, BTPROTO_RFCOMM, GFP_ATOMIC, 0); if (!sk) goto done; bt_sock_reclassify_lock(sk, BTPROTO_RFCOMM); rfcomm_sock_init(sk, parent); bacpy(&rfcomm_pi(sk)->src, &src); bacpy(&rfcomm_pi(sk)->dst, &dst); rfcomm_pi(sk)->channel = channel; sk->sk_state = BT_CONFIG; bt_accept_enqueue(parent, sk, true); /* Accept connection and return socket DLC */ *d = rfcomm_pi(sk)->dlc; result = 1; done: release_sock(parent); if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) parent->sk_state_change(parent); return result; } static int rfcomm_sock_debugfs_show(struct seq_file *f, void *p) { struct sock *sk; read_lock(&rfcomm_sk_list.lock); sk_for_each(sk, &rfcomm_sk_list.head) { seq_printf(f, "%pMR %pMR %d %d\n", &rfcomm_pi(sk)->src, &rfcomm_pi(sk)->dst, sk->sk_state, rfcomm_pi(sk)->channel); } read_unlock(&rfcomm_sk_list.lock); return 0; } DEFINE_SHOW_ATTRIBUTE(rfcomm_sock_debugfs); static struct dentry *rfcomm_sock_debugfs; static const struct proto_ops rfcomm_sock_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .release = rfcomm_sock_release, .bind = rfcomm_sock_bind, .connect = rfcomm_sock_connect, .listen = rfcomm_sock_listen, .accept = rfcomm_sock_accept, .getname = rfcomm_sock_getname, .sendmsg = rfcomm_sock_sendmsg, .recvmsg = rfcomm_sock_recvmsg, .shutdown = rfcomm_sock_shutdown, .setsockopt = rfcomm_sock_setsockopt, .getsockopt = rfcomm_sock_getsockopt, .ioctl = rfcomm_sock_ioctl, .gettstamp = sock_gettstamp, .poll = bt_sock_poll, .socketpair = sock_no_socketpair, .mmap = sock_no_mmap, #ifdef CONFIG_COMPAT .compat_ioctl = rfcomm_sock_compat_ioctl, #endif }; static const struct net_proto_family rfcomm_sock_family_ops = { .family = PF_BLUETOOTH, .owner = THIS_MODULE, .create = rfcomm_sock_create }; int __init rfcomm_init_sockets(void) { int err; BUILD_BUG_ON(sizeof(struct sockaddr_rc) > sizeof(struct sockaddr)); err = proto_register(&rfcomm_proto, 0); if (err < 0) return err; err = bt_sock_register(BTPROTO_RFCOMM, &rfcomm_sock_family_ops); if (err < 0) { BT_ERR("RFCOMM socket layer registration failed"); goto error; } err = bt_procfs_init(&init_net, "rfcomm", &rfcomm_sk_list, NULL); if (err < 0) { BT_ERR("Failed to create RFCOMM proc file"); bt_sock_unregister(BTPROTO_RFCOMM); goto error; } BT_INFO("RFCOMM socket layer initialized"); if (IS_ERR_OR_NULL(bt_debugfs)) return 0; rfcomm_sock_debugfs = debugfs_create_file("rfcomm", 0444, bt_debugfs, NULL, &rfcomm_sock_debugfs_fops); return 0; error: proto_unregister(&rfcomm_proto); return err; } void __exit rfcomm_cleanup_sockets(void) { bt_procfs_cleanup(&init_net, "rfcomm"); debugfs_remove(rfcomm_sock_debugfs); bt_sock_unregister(BTPROTO_RFCOMM); proto_unregister(&rfcomm_proto); }
1 1 1 1 1 1 1 2 1 1 1 1 1 4 4 3 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 27 27 1 1 1 1 1 1 2 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 // SPDX-License-Identifier: GPL-2.0 /* * net/tipc/crypto.c: TIPC crypto for key handling & packet en/decryption * * Copyright (c) 2019, Ericsson AB * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the names of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived from * this software without specific prior written permission. * * Alternatively, this software may be distributed under the terms of the * GNU General Public License ("GPL") version 2 as published by the Free * Software Foundation. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include <crypto/aead.h> #include <crypto/aes.h> #include <crypto/rng.h> #include "crypto.h" #include "msg.h" #include "bcast.h" #define TIPC_TX_GRACE_PERIOD msecs_to_jiffies(5000) /* 5s */ #define TIPC_TX_LASTING_TIME msecs_to_jiffies(10000) /* 10s */ #define TIPC_RX_ACTIVE_LIM msecs_to_jiffies(3000) /* 3s */ #define TIPC_RX_PASSIVE_LIM msecs_to_jiffies(15000) /* 15s */ #define TIPC_MAX_TFMS_DEF 10 #define TIPC_MAX_TFMS_LIM 1000 #define TIPC_REKEYING_INTV_DEF (60 * 24) /* default: 1 day */ /* * TIPC Key ids */ enum { KEY_MASTER = 0, KEY_MIN = KEY_MASTER, KEY_1 = 1, KEY_2, KEY_3, KEY_MAX = KEY_3, }; /* * TIPC Crypto statistics */ enum { STAT_OK, STAT_NOK, STAT_ASYNC, STAT_ASYNC_OK, STAT_ASYNC_NOK, STAT_BADKEYS, /* tx only */ STAT_BADMSGS = STAT_BADKEYS, /* rx only */ STAT_NOKEYS, STAT_SWITCHES, MAX_STATS, }; /* TIPC crypto statistics' header */ static const char *hstats[MAX_STATS] = {"ok", "nok", "async", "async_ok", "async_nok", "badmsgs", "nokeys", "switches"}; /* Max TFMs number per key */ int sysctl_tipc_max_tfms __read_mostly = TIPC_MAX_TFMS_DEF; /* Key exchange switch, default: on */ int sysctl_tipc_key_exchange_enabled __read_mostly = 1; /* * struct tipc_key - TIPC keys' status indicator * * 7 6 5 4 3 2 1 0 * +-----+-----+-----+-----+-----+-----+-----+-----+ * key: | (reserved)|passive idx| active idx|pending idx| * +-----+-----+-----+-----+-----+-----+-----+-----+ */ struct tipc_key { #define KEY_BITS (2) #define KEY_MASK ((1 << KEY_BITS) - 1) union { struct { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 pending:2, active:2, passive:2, /* rx only */ reserved:2; #elif defined(__BIG_ENDIAN_BITFIELD) u8 reserved:2, passive:2, /* rx only */ active:2, pending:2; #else #error "Please fix <asm/byteorder.h>" #endif } __packed; u8 keys; }; }; /** * struct tipc_tfm - TIPC TFM structure to form a list of TFMs * @tfm: cipher handle/key * @list: linked list of TFMs */ struct tipc_tfm { struct crypto_aead *tfm; struct list_head list; }; /** * struct tipc_aead - TIPC AEAD key structure * @tfm_entry: per-cpu pointer to one entry in TFM list * @crypto: TIPC crypto owns this key * @cloned: reference to the source key in case cloning * @users: the number of the key users (TX/RX) * @salt: the key's SALT value * @authsize: authentication tag size (max = 16) * @mode: crypto mode is applied to the key * @hint: a hint for user key * @rcu: struct rcu_head * @key: the aead key * @gen: the key's generation * @seqno: the key seqno (cluster scope) * @refcnt: the key reference counter */ struct tipc_aead { #define TIPC_AEAD_HINT_LEN (5) struct tipc_tfm * __percpu *tfm_entry; struct tipc_crypto *crypto; struct tipc_aead *cloned; atomic_t users; u32 salt; u8 authsize; u8 mode; char hint[2 * TIPC_AEAD_HINT_LEN + 1]; struct rcu_head rcu; struct tipc_aead_key *key; u16 gen; atomic64_t seqno ____cacheline_aligned; refcount_t refcnt ____cacheline_aligned; } ____cacheline_aligned; /** * struct tipc_crypto_stats - TIPC Crypto statistics * @stat: array of crypto statistics */ struct tipc_crypto_stats { unsigned int stat[MAX_STATS]; }; /** * struct tipc_crypto - TIPC TX/RX crypto structure * @net: struct net * @node: TIPC node (RX) * @aead: array of pointers to AEAD keys for encryption/decryption * @peer_rx_active: replicated peer RX active key index * @key_gen: TX/RX key generation * @key: the key states * @skey_mode: session key's mode * @skey: received session key * @wq: common workqueue on TX crypto * @work: delayed work sched for TX/RX * @key_distr: key distributing state * @rekeying_intv: rekeying interval (in minutes) * @stats: the crypto statistics * @name: the crypto name * @sndnxt: the per-peer sndnxt (TX) * @timer1: general timer 1 (jiffies) * @timer2: general timer 2 (jiffies) * @working: the crypto is working or not * @key_master: flag indicates if master key exists * @legacy_user: flag indicates if a peer joins w/o master key (for bwd comp.) * @nokey: no key indication * @flags: combined flags field * @lock: tipc_key lock */ struct tipc_crypto { struct net *net; struct tipc_node *node; struct tipc_aead __rcu *aead[KEY_MAX + 1]; atomic_t peer_rx_active; u16 key_gen; struct tipc_key key; u8 skey_mode; struct tipc_aead_key *skey; struct workqueue_struct *wq; struct delayed_work work; #define KEY_DISTR_SCHED 1 #define KEY_DISTR_COMPL 2 atomic_t key_distr; u32 rekeying_intv; struct tipc_crypto_stats __percpu *stats; char name[48]; atomic64_t sndnxt ____cacheline_aligned; unsigned long timer1; unsigned long timer2; union { struct { u8 working:1; u8 key_master:1; u8 legacy_user:1; u8 nokey: 1; }; u8 flags; }; spinlock_t lock; /* crypto lock */ } ____cacheline_aligned; /* struct tipc_crypto_tx_ctx - TX context for callbacks */ struct tipc_crypto_tx_ctx { struct tipc_aead *aead; struct tipc_bearer *bearer; struct tipc_media_addr dst; }; /* struct tipc_crypto_rx_ctx - RX context for callbacks */ struct tipc_crypto_rx_ctx { struct tipc_aead *aead; struct tipc_bearer *bearer; }; static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead); static inline void tipc_aead_put(struct tipc_aead *aead); static void tipc_aead_free(struct rcu_head *rp); static int tipc_aead_users(struct tipc_aead __rcu *aead); static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim); static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim); static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val); static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead); static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, u8 mode); static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src); static void *tipc_aead_mem_alloc(struct crypto_aead *tfm, unsigned int crypto_ctx_size, u8 **iv, struct aead_request **req, struct scatterlist **sg, int nsg); static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, struct tipc_bearer *b, struct tipc_media_addr *dst, struct tipc_node *__dnode); static void tipc_aead_encrypt_done(void *data, int err); static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, struct sk_buff *skb, struct tipc_bearer *b); static void tipc_aead_decrypt_done(void *data, int err); static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr); static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead, u8 tx_key, struct sk_buff *skb, struct tipc_crypto *__rx); static inline void tipc_crypto_key_set_state(struct tipc_crypto *c, u8 new_passive, u8 new_active, u8 new_pending); static int tipc_crypto_key_attach(struct tipc_crypto *c, struct tipc_aead *aead, u8 pos, bool master_key); static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending); static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx, struct tipc_crypto *rx, struct sk_buff *skb, u8 tx_key); static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb); static int tipc_crypto_key_revoke(struct net *net, u8 tx_key); static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb, struct tipc_bearer *b, struct tipc_media_addr *dst, struct tipc_node *__dnode, u8 type); static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead, struct tipc_bearer *b, struct sk_buff **skb, int err); static void tipc_crypto_do_cmd(struct net *net, int cmd); static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf); static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new, char *buf); static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey, u16 gen, u8 mode, u32 dnode); static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr); static void tipc_crypto_work_tx(struct work_struct *work); static void tipc_crypto_work_rx(struct work_struct *work); static int tipc_aead_key_generate(struct tipc_aead_key *skey); #define is_tx(crypto) (!(crypto)->node) #define is_rx(crypto) (!is_tx(crypto)) #define key_next(cur) ((cur) % KEY_MAX + 1) #define tipc_aead_rcu_ptr(rcu_ptr, lock) \ rcu_dereference_protected((rcu_ptr), lockdep_is_held(lock)) #define tipc_aead_rcu_replace(rcu_ptr, ptr, lock) \ do { \ struct tipc_aead *__tmp = rcu_dereference_protected((rcu_ptr), \ lockdep_is_held(lock)); \ rcu_assign_pointer((rcu_ptr), (ptr)); \ tipc_aead_put(__tmp); \ } while (0) #define tipc_crypto_key_detach(rcu_ptr, lock) \ tipc_aead_rcu_replace((rcu_ptr), NULL, lock) /** * tipc_aead_key_validate - Validate a AEAD user key * @ukey: pointer to user key data * @info: netlink info pointer */ int tipc_aead_key_validate(struct tipc_aead_key *ukey, struct genl_info *info) { int keylen; /* Check if algorithm exists */ if (unlikely(!crypto_has_alg(ukey->alg_name, 0, 0))) { GENL_SET_ERR_MSG(info, "unable to load the algorithm (module existed?)"); return -ENODEV; } /* Currently, we only support the "gcm(aes)" cipher algorithm */ if (strcmp(ukey->alg_name, "gcm(aes)")) { GENL_SET_ERR_MSG(info, "not supported yet the algorithm"); return -ENOTSUPP; } /* Check if key size is correct */ keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE; if (unlikely(keylen != TIPC_AES_GCM_KEY_SIZE_128 && keylen != TIPC_AES_GCM_KEY_SIZE_192 && keylen != TIPC_AES_GCM_KEY_SIZE_256)) { GENL_SET_ERR_MSG(info, "incorrect key length (20, 28 or 36 octets?)"); return -EKEYREJECTED; } return 0; } /** * tipc_aead_key_generate - Generate new session key * @skey: input/output key with new content * * Return: 0 in case of success, otherwise < 0 */ static int tipc_aead_key_generate(struct tipc_aead_key *skey) { int rc = 0; /* Fill the key's content with a random value via RNG cipher */ rc = crypto_get_default_rng(); if (likely(!rc)) { rc = crypto_rng_get_bytes(crypto_default_rng, skey->key, skey->keylen); crypto_put_default_rng(); } return rc; } static struct tipc_aead *tipc_aead_get(struct tipc_aead __rcu *aead) { struct tipc_aead *tmp; rcu_read_lock(); tmp = rcu_dereference(aead); if (unlikely(!tmp || !refcount_inc_not_zero(&tmp->refcnt))) tmp = NULL; rcu_read_unlock(); return tmp; } static inline void tipc_aead_put(struct tipc_aead *aead) { if (aead && refcount_dec_and_test(&aead->refcnt)) call_rcu(&aead->rcu, tipc_aead_free); } /** * tipc_aead_free - Release AEAD key incl. all the TFMs in the list * @rp: rcu head pointer */ static void tipc_aead_free(struct rcu_head *rp) { struct tipc_aead *aead = container_of(rp, struct tipc_aead, rcu); struct tipc_tfm *tfm_entry, *head, *tmp; if (aead->cloned) { tipc_aead_put(aead->cloned); } else { head = *get_cpu_ptr(aead->tfm_entry); put_cpu_ptr(aead->tfm_entry); list_for_each_entry_safe(tfm_entry, tmp, &head->list, list) { crypto_free_aead(tfm_entry->tfm); list_del(&tfm_entry->list); kfree(tfm_entry); } /* Free the head */ crypto_free_aead(head->tfm); list_del(&head->list); kfree(head); } free_percpu(aead->tfm_entry); kfree_sensitive(aead->key); kfree(aead); } static int tipc_aead_users(struct tipc_aead __rcu *aead) { struct tipc_aead *tmp; int users = 0; rcu_read_lock(); tmp = rcu_dereference(aead); if (tmp) users = atomic_read(&tmp->users); rcu_read_unlock(); return users; } static void tipc_aead_users_inc(struct tipc_aead __rcu *aead, int lim) { struct tipc_aead *tmp; rcu_read_lock(); tmp = rcu_dereference(aead); if (tmp) atomic_add_unless(&tmp->users, 1, lim); rcu_read_unlock(); } static void tipc_aead_users_dec(struct tipc_aead __rcu *aead, int lim) { struct tipc_aead *tmp; rcu_read_lock(); tmp = rcu_dereference(aead); if (tmp) atomic_add_unless(&rcu_dereference(aead)->users, -1, lim); rcu_read_unlock(); } static void tipc_aead_users_set(struct tipc_aead __rcu *aead, int val) { struct tipc_aead *tmp; int cur; rcu_read_lock(); tmp = rcu_dereference(aead); if (tmp) { do { cur = atomic_read(&tmp->users); if (cur == val) break; } while (atomic_cmpxchg(&tmp->users, cur, val) != cur); } rcu_read_unlock(); } /** * tipc_aead_tfm_next - Move TFM entry to the next one in list and return it * @aead: the AEAD key pointer */ static struct crypto_aead *tipc_aead_tfm_next(struct tipc_aead *aead) { struct tipc_tfm **tfm_entry; struct crypto_aead *tfm; tfm_entry = get_cpu_ptr(aead->tfm_entry); *tfm_entry = list_next_entry(*tfm_entry, list); tfm = (*tfm_entry)->tfm; put_cpu_ptr(tfm_entry); return tfm; } /** * tipc_aead_init - Initiate TIPC AEAD * @aead: returned new TIPC AEAD key handle pointer * @ukey: pointer to user key data * @mode: the key mode * * Allocate a (list of) new cipher transformation (TFM) with the specific user * key data if valid. The number of the allocated TFMs can be set via the sysfs * "net/tipc/max_tfms" first. * Also, all the other AEAD data are also initialized. * * Return: 0 if the initiation is successful, otherwise: < 0 */ static int tipc_aead_init(struct tipc_aead **aead, struct tipc_aead_key *ukey, u8 mode) { struct tipc_tfm *tfm_entry, *head; struct crypto_aead *tfm; struct tipc_aead *tmp; int keylen, err, cpu; int tfm_cnt = 0; if (unlikely(*aead)) return -EEXIST; /* Allocate a new AEAD */ tmp = kzalloc(sizeof(*tmp), GFP_ATOMIC); if (unlikely(!tmp)) return -ENOMEM; /* The key consists of two parts: [AES-KEY][SALT] */ keylen = ukey->keylen - TIPC_AES_GCM_SALT_SIZE; /* Allocate per-cpu TFM entry pointer */ tmp->tfm_entry = alloc_percpu(struct tipc_tfm *); if (!tmp->tfm_entry) { kfree_sensitive(tmp); return -ENOMEM; } /* Make a list of TFMs with the user key data */ do { tfm = crypto_alloc_aead(ukey->alg_name, 0, 0); if (IS_ERR(tfm)) { err = PTR_ERR(tfm); break; } if (unlikely(!tfm_cnt && crypto_aead_ivsize(tfm) != TIPC_AES_GCM_IV_SIZE)) { crypto_free_aead(tfm); err = -ENOTSUPP; break; } err = crypto_aead_setauthsize(tfm, TIPC_AES_GCM_TAG_SIZE); err |= crypto_aead_setkey(tfm, ukey->key, keylen); if (unlikely(err)) { crypto_free_aead(tfm); break; } tfm_entry = kmalloc(sizeof(*tfm_entry), GFP_KERNEL); if (unlikely(!tfm_entry)) { crypto_free_aead(tfm); err = -ENOMEM; break; } INIT_LIST_HEAD(&tfm_entry->list); tfm_entry->tfm = tfm; /* First entry? */ if (!tfm_cnt) { head = tfm_entry; for_each_possible_cpu(cpu) { *per_cpu_ptr(tmp->tfm_entry, cpu) = head; } } else { list_add_tail(&tfm_entry->list, &head->list); } } while (++tfm_cnt < sysctl_tipc_max_tfms); /* Not any TFM is allocated? */ if (!tfm_cnt) { free_percpu(tmp->tfm_entry); kfree_sensitive(tmp); return err; } /* Form a hex string of some last bytes as the key's hint */ bin2hex(tmp->hint, ukey->key + keylen - TIPC_AEAD_HINT_LEN, TIPC_AEAD_HINT_LEN); /* Initialize the other data */ tmp->mode = mode; tmp->cloned = NULL; tmp->authsize = TIPC_AES_GCM_TAG_SIZE; tmp->key = kmemdup(ukey, tipc_aead_key_size(ukey), GFP_KERNEL); if (!tmp->key) { tipc_aead_free(&tmp->rcu); return -ENOMEM; } memcpy(&tmp->salt, ukey->key + keylen, TIPC_AES_GCM_SALT_SIZE); atomic_set(&tmp->users, 0); atomic64_set(&tmp->seqno, 0); refcount_set(&tmp->refcnt, 1); *aead = tmp; return 0; } /** * tipc_aead_clone - Clone a TIPC AEAD key * @dst: dest key for the cloning * @src: source key to clone from * * Make a "copy" of the source AEAD key data to the dest, the TFMs list is * common for the keys. * A reference to the source is hold in the "cloned" pointer for the later * freeing purposes. * * Note: this must be done in cluster-key mode only! * Return: 0 in case of success, otherwise < 0 */ static int tipc_aead_clone(struct tipc_aead **dst, struct tipc_aead *src) { struct tipc_aead *aead; int cpu; if (!src) return -ENOKEY; if (src->mode != CLUSTER_KEY) return -EINVAL; if (unlikely(*dst)) return -EEXIST; aead = kzalloc(sizeof(*aead), GFP_ATOMIC); if (unlikely(!aead)) return -ENOMEM; aead->tfm_entry = alloc_percpu_gfp(struct tipc_tfm *, GFP_ATOMIC); if (unlikely(!aead->tfm_entry)) { kfree_sensitive(aead); return -ENOMEM; } for_each_possible_cpu(cpu) { *per_cpu_ptr(aead->tfm_entry, cpu) = *per_cpu_ptr(src->tfm_entry, cpu); } memcpy(aead->hint, src->hint, sizeof(src->hint)); aead->mode = src->mode; aead->salt = src->salt; aead->authsize = src->authsize; atomic_set(&aead->users, 0); atomic64_set(&aead->seqno, 0); refcount_set(&aead->refcnt, 1); WARN_ON(!refcount_inc_not_zero(&src->refcnt)); aead->cloned = src; *dst = aead; return 0; } /** * tipc_aead_mem_alloc - Allocate memory for AEAD request operations * @tfm: cipher handle to be registered with the request * @crypto_ctx_size: size of crypto context for callback * @iv: returned pointer to IV data * @req: returned pointer to AEAD request data * @sg: returned pointer to SG lists * @nsg: number of SG lists to be allocated * * Allocate memory to store the crypto context data, AEAD request, IV and SG * lists, the memory layout is as follows: * crypto_ctx || iv || aead_req || sg[] * * Return: the pointer to the memory areas in case of success, otherwise NULL */ static void *tipc_aead_mem_alloc(struct crypto_aead *tfm, unsigned int crypto_ctx_size, u8 **iv, struct aead_request **req, struct scatterlist **sg, int nsg) { unsigned int iv_size, req_size; unsigned int len; u8 *mem; iv_size = crypto_aead_ivsize(tfm); req_size = sizeof(**req) + crypto_aead_reqsize(tfm); len = crypto_ctx_size; len += iv_size; len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1); len = ALIGN(len, crypto_tfm_ctx_alignment()); len += req_size; len = ALIGN(len, __alignof__(struct scatterlist)); len += nsg * sizeof(**sg); mem = kmalloc(len, GFP_ATOMIC); if (!mem) return NULL; *iv = (u8 *)PTR_ALIGN(mem + crypto_ctx_size, crypto_aead_alignmask(tfm) + 1); *req = (struct aead_request *)PTR_ALIGN(*iv + iv_size, crypto_tfm_ctx_alignment()); *sg = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size, __alignof__(struct scatterlist)); return (void *)mem; } /** * tipc_aead_encrypt - Encrypt a message * @aead: TIPC AEAD key for the message encryption * @skb: the input/output skb * @b: TIPC bearer where the message will be delivered after the encryption * @dst: the destination media address * @__dnode: TIPC dest node if "known" * * Return: * * 0 : if the encryption has completed * * -EINPROGRESS/-EBUSY : if a callback will be performed * * < 0 : the encryption has failed */ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb, struct tipc_bearer *b, struct tipc_media_addr *dst, struct tipc_node *__dnode) { struct crypto_aead *tfm = tipc_aead_tfm_next(aead); struct tipc_crypto_tx_ctx *tx_ctx; struct aead_request *req; struct sk_buff *trailer; struct scatterlist *sg; struct tipc_ehdr *ehdr; int ehsz, len, tailen, nsg, rc; void *ctx; u32 salt; u8 *iv; /* Make sure message len at least 4-byte aligned */ len = ALIGN(skb->len, 4); tailen = len - skb->len + aead->authsize; /* Expand skb tail for authentication tag: * As for simplicity, we'd have made sure skb having enough tailroom * for authentication tag @skb allocation. Even when skb is nonlinear * but there is no frag_list, it should be still fine! * Otherwise, we must cow it to be a writable buffer with the tailroom. */ SKB_LINEAR_ASSERT(skb); if (tailen > skb_tailroom(skb)) { pr_debug("TX(): skb tailroom is not enough: %d, requires: %d\n", skb_tailroom(skb), tailen); } nsg = skb_cow_data(skb, tailen, &trailer); if (unlikely(nsg < 0)) { pr_err("TX: skb_cow_data() returned %d\n", nsg); return nsg; } pskb_put(skb, trailer, tailen); /* Allocate memory for the AEAD operation */ ctx = tipc_aead_mem_alloc(tfm, sizeof(*tx_ctx), &iv, &req, &sg, nsg); if (unlikely(!ctx)) return -ENOMEM; TIPC_SKB_CB(skb)->crypto_ctx = ctx; /* Map skb to the sg lists */ sg_init_table(sg, nsg); rc = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(rc < 0)) { pr_err("TX: skb_to_sgvec() returned %d, nsg %d!\n", rc, nsg); goto exit; } /* Prepare IV: [SALT (4 octets)][SEQNO (8 octets)] * In case we're in cluster-key mode, SALT is varied by xor-ing with * the source address (or w0 of id), otherwise with the dest address * if dest is known. */ ehdr = (struct tipc_ehdr *)skb->data; salt = aead->salt; if (aead->mode == CLUSTER_KEY) salt ^= __be32_to_cpu(ehdr->addr); else if (__dnode) salt ^= tipc_node_get_addr(__dnode); memcpy(iv, &salt, 4); memcpy(iv + 4, (u8 *)&ehdr->seqno, 8); /* Prepare request */ ehsz = tipc_ehdr_size(ehdr); aead_request_set_tfm(req, tfm); aead_request_set_ad(req, ehsz); aead_request_set_crypt(req, sg, sg, len - ehsz, iv); /* Set callback function & data */ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, tipc_aead_encrypt_done, skb); tx_ctx = (struct tipc_crypto_tx_ctx *)ctx; tx_ctx->aead = aead; tx_ctx->bearer = b; memcpy(&tx_ctx->dst, dst, sizeof(*dst)); /* Hold bearer */ if (unlikely(!tipc_bearer_hold(b))) { rc = -ENODEV; goto exit; } /* Now, do encrypt */ rc = crypto_aead_encrypt(req); if (rc == -EINPROGRESS || rc == -EBUSY) return rc; tipc_bearer_put(b); exit: kfree(ctx); TIPC_SKB_CB(skb)->crypto_ctx = NULL; return rc; } static void tipc_aead_encrypt_done(void *data, int err) { struct sk_buff *skb = data; struct tipc_crypto_tx_ctx *tx_ctx = TIPC_SKB_CB(skb)->crypto_ctx; struct tipc_bearer *b = tx_ctx->bearer; struct tipc_aead *aead = tx_ctx->aead; struct tipc_crypto *tx = aead->crypto; struct net *net = tx->net; switch (err) { case 0: this_cpu_inc(tx->stats->stat[STAT_ASYNC_OK]); rcu_read_lock(); if (likely(test_bit(0, &b->up))) b->media->send_msg(net, skb, b, &tx_ctx->dst); else kfree_skb(skb); rcu_read_unlock(); break; case -EINPROGRESS: return; default: this_cpu_inc(tx->stats->stat[STAT_ASYNC_NOK]); kfree_skb(skb); break; } kfree(tx_ctx); tipc_bearer_put(b); tipc_aead_put(aead); } /** * tipc_aead_decrypt - Decrypt an encrypted message * @net: struct net * @aead: TIPC AEAD for the message decryption * @skb: the input/output skb * @b: TIPC bearer where the message has been received * * Return: * * 0 : if the decryption has completed * * -EINPROGRESS/-EBUSY : if a callback will be performed * * < 0 : the decryption has failed */ static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead, struct sk_buff *skb, struct tipc_bearer *b) { struct tipc_crypto_rx_ctx *rx_ctx; struct aead_request *req; struct crypto_aead *tfm; struct sk_buff *unused; struct scatterlist *sg; struct tipc_ehdr *ehdr; int ehsz, nsg, rc; void *ctx; u32 salt; u8 *iv; if (unlikely(!aead)) return -ENOKEY; nsg = skb_cow_data(skb, 0, &unused); if (unlikely(nsg < 0)) { pr_err("RX: skb_cow_data() returned %d\n", nsg); return nsg; } /* Allocate memory for the AEAD operation */ tfm = tipc_aead_tfm_next(aead); ctx = tipc_aead_mem_alloc(tfm, sizeof(*rx_ctx), &iv, &req, &sg, nsg); if (unlikely(!ctx)) return -ENOMEM; TIPC_SKB_CB(skb)->crypto_ctx = ctx; /* Map skb to the sg lists */ sg_init_table(sg, nsg); rc = skb_to_sgvec(skb, sg, 0, skb->len); if (unlikely(rc < 0)) { pr_err("RX: skb_to_sgvec() returned %d, nsg %d\n", rc, nsg); goto exit; } /* Reconstruct IV: */ ehdr = (struct tipc_ehdr *)skb->data; salt = aead->salt; if (aead->mode == CLUSTER_KEY) salt ^= __be32_to_cpu(ehdr->addr); else if (ehdr->destined) salt ^= tipc_own_addr(net); memcpy(iv, &salt, 4); memcpy(iv + 4, (u8 *)&ehdr->seqno, 8); /* Prepare request */ ehsz = tipc_ehdr_size(ehdr); aead_request_set_tfm(req, tfm); aead_request_set_ad(req, ehsz); aead_request_set_crypt(req, sg, sg, skb->len - ehsz, iv); /* Set callback function & data */ aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, tipc_aead_decrypt_done, skb); rx_ctx = (struct tipc_crypto_rx_ctx *)ctx; rx_ctx->aead = aead; rx_ctx->bearer = b; /* Hold bearer */ if (unlikely(!tipc_bearer_hold(b))) { rc = -ENODEV; goto exit; } /* Now, do decrypt */ rc = crypto_aead_decrypt(req); if (rc == -EINPROGRESS || rc == -EBUSY) return rc; tipc_bearer_put(b); exit: kfree(ctx); TIPC_SKB_CB(skb)->crypto_ctx = NULL; return rc; } static void tipc_aead_decrypt_done(void *data, int err) { struct sk_buff *skb = data; struct tipc_crypto_rx_ctx *rx_ctx = TIPC_SKB_CB(skb)->crypto_ctx; struct tipc_bearer *b = rx_ctx->bearer; struct tipc_aead *aead = rx_ctx->aead; struct tipc_crypto_stats __percpu *stats = aead->crypto->stats; struct net *net = aead->crypto->net; switch (err) { case 0: this_cpu_inc(stats->stat[STAT_ASYNC_OK]); break; case -EINPROGRESS: return; default: this_cpu_inc(stats->stat[STAT_ASYNC_NOK]); break; } kfree(rx_ctx); tipc_crypto_rcv_complete(net, aead, b, &skb, err); if (likely(skb)) { if (likely(test_bit(0, &b->up))) tipc_rcv(net, skb, b); else kfree_skb(skb); } tipc_bearer_put(b); } static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr) { return (ehdr->user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE; } /** * tipc_ehdr_validate - Validate an encryption message * @skb: the message buffer * * Return: "true" if this is a valid encryption message, otherwise "false" */ bool tipc_ehdr_validate(struct sk_buff *skb) { struct tipc_ehdr *ehdr; int ehsz; if (unlikely(!pskb_may_pull(skb, EHDR_MIN_SIZE))) return false; ehdr = (struct tipc_ehdr *)skb->data; if (unlikely(ehdr->version != TIPC_EVERSION)) return false; ehsz = tipc_ehdr_size(ehdr); if (unlikely(!pskb_may_pull(skb, ehsz))) return false; if (unlikely(skb->len <= ehsz + TIPC_AES_GCM_TAG_SIZE)) return false; return true; } /** * tipc_ehdr_build - Build TIPC encryption message header * @net: struct net * @aead: TX AEAD key to be used for the message encryption * @tx_key: key id used for the message encryption * @skb: input/output message skb * @__rx: RX crypto handle if dest is "known" * * Return: the header size if the building is successful, otherwise < 0 */ static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead, u8 tx_key, struct sk_buff *skb, struct tipc_crypto *__rx) { struct tipc_msg *hdr = buf_msg(skb); struct tipc_ehdr *ehdr; u32 user = msg_user(hdr); u64 seqno; int ehsz; /* Make room for encryption header */ ehsz = (user != LINK_CONFIG) ? EHDR_SIZE : EHDR_CFG_SIZE; WARN_ON(skb_headroom(skb) < ehsz); ehdr = (struct tipc_ehdr *)skb_push(skb, ehsz); /* Obtain a seqno first: * Use the key seqno (= cluster wise) if dest is unknown or we're in * cluster key mode, otherwise it's better for a per-peer seqno! */ if (!__rx || aead->mode == CLUSTER_KEY) seqno = atomic64_inc_return(&aead->seqno); else seqno = atomic64_inc_return(&__rx->sndnxt); /* Revoke the key if seqno is wrapped around */ if (unlikely(!seqno)) return tipc_crypto_key_revoke(net, tx_key); /* Word 1-2 */ ehdr->seqno = cpu_to_be64(seqno); /* Words 0, 3- */ ehdr->version = TIPC_EVERSION; ehdr->user = 0; ehdr->keepalive = 0; ehdr->tx_key = tx_key; ehdr->destined = (__rx) ? 1 : 0; ehdr->rx_key_active = (__rx) ? __rx->key.active : 0; ehdr->rx_nokey = (__rx) ? __rx->nokey : 0; ehdr->master_key = aead->crypto->key_master; ehdr->reserved_1 = 0; ehdr->reserved_2 = 0; switch (user) { case LINK_CONFIG: ehdr->user = LINK_CONFIG; memcpy(ehdr->id, tipc_own_id(net), NODE_ID_LEN); break; default: if (user == LINK_PROTOCOL && msg_type(hdr) == STATE_MSG) { ehdr->user = LINK_PROTOCOL; ehdr->keepalive = msg_is_keepalive(hdr); } ehdr->addr = hdr->hdr[3]; break; } return ehsz; } static inline void tipc_crypto_key_set_state(struct tipc_crypto *c, u8 new_passive, u8 new_active, u8 new_pending) { struct tipc_key old = c->key; char buf[32]; c->key.keys = ((new_passive & KEY_MASK) << (KEY_BITS * 2)) | ((new_active & KEY_MASK) << (KEY_BITS)) | ((new_pending & KEY_MASK)); pr_debug("%s: key changing %s ::%pS\n", c->name, tipc_key_change_dump(old, c->key, buf), __builtin_return_address(0)); } /** * tipc_crypto_key_init - Initiate a new user / AEAD key * @c: TIPC crypto to which new key is attached * @ukey: the user key * @mode: the key mode (CLUSTER_KEY or PER_NODE_KEY) * @master_key: specify this is a cluster master key * * A new TIPC AEAD key will be allocated and initiated with the specified user * key, then attached to the TIPC crypto. * * Return: new key id in case of success, otherwise: < 0 */ int tipc_crypto_key_init(struct tipc_crypto *c, struct tipc_aead_key *ukey, u8 mode, bool master_key) { struct tipc_aead *aead = NULL; int rc = 0; /* Initiate with the new user key */ rc = tipc_aead_init(&aead, ukey, mode); /* Attach it to the crypto */ if (likely(!rc)) { rc = tipc_crypto_key_attach(c, aead, 0, master_key); if (rc < 0) tipc_aead_free(&aead->rcu); } return rc; } /** * tipc_crypto_key_attach - Attach a new AEAD key to TIPC crypto * @c: TIPC crypto to which the new AEAD key is attached * @aead: the new AEAD key pointer * @pos: desired slot in the crypto key array, = 0 if any! * @master_key: specify this is a cluster master key * * Return: new key id in case of success, otherwise: -EBUSY */ static int tipc_crypto_key_attach(struct tipc_crypto *c, struct tipc_aead *aead, u8 pos, bool master_key) { struct tipc_key key; int rc = -EBUSY; u8 new_key; spin_lock_bh(&c->lock); key = c->key; if (master_key) { new_key = KEY_MASTER; goto attach; } if (key.active && key.passive) goto exit; if (key.pending) { if (tipc_aead_users(c->aead[key.pending]) > 0) goto exit; /* if (pos): ok with replacing, will be aligned when needed */ /* Replace it */ new_key = key.pending; } else { if (pos) { if (key.active && pos != key_next(key.active)) { key.passive = pos; new_key = pos; goto attach; } else if (!key.active && !key.passive) { key.pending = pos; new_key = pos; goto attach; } } key.pending = key_next(key.active ?: key.passive); new_key = key.pending; } attach: aead->crypto = c; aead->gen = (is_tx(c)) ? ++c->key_gen : c->key_gen; tipc_aead_rcu_replace(c->aead[new_key], aead, &c->lock); if (likely(c->key.keys != key.keys)) tipc_crypto_key_set_state(c, key.passive, key.active, key.pending); c->working = 1; c->nokey = 0; c->key_master |= master_key; rc = new_key; exit: spin_unlock_bh(&c->lock); return rc; } void tipc_crypto_key_flush(struct tipc_crypto *c) { struct tipc_crypto *tx, *rx; int k; spin_lock_bh(&c->lock); if (is_rx(c)) { /* Try to cancel pending work */ rx = c; tx = tipc_net(rx->net)->crypto_tx; if (cancel_delayed_work(&rx->work)) { kfree(rx->skey); rx->skey = NULL; atomic_xchg(&rx->key_distr, 0); tipc_node_put(rx->node); } /* RX stopping => decrease TX key users if any */ k = atomic_xchg(&rx->peer_rx_active, 0); if (k) { tipc_aead_users_dec(tx->aead[k], 0); /* Mark the point TX key users changed */ tx->timer1 = jiffies; } } c->flags = 0; tipc_crypto_key_set_state(c, 0, 0, 0); for (k = KEY_MIN; k <= KEY_MAX; k++) tipc_crypto_key_detach(c->aead[k], &c->lock); atomic64_set(&c->sndnxt, 0); spin_unlock_bh(&c->lock); } /** * tipc_crypto_key_try_align - Align RX keys if possible * @rx: RX crypto handle * @new_pending: new pending slot if aligned (= TX key from peer) * * Peer has used an unknown key slot, this only happens when peer has left and * rejoned, or we are newcomer. * That means, there must be no active key but a pending key at unaligned slot. * If so, we try to move the pending key to the new slot. * Note: A potential passive key can exist, it will be shifted correspondingly! * * Return: "true" if key is successfully aligned, otherwise "false" */ static bool tipc_crypto_key_try_align(struct tipc_crypto *rx, u8 new_pending) { struct tipc_aead *tmp1, *tmp2 = NULL; struct tipc_key key; bool aligned = false; u8 new_passive = 0; int x; spin_lock(&rx->lock); key = rx->key; if (key.pending == new_pending) { aligned = true; goto exit; } if (key.active) goto exit; if (!key.pending) goto exit; if (tipc_aead_users(rx->aead[key.pending]) > 0) goto exit; /* Try to "isolate" this pending key first */ tmp1 = tipc_aead_rcu_ptr(rx->aead[key.pending], &rx->lock); if (!refcount_dec_if_one(&tmp1->refcnt)) goto exit; rcu_assign_pointer(rx->aead[key.pending], NULL); /* Move passive key if any */ if (key.passive) { tmp2 = rcu_replace_pointer(rx->aead[key.passive], tmp2, lockdep_is_held(&rx->lock)); x = (key.passive - key.pending + new_pending) % KEY_MAX; new_passive = (x <= 0) ? x + KEY_MAX : x; } /* Re-allocate the key(s) */ tipc_crypto_key_set_state(rx, new_passive, 0, new_pending); rcu_assign_pointer(rx->aead[new_pending], tmp1); if (new_passive) rcu_assign_pointer(rx->aead[new_passive], tmp2); refcount_set(&tmp1->refcnt, 1); aligned = true; pr_info_ratelimited("%s: key[%d] -> key[%d]\n", rx->name, key.pending, new_pending); exit: spin_unlock(&rx->lock); return aligned; } /** * tipc_crypto_key_pick_tx - Pick one TX key for message decryption * @tx: TX crypto handle * @rx: RX crypto handle (can be NULL) * @skb: the message skb which will be decrypted later * @tx_key: peer TX key id * * This function looks up the existing TX keys and pick one which is suitable * for the message decryption, that must be a cluster key and not used before * on the same message (i.e. recursive). * * Return: the TX AEAD key handle in case of success, otherwise NULL */ static struct tipc_aead *tipc_crypto_key_pick_tx(struct tipc_crypto *tx, struct tipc_crypto *rx, struct sk_buff *skb, u8 tx_key) { struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(skb); struct tipc_aead *aead = NULL; struct tipc_key key = tx->key; u8 k, i = 0; /* Initialize data if not yet */ if (!skb_cb->tx_clone_deferred) { skb_cb->tx_clone_deferred = 1; memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx)); } skb_cb->tx_clone_ctx.rx = rx; if (++skb_cb->tx_clone_ctx.recurs > 2) return NULL; /* Pick one TX key */ spin_lock(&tx->lock); if (tx_key == KEY_MASTER) { aead = tipc_aead_rcu_ptr(tx->aead[KEY_MASTER], &tx->lock); goto done; } do { k = (i == 0) ? key.pending : ((i == 1) ? key.active : key.passive); if (!k) continue; aead = tipc_aead_rcu_ptr(tx->aead[k], &tx->lock); if (!aead) continue; if (aead->mode != CLUSTER_KEY || aead == skb_cb->tx_clone_ctx.last) { aead = NULL; continue; } /* Ok, found one cluster key */ skb_cb->tx_clone_ctx.last = aead; WARN_ON(skb->next); skb->next = skb_clone(skb, GFP_ATOMIC); if (unlikely(!skb->next)) pr_warn("Failed to clone skb for next round if any\n"); break; } while (++i < 3); done: if (likely(aead)) WARN_ON(!refcount_inc_not_zero(&aead->refcnt)); spin_unlock(&tx->lock); return aead; } /** * tipc_crypto_key_synch: Synch own key data according to peer key status * @rx: RX crypto handle * @skb: TIPCv2 message buffer (incl. the ehdr from peer) * * This function updates the peer node related data as the peer RX active key * has changed, so the number of TX keys' users on this node are increased and * decreased correspondingly. * * It also considers if peer has no key, then we need to make own master key * (if any) taking over i.e. starting grace period and also trigger key * distributing process. * * The "per-peer" sndnxt is also reset when the peer key has switched. */ static void tipc_crypto_key_synch(struct tipc_crypto *rx, struct sk_buff *skb) { struct tipc_ehdr *ehdr = (struct tipc_ehdr *)skb_network_header(skb); struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx; struct tipc_msg *hdr = buf_msg(skb); u32 self = tipc_own_addr(rx->net); u8 cur, new; unsigned long delay; /* Update RX 'key_master' flag according to peer, also mark "legacy" if * a peer has no master key. */ rx->key_master = ehdr->master_key; if (!rx->key_master) tx->legacy_user = 1; /* For later cases, apply only if message is destined to this node */ if (!ehdr->destined || msg_short(hdr) || msg_destnode(hdr) != self) return; /* Case 1: Peer has no keys, let's make master key take over */ if (ehdr->rx_nokey) { /* Set or extend grace period */ tx->timer2 = jiffies; /* Schedule key distributing for the peer if not yet */ if (tx->key.keys && !atomic_cmpxchg(&rx->key_distr, 0, KEY_DISTR_SCHED)) { get_random_bytes(&delay, 2); delay %= 5; delay = msecs_to_jiffies(500 * ++delay); if (queue_delayed_work(tx->wq, &rx->work, delay)) tipc_node_get(rx->node); } } else { /* Cancel a pending key distributing if any */ atomic_xchg(&rx->key_distr, 0); } /* Case 2: Peer RX active key has changed, let's update own TX users */ cur = atomic_read(&rx->peer_rx_active); new = ehdr->rx_key_active; if (tx->key.keys && cur != new && atomic_cmpxchg(&rx->peer_rx_active, cur, new) == cur) { if (new) tipc_aead_users_inc(tx->aead[new], INT_MAX); if (cur) tipc_aead_users_dec(tx->aead[cur], 0); atomic64_set(&rx->sndnxt, 0); /* Mark the point TX key users changed */ tx->timer1 = jiffies; pr_debug("%s: key users changed %d-- %d++, peer %s\n", tx->name, cur, new, rx->name); } } static int tipc_crypto_key_revoke(struct net *net, u8 tx_key) { struct tipc_crypto *tx = tipc_net(net)->crypto_tx; struct tipc_key key; spin_lock_bh(&tx->lock); key = tx->key; WARN_ON(!key.active || tx_key != key.active); /* Free the active key */ tipc_crypto_key_set_state(tx, key.passive, 0, key.pending); tipc_crypto_key_detach(tx->aead[key.active], &tx->lock); spin_unlock_bh(&tx->lock); pr_warn("%s: key is revoked\n", tx->name); return -EKEYREVOKED; } int tipc_crypto_start(struct tipc_crypto **crypto, struct net *net, struct tipc_node *node) { struct tipc_crypto *c; if (*crypto) return -EEXIST; /* Allocate crypto */ c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return -ENOMEM; /* Allocate workqueue on TX */ if (!node) { c->wq = alloc_ordered_workqueue("tipc_crypto", 0); if (!c->wq) { kfree(c); return -ENOMEM; } } /* Allocate statistic structure */ c->stats = alloc_percpu_gfp(struct tipc_crypto_stats, GFP_ATOMIC); if (!c->stats) { if (c->wq) destroy_workqueue(c->wq); kfree_sensitive(c); return -ENOMEM; } c->flags = 0; c->net = net; c->node = node; get_random_bytes(&c->key_gen, 2); tipc_crypto_key_set_state(c, 0, 0, 0); atomic_set(&c->key_distr, 0); atomic_set(&c->peer_rx_active, 0); atomic64_set(&c->sndnxt, 0); c->timer1 = jiffies; c->timer2 = jiffies; c->rekeying_intv = TIPC_REKEYING_INTV_DEF; spin_lock_init(&c->lock); scnprintf(c->name, 48, "%s(%s)", (is_rx(c)) ? "RX" : "TX", (is_rx(c)) ? tipc_node_get_id_str(c->node) : tipc_own_id_string(c->net)); if (is_rx(c)) INIT_DELAYED_WORK(&c->work, tipc_crypto_work_rx); else INIT_DELAYED_WORK(&c->work, tipc_crypto_work_tx); *crypto = c; return 0; } void tipc_crypto_stop(struct tipc_crypto **crypto) { struct tipc_crypto *c = *crypto; u8 k; if (!c) return; /* Flush any queued works & destroy wq */ if (is_tx(c)) { c->rekeying_intv = 0; cancel_delayed_work_sync(&c->work); destroy_workqueue(c->wq); } /* Release AEAD keys */ rcu_read_lock(); for (k = KEY_MIN; k <= KEY_MAX; k++) tipc_aead_put(rcu_dereference(c->aead[k])); rcu_read_unlock(); pr_debug("%s: has been stopped\n", c->name); /* Free this crypto statistics */ free_percpu(c->stats); *crypto = NULL; kfree_sensitive(c); } void tipc_crypto_timeout(struct tipc_crypto *rx) { struct tipc_net *tn = tipc_net(rx->net); struct tipc_crypto *tx = tn->crypto_tx; struct tipc_key key; int cmd; /* TX pending: taking all users & stable -> active */ spin_lock(&tx->lock); key = tx->key; if (key.active && tipc_aead_users(tx->aead[key.active]) > 0) goto s1; if (!key.pending || tipc_aead_users(tx->aead[key.pending]) <= 0) goto s1; if (time_before(jiffies, tx->timer1 + TIPC_TX_LASTING_TIME)) goto s1; tipc_crypto_key_set_state(tx, key.passive, key.pending, 0); if (key.active) tipc_crypto_key_detach(tx->aead[key.active], &tx->lock); this_cpu_inc(tx->stats->stat[STAT_SWITCHES]); pr_info("%s: key[%d] is activated\n", tx->name, key.pending); s1: spin_unlock(&tx->lock); /* RX pending: having user -> active */ spin_lock(&rx->lock); key = rx->key; if (!key.pending || tipc_aead_users(rx->aead[key.pending]) <= 0) goto s2; if (key.active) key.passive = key.active; key.active = key.pending; rx->timer2 = jiffies; tipc_crypto_key_set_state(rx, key.passive, key.active, 0); this_cpu_inc(rx->stats->stat[STAT_SWITCHES]); pr_info("%s: key[%d] is activated\n", rx->name, key.pending); goto s5; s2: /* RX pending: not working -> remove */ if (!key.pending || tipc_aead_users(rx->aead[key.pending]) > -10) goto s3; tipc_crypto_key_set_state(rx, key.passive, key.active, 0); tipc_crypto_key_detach(rx->aead[key.pending], &rx->lock); pr_debug("%s: key[%d] is removed\n", rx->name, key.pending); goto s5; s3: /* RX active: timed out or no user -> pending */ if (!key.active) goto s4; if (time_before(jiffies, rx->timer1 + TIPC_RX_ACTIVE_LIM) && tipc_aead_users(rx->aead[key.active]) > 0) goto s4; if (key.pending) key.passive = key.active; else key.pending = key.active; rx->timer2 = jiffies; tipc_crypto_key_set_state(rx, key.passive, 0, key.pending); tipc_aead_users_set(rx->aead[key.pending], 0); pr_debug("%s: key[%d] is deactivated\n", rx->name, key.active); goto s5; s4: /* RX passive: outdated or not working -> free */ if (!key.passive) goto s5; if (time_before(jiffies, rx->timer2 + TIPC_RX_PASSIVE_LIM) && tipc_aead_users(rx->aead[key.passive]) > -10) goto s5; tipc_crypto_key_set_state(rx, 0, key.active, key.pending); tipc_crypto_key_detach(rx->aead[key.passive], &rx->lock); pr_debug("%s: key[%d] is freed\n", rx->name, key.passive); s5: spin_unlock(&rx->lock); /* Relax it here, the flag will be set again if it really is, but only * when we are not in grace period for safety! */ if (time_after(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD)) tx->legacy_user = 0; /* Limit max_tfms & do debug commands if needed */ if (likely(sysctl_tipc_max_tfms <= TIPC_MAX_TFMS_LIM)) return; cmd = sysctl_tipc_max_tfms; sysctl_tipc_max_tfms = TIPC_MAX_TFMS_DEF; tipc_crypto_do_cmd(rx->net, cmd); } static inline void tipc_crypto_clone_msg(struct net *net, struct sk_buff *_skb, struct tipc_bearer *b, struct tipc_media_addr *dst, struct tipc_node *__dnode, u8 type) { struct sk_buff *skb; skb = skb_clone(_skb, GFP_ATOMIC); if (skb) { TIPC_SKB_CB(skb)->xmit_type = type; tipc_crypto_xmit(net, &skb, b, dst, __dnode); if (skb) b->media->send_msg(net, skb, b, dst); } } /** * tipc_crypto_xmit - Build & encrypt TIPC message for xmit * @net: struct net * @skb: input/output message skb pointer * @b: bearer used for xmit later * @dst: destination media address * @__dnode: destination node for reference if any * * First, build an encryption message header on the top of the message, then * encrypt the original TIPC message by using the pending, master or active * key with this preference order. * If the encryption is successful, the encrypted skb is returned directly or * via the callback. * Otherwise, the skb is freed! * * Return: * * 0 : the encryption has succeeded (or no encryption) * * -EINPROGRESS/-EBUSY : the encryption is ongoing, a callback will be made * * -ENOKEK : the encryption has failed due to no key * * -EKEYREVOKED : the encryption has failed due to key revoked * * -ENOMEM : the encryption has failed due to no memory * * < 0 : the encryption has failed due to other reasons */ int tipc_crypto_xmit(struct net *net, struct sk_buff **skb, struct tipc_bearer *b, struct tipc_media_addr *dst, struct tipc_node *__dnode) { struct tipc_crypto *__rx = tipc_node_crypto_rx(__dnode); struct tipc_crypto *tx = tipc_net(net)->crypto_tx; struct tipc_crypto_stats __percpu *stats = tx->stats; struct tipc_msg *hdr = buf_msg(*skb); struct tipc_key key = tx->key; struct tipc_aead *aead = NULL; u32 user = msg_user(hdr); u32 type = msg_type(hdr); int rc = -ENOKEY; u8 tx_key = 0; /* No encryption? */ if (!tx->working) return 0; /* Pending key if peer has active on it or probing time */ if (unlikely(key.pending)) { tx_key = key.pending; if (!tx->key_master && !key.active) goto encrypt; if (__rx && atomic_read(&__rx->peer_rx_active) == tx_key) goto encrypt; if (TIPC_SKB_CB(*skb)->xmit_type == SKB_PROBING) { pr_debug("%s: probing for key[%d]\n", tx->name, key.pending); goto encrypt; } if (user == LINK_CONFIG || user == LINK_PROTOCOL) tipc_crypto_clone_msg(net, *skb, b, dst, __dnode, SKB_PROBING); } /* Master key if this is a *vital* message or in grace period */ if (tx->key_master) { tx_key = KEY_MASTER; if (!key.active) goto encrypt; if (TIPC_SKB_CB(*skb)->xmit_type == SKB_GRACING) { pr_debug("%s: gracing for msg (%d %d)\n", tx->name, user, type); goto encrypt; } if (user == LINK_CONFIG || (user == LINK_PROTOCOL && type == RESET_MSG) || (user == MSG_CRYPTO && type == KEY_DISTR_MSG) || time_before(jiffies, tx->timer2 + TIPC_TX_GRACE_PERIOD)) { if (__rx && __rx->key_master && !atomic_read(&__rx->peer_rx_active)) goto encrypt; if (!__rx) { if (likely(!tx->legacy_user)) goto encrypt; tipc_crypto_clone_msg(net, *skb, b, dst, __dnode, SKB_GRACING); } } } /* Else, use the active key if any */ if (likely(key.active)) { tx_key = key.active; goto encrypt; } goto exit; encrypt: aead = tipc_aead_get(tx->aead[tx_key]); if (unlikely(!aead)) goto exit; rc = tipc_ehdr_build(net, aead, tx_key, *skb, __rx); if (likely(rc > 0)) rc = tipc_aead_encrypt(aead, *skb, b, dst, __dnode); exit: switch (rc) { case 0: this_cpu_inc(stats->stat[STAT_OK]); break; case -EINPROGRESS: case -EBUSY: this_cpu_inc(stats->stat[STAT_ASYNC]); *skb = NULL; return rc; default: this_cpu_inc(stats->stat[STAT_NOK]); if (rc == -ENOKEY) this_cpu_inc(stats->stat[STAT_NOKEYS]); else if (rc == -EKEYREVOKED) this_cpu_inc(stats->stat[STAT_BADKEYS]); kfree_skb(*skb); *skb = NULL; break; } tipc_aead_put(aead); return rc; } /** * tipc_crypto_rcv - Decrypt an encrypted TIPC message from peer * @net: struct net * @rx: RX crypto handle * @skb: input/output message skb pointer * @b: bearer where the message has been received * * If the decryption is successful, the decrypted skb is returned directly or * as the callback, the encryption header and auth tag will be trimed out * before forwarding to tipc_rcv() via the tipc_crypto_rcv_complete(). * Otherwise, the skb will be freed! * Note: RX key(s) can be re-aligned, or in case of no key suitable, TX * cluster key(s) can be taken for decryption (- recursive). * * Return: * * 0 : the decryption has successfully completed * * -EINPROGRESS/-EBUSY : the decryption is ongoing, a callback will be made * * -ENOKEY : the decryption has failed due to no key * * -EBADMSG : the decryption has failed due to bad message * * -ENOMEM : the decryption has failed due to no memory * * < 0 : the decryption has failed due to other reasons */ int tipc_crypto_rcv(struct net *net, struct tipc_crypto *rx, struct sk_buff **skb, struct tipc_bearer *b) { struct tipc_crypto *tx = tipc_net(net)->crypto_tx; struct tipc_crypto_stats __percpu *stats; struct tipc_aead *aead = NULL; struct tipc_key key; int rc = -ENOKEY; u8 tx_key, n; tx_key = ((struct tipc_ehdr *)(*skb)->data)->tx_key; /* New peer? * Let's try with TX key (i.e. cluster mode) & verify the skb first! */ if (unlikely(!rx || tx_key == KEY_MASTER)) goto pick_tx; /* Pick RX key according to TX key if any */ key = rx->key; if (tx_key == key.active || tx_key == key.pending || tx_key == key.passive) goto decrypt; /* Unknown key, let's try to align RX key(s) */ if (tipc_crypto_key_try_align(rx, tx_key)) goto decrypt; pick_tx: /* No key suitable? Try to pick one from TX... */ aead = tipc_crypto_key_pick_tx(tx, rx, *skb, tx_key); if (aead) goto decrypt; goto exit; decrypt: rcu_read_lock(); if (!aead) aead = tipc_aead_get(rx->aead[tx_key]); rc = tipc_aead_decrypt(net, aead, *skb, b); rcu_read_unlock(); exit: stats = ((rx) ?: tx)->stats; switch (rc) { case 0: this_cpu_inc(stats->stat[STAT_OK]); break; case -EINPROGRESS: case -EBUSY: this_cpu_inc(stats->stat[STAT_ASYNC]); *skb = NULL; return rc; default: this_cpu_inc(stats->stat[STAT_NOK]); if (rc == -ENOKEY) { kfree_skb(*skb); *skb = NULL; if (rx) { /* Mark rx->nokey only if we dont have a * pending received session key, nor a newer * one i.e. in the next slot. */ n = key_next(tx_key); rx->nokey = !(rx->skey || rcu_access_pointer(rx->aead[n])); pr_debug_ratelimited("%s: nokey %d, key %d/%x\n", rx->name, rx->nokey, tx_key, rx->key.keys); tipc_node_put(rx->node); } this_cpu_inc(stats->stat[STAT_NOKEYS]); return rc; } else if (rc == -EBADMSG) { this_cpu_inc(stats->stat[STAT_BADMSGS]); } break; } tipc_crypto_rcv_complete(net, aead, b, skb, rc); return rc; } static void tipc_crypto_rcv_complete(struct net *net, struct tipc_aead *aead, struct tipc_bearer *b, struct sk_buff **skb, int err) { struct tipc_skb_cb *skb_cb = TIPC_SKB_CB(*skb); struct tipc_crypto *rx = aead->crypto; struct tipc_aead *tmp = NULL; struct tipc_ehdr *ehdr; struct tipc_node *n; /* Is this completed by TX? */ if (unlikely(is_tx(aead->crypto))) { rx = skb_cb->tx_clone_ctx.rx; pr_debug("TX->RX(%s): err %d, aead %p, skb->next %p, flags %x\n", (rx) ? tipc_node_get_id_str(rx->node) : "-", err, aead, (*skb)->next, skb_cb->flags); pr_debug("skb_cb [recurs %d, last %p], tx->aead [%p %p %p]\n", skb_cb->tx_clone_ctx.recurs, skb_cb->tx_clone_ctx.last, aead->crypto->aead[1], aead->crypto->aead[2], aead->crypto->aead[3]); if (unlikely(err)) { if (err == -EBADMSG && (*skb)->next) tipc_rcv(net, (*skb)->next, b); goto free_skb; } if (likely((*skb)->next)) { kfree_skb((*skb)->next); (*skb)->next = NULL; } ehdr = (struct tipc_ehdr *)(*skb)->data; if (!rx) { WARN_ON(ehdr->user != LINK_CONFIG); n = tipc_node_create(net, 0, ehdr->id, 0xffffu, 0, true); rx = tipc_node_crypto_rx(n); if (unlikely(!rx)) goto free_skb; } /* Ignore cloning if it was TX master key */ if (ehdr->tx_key == KEY_MASTER) goto rcv; if (tipc_aead_clone(&tmp, aead) < 0) goto rcv; WARN_ON(!refcount_inc_not_zero(&tmp->refcnt)); if (tipc_crypto_key_attach(rx, tmp, ehdr->tx_key, false) < 0) { tipc_aead_free(&tmp->rcu); goto rcv; } tipc_aead_put(aead); aead = tmp; } if (unlikely(err)) { tipc_aead_users_dec((struct tipc_aead __force __rcu *)aead, INT_MIN); goto free_skb; } /* Set the RX key's user */ tipc_aead_users_set((struct tipc_aead __force __rcu *)aead, 1); /* Mark this point, RX works */ rx->timer1 = jiffies; rcv: /* Remove ehdr & auth. tag prior to tipc_rcv() */ ehdr = (struct tipc_ehdr *)(*skb)->data; /* Mark this point, RX passive still works */ if (rx->key.passive && ehdr->tx_key == rx->key.passive) rx->timer2 = jiffies; skb_reset_network_header(*skb); skb_pull(*skb, tipc_ehdr_size(ehdr)); if (pskb_trim(*skb, (*skb)->len - aead->authsize)) goto free_skb; /* Validate TIPCv2 message */ if (unlikely(!tipc_msg_validate(skb))) { pr_err_ratelimited("Packet dropped after decryption!\n"); goto free_skb; } /* Ok, everything's fine, try to synch own keys according to peers' */ tipc_crypto_key_synch(rx, *skb); /* Re-fetch skb cb as skb might be changed in tipc_msg_validate */ skb_cb = TIPC_SKB_CB(*skb); /* Mark skb decrypted */ skb_cb->decrypted = 1; /* Clear clone cxt if any */ if (likely(!skb_cb->tx_clone_deferred)) goto exit; skb_cb->tx_clone_deferred = 0; memset(&skb_cb->tx_clone_ctx, 0, sizeof(skb_cb->tx_clone_ctx)); goto exit; free_skb: kfree_skb(*skb); *skb = NULL; exit: tipc_aead_put(aead); if (rx) tipc_node_put(rx->node); } static void tipc_crypto_do_cmd(struct net *net, int cmd) { struct tipc_net *tn = tipc_net(net); struct tipc_crypto *tx = tn->crypto_tx, *rx; struct list_head *p; unsigned int stat; int i, j, cpu; char buf[200]; /* Currently only one command is supported */ switch (cmd) { case 0xfff1: goto print_stats; default: return; } print_stats: /* Print a header */ pr_info("\n=============== TIPC Crypto Statistics ===============\n\n"); /* Print key status */ pr_info("Key status:\n"); pr_info("TX(%7.7s)\n%s", tipc_own_id_string(net), tipc_crypto_key_dump(tx, buf)); rcu_read_lock(); for (p = tn->node_list.next; p != &tn->node_list; p = p->next) { rx = tipc_node_crypto_rx_by_list(p); pr_info("RX(%7.7s)\n%s", tipc_node_get_id_str(rx->node), tipc_crypto_key_dump(rx, buf)); } rcu_read_unlock(); /* Print crypto statistics */ for (i = 0, j = 0; i < MAX_STATS; i++) j += scnprintf(buf + j, 200 - j, "|%11s ", hstats[i]); pr_info("Counter %s", buf); memset(buf, '-', 115); buf[115] = '\0'; pr_info("%s\n", buf); j = scnprintf(buf, 200, "TX(%7.7s) ", tipc_own_id_string(net)); for_each_possible_cpu(cpu) { for (i = 0; i < MAX_STATS; i++) { stat = per_cpu_ptr(tx->stats, cpu)->stat[i]; j += scnprintf(buf + j, 200 - j, "|%11d ", stat); } pr_info("%s", buf); j = scnprintf(buf, 200, "%12s", " "); } rcu_read_lock(); for (p = tn->node_list.next; p != &tn->node_list; p = p->next) { rx = tipc_node_crypto_rx_by_list(p); j = scnprintf(buf, 200, "RX(%7.7s) ", tipc_node_get_id_str(rx->node)); for_each_possible_cpu(cpu) { for (i = 0; i < MAX_STATS; i++) { stat = per_cpu_ptr(rx->stats, cpu)->stat[i]; j += scnprintf(buf + j, 200 - j, "|%11d ", stat); } pr_info("%s", buf); j = scnprintf(buf, 200, "%12s", " "); } } rcu_read_unlock(); pr_info("\n======================== Done ========================\n"); } static char *tipc_crypto_key_dump(struct tipc_crypto *c, char *buf) { struct tipc_key key = c->key; struct tipc_aead *aead; int k, i = 0; char *s; for (k = KEY_MIN; k <= KEY_MAX; k++) { if (k == KEY_MASTER) { if (is_rx(c)) continue; if (time_before(jiffies, c->timer2 + TIPC_TX_GRACE_PERIOD)) s = "ACT"; else s = "PAS"; } else { if (k == key.passive) s = "PAS"; else if (k == key.active) s = "ACT"; else if (k == key.pending) s = "PEN"; else s = "-"; } i += scnprintf(buf + i, 200 - i, "\tKey%d: %s", k, s); rcu_read_lock(); aead = rcu_dereference(c->aead[k]); if (aead) i += scnprintf(buf + i, 200 - i, "{\"0x...%s\", \"%s\"}/%d:%d", aead->hint, (aead->mode == CLUSTER_KEY) ? "c" : "p", atomic_read(&aead->users), refcount_read(&aead->refcnt)); rcu_read_unlock(); i += scnprintf(buf + i, 200 - i, "\n"); } if (is_rx(c)) i += scnprintf(buf + i, 200 - i, "\tPeer RX active: %d\n", atomic_read(&c->peer_rx_active)); return buf; } static char *tipc_key_change_dump(struct tipc_key old, struct tipc_key new, char *buf) { struct tipc_key *key = &old; int k, i = 0; char *s; /* Output format: "[%s %s %s] -> [%s %s %s]", max len = 32 */ again: i += scnprintf(buf + i, 32 - i, "["); for (k = KEY_1; k <= KEY_3; k++) { if (k == key->passive) s = "pas"; else if (k == key->active) s = "act"; else if (k == key->pending) s = "pen"; else s = "-"; i += scnprintf(buf + i, 32 - i, (k != KEY_3) ? "%s " : "%s", s); } if (key != &new) { i += scnprintf(buf + i, 32 - i, "] -> "); key = &new; goto again; } i += scnprintf(buf + i, 32 - i, "]"); return buf; } /** * tipc_crypto_msg_rcv - Common 'MSG_CRYPTO' processing point * @net: the struct net * @skb: the receiving message buffer */ void tipc_crypto_msg_rcv(struct net *net, struct sk_buff *skb) { struct tipc_crypto *rx; struct tipc_msg *hdr; if (unlikely(skb_linearize(skb))) goto exit; hdr = buf_msg(skb); rx = tipc_node_crypto_rx_by_addr(net, msg_prevnode(hdr)); if (unlikely(!rx)) goto exit; switch (msg_type(hdr)) { case KEY_DISTR_MSG: if (tipc_crypto_key_rcv(rx, hdr)) goto exit; break; default: break; } tipc_node_put(rx->node); exit: kfree_skb(skb); } /** * tipc_crypto_key_distr - Distribute a TX key * @tx: the TX crypto * @key: the key's index * @dest: the destination tipc node, = NULL if distributing to all nodes * * Return: 0 in case of success, otherwise < 0 */ int tipc_crypto_key_distr(struct tipc_crypto *tx, u8 key, struct tipc_node *dest) { struct tipc_aead *aead; u32 dnode = tipc_node_get_addr(dest); int rc = -ENOKEY; if (!sysctl_tipc_key_exchange_enabled) return 0; if (key) { rcu_read_lock(); aead = tipc_aead_get(tx->aead[key]); if (likely(aead)) { rc = tipc_crypto_key_xmit(tx->net, aead->key, aead->gen, aead->mode, dnode); tipc_aead_put(aead); } rcu_read_unlock(); } return rc; } /** * tipc_crypto_key_xmit - Send a session key * @net: the struct net * @skey: the session key to be sent * @gen: the key's generation * @mode: the key's mode * @dnode: the destination node address, = 0 if broadcasting to all nodes * * The session key 'skey' is packed in a TIPC v2 'MSG_CRYPTO/KEY_DISTR_MSG' * as its data section, then xmit-ed through the uc/bc link. * * Return: 0 in case of success, otherwise < 0 */ static int tipc_crypto_key_xmit(struct net *net, struct tipc_aead_key *skey, u16 gen, u8 mode, u32 dnode) { struct sk_buff_head pkts; struct tipc_msg *hdr; struct sk_buff *skb; u16 size, cong_link_cnt; u8 *data; int rc; size = tipc_aead_key_size(skey); skb = tipc_buf_acquire(INT_H_SIZE + size, GFP_ATOMIC); if (!skb) return -ENOMEM; hdr = buf_msg(skb); tipc_msg_init(tipc_own_addr(net), hdr, MSG_CRYPTO, KEY_DISTR_MSG, INT_H_SIZE, dnode); msg_set_size(hdr, INT_H_SIZE + size); msg_set_key_gen(hdr, gen); msg_set_key_mode(hdr, mode); data = msg_data(hdr); *((__be32 *)(data + TIPC_AEAD_ALG_NAME)) = htonl(skey->keylen); memcpy(data, skey->alg_name, TIPC_AEAD_ALG_NAME); memcpy(data + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->key, skey->keylen); __skb_queue_head_init(&pkts); __skb_queue_tail(&pkts, skb); if (dnode) rc = tipc_node_xmit(net, &pkts, dnode, 0); else rc = tipc_bcast_xmit(net, &pkts, &cong_link_cnt); return rc; } /** * tipc_crypto_key_rcv - Receive a session key * @rx: the RX crypto * @hdr: the TIPC v2 message incl. the receiving session key in its data * * This function retrieves the session key in the message from peer, then * schedules a RX work to attach the key to the corresponding RX crypto. * * Return: "true" if the key has been scheduled for attaching, otherwise * "false". */ static bool tipc_crypto_key_rcv(struct tipc_crypto *rx, struct tipc_msg *hdr) { struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx; struct tipc_aead_key *skey = NULL; u16 key_gen = msg_key_gen(hdr); u32 size = msg_data_sz(hdr); u8 *data = msg_data(hdr); unsigned int keylen; /* Verify whether the size can exist in the packet */ if (unlikely(size < sizeof(struct tipc_aead_key) + TIPC_AEAD_KEYLEN_MIN)) { pr_debug("%s: message data size is too small\n", rx->name); goto exit; } keylen = ntohl(*((__be32 *)(data + TIPC_AEAD_ALG_NAME))); /* Verify the supplied size values */ if (unlikely(size != keylen + sizeof(struct tipc_aead_key) || keylen > TIPC_AEAD_KEY_SIZE_MAX)) { pr_debug("%s: invalid MSG_CRYPTO key size\n", rx->name); goto exit; } spin_lock(&rx->lock); if (unlikely(rx->skey || (key_gen == rx->key_gen && rx->key.keys))) { pr_err("%s: key existed <%p>, gen %d vs %d\n", rx->name, rx->skey, key_gen, rx->key_gen); goto exit_unlock; } /* Allocate memory for the key */ skey = kmalloc(size, GFP_ATOMIC); if (unlikely(!skey)) { pr_err("%s: unable to allocate memory for skey\n", rx->name); goto exit_unlock; } /* Copy key from msg data */ skey->keylen = keylen; memcpy(skey->alg_name, data, TIPC_AEAD_ALG_NAME); memcpy(skey->key, data + TIPC_AEAD_ALG_NAME + sizeof(__be32), skey->keylen); rx->key_gen = key_gen; rx->skey_mode = msg_key_mode(hdr); rx->skey = skey; rx->nokey = 0; mb(); /* for nokey flag */ exit_unlock: spin_unlock(&rx->lock); exit: /* Schedule the key attaching on this crypto */ if (likely(skey && queue_delayed_work(tx->wq, &rx->work, 0))) return true; return false; } /** * tipc_crypto_work_rx - Scheduled RX works handler * @work: the struct RX work * * The function processes the previous scheduled works i.e. distributing TX key * or attaching a received session key on RX crypto. */ static void tipc_crypto_work_rx(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct tipc_crypto *rx = container_of(dwork, struct tipc_crypto, work); struct tipc_crypto *tx = tipc_net(rx->net)->crypto_tx; unsigned long delay = msecs_to_jiffies(5000); bool resched = false; u8 key; int rc; /* Case 1: Distribute TX key to peer if scheduled */ if (atomic_cmpxchg(&rx->key_distr, KEY_DISTR_SCHED, KEY_DISTR_COMPL) == KEY_DISTR_SCHED) { /* Always pick the newest one for distributing */ key = tx->key.pending ?: tx->key.active; rc = tipc_crypto_key_distr(tx, key, rx->node); if (unlikely(rc)) pr_warn("%s: unable to distr key[%d] to %s, err %d\n", tx->name, key, tipc_node_get_id_str(rx->node), rc); /* Sched for key_distr releasing */ resched = true; } else { atomic_cmpxchg(&rx->key_distr, KEY_DISTR_COMPL, 0); } /* Case 2: Attach a pending received session key from peer if any */ if (rx->skey) { rc = tipc_crypto_key_init(rx, rx->skey, rx->skey_mode, false); if (unlikely(rc < 0)) pr_warn("%s: unable to attach received skey, err %d\n", rx->name, rc); switch (rc) { case -EBUSY: case -ENOMEM: /* Resched the key attaching */ resched = true; break; default: synchronize_rcu(); kfree(rx->skey); rx->skey = NULL; break; } } if (resched && queue_delayed_work(tx->wq, &rx->work, delay)) return; tipc_node_put(rx->node); } /** * tipc_crypto_rekeying_sched - (Re)schedule rekeying w/o new interval * @tx: TX crypto * @changed: if the rekeying needs to be rescheduled with new interval * @new_intv: new rekeying interval (when "changed" = true) */ void tipc_crypto_rekeying_sched(struct tipc_crypto *tx, bool changed, u32 new_intv) { unsigned long delay; bool now = false; if (changed) { if (new_intv == TIPC_REKEYING_NOW) now = true; else tx->rekeying_intv = new_intv; cancel_delayed_work_sync(&tx->work); } if (tx->rekeying_intv || now) { delay = (now) ? 0 : tx->rekeying_intv * 60 * 1000; queue_delayed_work(tx->wq, &tx->work, msecs_to_jiffies(delay)); } } /** * tipc_crypto_work_tx - Scheduled TX works handler * @work: the struct TX work * * The function processes the previous scheduled work, i.e. key rekeying, by * generating a new session key based on current one, then attaching it to the * TX crypto and finally distributing it to peers. It also re-schedules the * rekeying if needed. */ static void tipc_crypto_work_tx(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct tipc_crypto *tx = container_of(dwork, struct tipc_crypto, work); struct tipc_aead_key *skey = NULL; struct tipc_key key = tx->key; struct tipc_aead *aead; int rc = -ENOMEM; if (unlikely(key.pending)) goto resched; /* Take current key as a template */ rcu_read_lock(); aead = rcu_dereference(tx->aead[key.active ?: KEY_MASTER]); if (unlikely(!aead)) { rcu_read_unlock(); /* At least one key should exist for securing */ return; } /* Lets duplicate it first */ skey = kmemdup(aead->key, tipc_aead_key_size(aead->key), GFP_ATOMIC); rcu_read_unlock(); /* Now, generate new key, initiate & distribute it */ if (likely(skey)) { rc = tipc_aead_key_generate(skey) ?: tipc_crypto_key_init(tx, skey, PER_NODE_KEY, false); if (likely(rc > 0)) rc = tipc_crypto_key_distr(tx, rc, NULL); kfree_sensitive(skey); } if (unlikely(rc)) pr_warn_ratelimited("%s: rekeying returns %d\n", tx->name, rc); resched: /* Re-schedule rekeying if any */ tipc_crypto_rekeying_sched(tx, false, 0); }
10 11 10 18 5 11 2 3 2 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 // SPDX-License-Identifier: GPL-2.0-or-later /* * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173 * * Copyright (C)2003 USAGI/WIDE Project * * Author Mitsuru KANDA <mk@linux-ipv6.org> */ /* * [Memo] * * Outbound: * The compression of IP datagram MUST be done before AH/ESP processing, * fragmentation, and the addition of Hop-by-Hop/Routing header. * * Inbound: * The decompression of IP datagram MUST be done after the reassembly, * AH/ESP processing. */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/ipcomp.h> #include <linux/crypto.h> #include <linux/err.h> #include <linux/pfkeyv2.h> #include <linux/random.h> #include <linux/percpu.h> #include <linux/smp.h> #include <linux/list.h> #include <linux/vmalloc.h> #include <linux/rtnetlink.h> #include <net/ip6_route.h> #include <net/icmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/mutex.h> static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); __be32 spi; const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; struct ip_comp_hdr *ipcomph = (struct ip_comp_hdr *)(skb->data + offset); struct xfrm_state *x; if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) return 0; spi = htonl(ntohs(ipcomph->cpi)); x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0, sock_net_uid(net, NULL)); else ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); xfrm_state_put(x); return 0; } static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) { struct net *net = xs_net(x); struct xfrm_state *t = NULL; t = xfrm_state_alloc(net); if (!t) goto out; t->id.proto = IPPROTO_IPV6; t->id.spi = xfrm6_tunnel_alloc_spi(net, (xfrm_address_t *)&x->props.saddr); if (!t->id.spi) goto error; memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); memcpy(&t->mark, &x->mark, sizeof(t->mark)); t->if_id = x->if_id; if (xfrm_init_state(t)) goto error; atomic_set(&t->tunnel_users, 1); out: return t; error: t->km.state = XFRM_STATE_DEAD; xfrm_state_put(t); t = NULL; goto out; } static int ipcomp6_tunnel_attach(struct xfrm_state *x) { struct net *net = xs_net(x); int err = 0; struct xfrm_state *t = NULL; __be32 spi; u32 mark = x->mark.m & x->mark.v; spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&x->props.saddr); if (spi) t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); if (!t) { err = -EINVAL; goto out; } xfrm_state_insert(t); xfrm_state_hold(t); } x->tunnel = t; atomic_inc(&t->tunnel_users); out: return err; } static int ipcomp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { int err = -EINVAL; x->props.header_len = 0; switch (x->props.mode) { case XFRM_MODE_TRANSPORT: break; case XFRM_MODE_TUNNEL: x->props.header_len += sizeof(struct ipv6hdr); break; default: NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp"); goto out; } err = ipcomp_init_state(x, extack); if (err) goto out; if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) { NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state"); goto out; } } err = 0; out: return err; } static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) { return 0; } static const struct xfrm_type ipcomp6_type = { .owner = THIS_MODULE, .proto = IPPROTO_COMP, .init_state = ipcomp6_init_state, .destructor = ipcomp_destroy, .input = ipcomp_input, .output = ipcomp_output, }; static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, .input_handler = xfrm_input, .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, .priority = 0, }; static int __init ipcomp6_init(void) { if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) { pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); return -EAGAIN; } return 0; } static void __exit ipcomp6_fini(void) { if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0) pr_info("%s: can't remove protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); } module_init(ipcomp6_init); module_exit(ipcomp6_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173"); MODULE_AUTHOR("Mitsuru KANDA <mk@linux-ipv6.org>"); MODULE_ALIAS_XFRM_TYPE(AF_INET6, XFRM_PROTO_COMP);
4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright Gavin Shan, IBM Corporation 2016. */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/of.h> #include <linux/platform_device.h> #include <net/ncsi.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/addrconf.h> #include <net/ipv6.h> #include <net/genetlink.h> #include "internal.h" #include "ncsi-pkt.h" #include "ncsi-netlink.h" LIST_HEAD(ncsi_dev_list); DEFINE_SPINLOCK(ncsi_dev_lock); bool ncsi_channel_has_link(struct ncsi_channel *channel) { return !!(channel->modes[NCSI_MODE_LINK].data[2] & 0x1); } bool ncsi_channel_is_last(struct ncsi_dev_priv *ndp, struct ncsi_channel *channel) { struct ncsi_package *np; struct ncsi_channel *nc; NCSI_FOR_EACH_PACKAGE(ndp, np) NCSI_FOR_EACH_CHANNEL(np, nc) { if (nc == channel) continue; if (nc->state == NCSI_CHANNEL_ACTIVE && ncsi_channel_has_link(nc)) return false; } return true; } static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) { struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; struct ncsi_channel *nc; unsigned long flags; nd->state = ncsi_dev_state_functional; if (force_down) { nd->link_up = 0; goto report; } nd->link_up = 0; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { spin_lock_irqsave(&nc->lock, flags); if (!list_empty(&nc->link) || nc->state != NCSI_CHANNEL_ACTIVE) { spin_unlock_irqrestore(&nc->lock, flags); continue; } if (ncsi_channel_has_link(nc)) { spin_unlock_irqrestore(&nc->lock, flags); nd->link_up = 1; goto report; } spin_unlock_irqrestore(&nc->lock, flags); } } report: nd->handler(nd); } static void ncsi_channel_monitor(struct timer_list *t) { struct ncsi_channel *nc = from_timer(nc, t, monitor.timer); struct ncsi_package *np = nc->package; struct ncsi_dev_priv *ndp = np->ndp; struct ncsi_channel_mode *ncm; struct ncsi_cmd_arg nca; bool enabled, chained; unsigned int monitor_state; unsigned long flags; int state, ret; spin_lock_irqsave(&nc->lock, flags); state = nc->state; chained = !list_empty(&nc->link); enabled = nc->monitor.enabled; monitor_state = nc->monitor.state; spin_unlock_irqrestore(&nc->lock, flags); if (!enabled) return; /* expected race disabling timer */ if (WARN_ON_ONCE(chained)) goto bad_state; if (state != NCSI_CHANNEL_INACTIVE && state != NCSI_CHANNEL_ACTIVE) { bad_state: netdev_warn(ndp->ndev.dev, "Bad NCSI monitor state channel %d 0x%x %s queue\n", nc->id, state, chained ? "on" : "off"); spin_lock_irqsave(&nc->lock, flags); nc->monitor.enabled = false; spin_unlock_irqrestore(&nc->lock, flags); return; } switch (monitor_state) { case NCSI_CHANNEL_MONITOR_START: case NCSI_CHANNEL_MONITOR_RETRY: nca.ndp = ndp; nca.package = np->id; nca.channel = nc->id; nca.type = NCSI_PKT_CMD_GLS; nca.req_flags = 0; ret = ncsi_xmit_cmd(&nca); if (ret) netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", ret); break; case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: break; default: netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n", nc->id); ncsi_report_link(ndp, true); ndp->flags |= NCSI_DEV_RESHUFFLE; ncm = &nc->modes[NCSI_MODE_LINK]; spin_lock_irqsave(&nc->lock, flags); nc->monitor.enabled = false; nc->state = NCSI_CHANNEL_INVISIBLE; ncm->data[2] &= ~0x1; spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); nc->state = NCSI_CHANNEL_ACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); ncsi_process_next_channel(ndp); return; } spin_lock_irqsave(&nc->lock, flags); nc->monitor.state++; spin_unlock_irqrestore(&nc->lock, flags); mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_start_channel_monitor(struct ncsi_channel *nc) { unsigned long flags; spin_lock_irqsave(&nc->lock, flags); WARN_ON_ONCE(nc->monitor.enabled); nc->monitor.enabled = true; nc->monitor.state = NCSI_CHANNEL_MONITOR_START; spin_unlock_irqrestore(&nc->lock, flags); mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_stop_channel_monitor(struct ncsi_channel *nc) { unsigned long flags; spin_lock_irqsave(&nc->lock, flags); if (!nc->monitor.enabled) { spin_unlock_irqrestore(&nc->lock, flags); return; } nc->monitor.enabled = false; spin_unlock_irqrestore(&nc->lock, flags); del_timer_sync(&nc->monitor.timer); } struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, unsigned char id) { struct ncsi_channel *nc; NCSI_FOR_EACH_CHANNEL(np, nc) { if (nc->id == id) return nc; } return NULL; } struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id) { struct ncsi_channel *nc, *tmp; int index; unsigned long flags; nc = kzalloc(sizeof(*nc), GFP_ATOMIC); if (!nc) return NULL; nc->id = id; nc->package = np; nc->state = NCSI_CHANNEL_INACTIVE; nc->monitor.enabled = false; timer_setup(&nc->monitor.timer, ncsi_channel_monitor, 0); spin_lock_init(&nc->lock); INIT_LIST_HEAD(&nc->link); for (index = 0; index < NCSI_CAP_MAX; index++) nc->caps[index].index = index; for (index = 0; index < NCSI_MODE_MAX; index++) nc->modes[index].index = index; spin_lock_irqsave(&np->lock, flags); tmp = ncsi_find_channel(np, id); if (tmp) { spin_unlock_irqrestore(&np->lock, flags); kfree(nc); return tmp; } list_add_tail_rcu(&nc->node, &np->channels); np->channel_num++; spin_unlock_irqrestore(&np->lock, flags); return nc; } static void ncsi_remove_channel(struct ncsi_channel *nc) { struct ncsi_package *np = nc->package; unsigned long flags; spin_lock_irqsave(&nc->lock, flags); /* Release filters */ kfree(nc->mac_filter.addrs); kfree(nc->vlan_filter.vids); nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); ncsi_stop_channel_monitor(nc); /* Remove and free channel */ spin_lock_irqsave(&np->lock, flags); list_del_rcu(&nc->node); np->channel_num--; spin_unlock_irqrestore(&np->lock, flags); kfree(nc); } struct ncsi_package *ncsi_find_package(struct ncsi_dev_priv *ndp, unsigned char id) { struct ncsi_package *np; NCSI_FOR_EACH_PACKAGE(ndp, np) { if (np->id == id) return np; } return NULL; } struct ncsi_package *ncsi_add_package(struct ncsi_dev_priv *ndp, unsigned char id) { struct ncsi_package *np, *tmp; unsigned long flags; np = kzalloc(sizeof(*np), GFP_ATOMIC); if (!np) return NULL; np->id = id; np->ndp = ndp; spin_lock_init(&np->lock); INIT_LIST_HEAD(&np->channels); np->channel_whitelist = UINT_MAX; spin_lock_irqsave(&ndp->lock, flags); tmp = ncsi_find_package(ndp, id); if (tmp) { spin_unlock_irqrestore(&ndp->lock, flags); kfree(np); return tmp; } list_add_tail_rcu(&np->node, &ndp->packages); ndp->package_num++; spin_unlock_irqrestore(&ndp->lock, flags); return np; } void ncsi_remove_package(struct ncsi_package *np) { struct ncsi_dev_priv *ndp = np->ndp; struct ncsi_channel *nc, *tmp; unsigned long flags; /* Release all child channels */ list_for_each_entry_safe(nc, tmp, &np->channels, node) ncsi_remove_channel(nc); /* Remove and free package */ spin_lock_irqsave(&ndp->lock, flags); list_del_rcu(&np->node); ndp->package_num--; spin_unlock_irqrestore(&ndp->lock, flags); kfree(np); } void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, unsigned char id, struct ncsi_package **np, struct ncsi_channel **nc) { struct ncsi_package *p; struct ncsi_channel *c; p = ncsi_find_package(ndp, NCSI_PACKAGE_INDEX(id)); c = p ? ncsi_find_channel(p, NCSI_CHANNEL_INDEX(id)) : NULL; if (np) *np = p; if (nc) *nc = c; } /* For two consecutive NCSI commands, the packet IDs shouldn't * be same. Otherwise, the bogus response might be replied. So * the available IDs are allocated in round-robin fashion. */ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, unsigned int req_flags) { struct ncsi_request *nr = NULL; int i, limit = ARRAY_SIZE(ndp->requests); unsigned long flags; /* Check if there is one available request until the ceiling */ spin_lock_irqsave(&ndp->lock, flags); for (i = ndp->request_id; i < limit; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; nr->flags = req_flags; ndp->request_id = i + 1; goto found; } /* Fail back to check from the starting cursor */ for (i = NCSI_REQ_START_IDX; i < ndp->request_id; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; nr->flags = req_flags; ndp->request_id = i + 1; goto found; } found: spin_unlock_irqrestore(&ndp->lock, flags); return nr; } void ncsi_free_request(struct ncsi_request *nr) { struct ncsi_dev_priv *ndp = nr->ndp; struct sk_buff *cmd, *rsp; unsigned long flags; bool driven; if (nr->enabled) { nr->enabled = false; del_timer_sync(&nr->timer); } spin_lock_irqsave(&ndp->lock, flags); cmd = nr->cmd; rsp = nr->rsp; nr->cmd = NULL; nr->rsp = NULL; nr->used = false; driven = !!(nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN); spin_unlock_irqrestore(&ndp->lock, flags); if (driven && cmd && --ndp->pending_req_num == 0) schedule_work(&ndp->work); /* Release command and response */ consume_skb(cmd); consume_skb(rsp); } struct ncsi_dev *ncsi_find_dev(struct net_device *dev) { struct ncsi_dev_priv *ndp; NCSI_FOR_EACH_DEV(ndp) { if (ndp->ndev.dev == dev) return &ndp->ndev; } return NULL; } static void ncsi_request_timeout(struct timer_list *t) { struct ncsi_request *nr = from_timer(nr, t, timer); struct ncsi_dev_priv *ndp = nr->ndp; struct ncsi_cmd_pkt *cmd; struct ncsi_package *np; struct ncsi_channel *nc; unsigned long flags; /* If the request already had associated response, * let the response handler to release it. */ spin_lock_irqsave(&ndp->lock, flags); nr->enabled = false; if (nr->rsp || !nr->cmd) { spin_unlock_irqrestore(&ndp->lock, flags); return; } spin_unlock_irqrestore(&ndp->lock, flags); if (nr->flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) { if (nr->cmd) { /* Find the package */ cmd = (struct ncsi_cmd_pkt *) skb_network_header(nr->cmd); ncsi_find_package_and_channel(ndp, cmd->cmd.common.channel, &np, &nc); ncsi_send_netlink_timeout(nr, np, nc); } } /* Release the request */ ncsi_free_request(nr); } static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; struct ncsi_channel *nc, *tmp; struct ncsi_cmd_arg nca; unsigned long flags; int ret; np = ndp->active_package; nc = ndp->active_channel; nca.ndp = ndp; nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_suspend: nd->state = ncsi_dev_state_suspend_select; fallthrough; case ncsi_dev_state_suspend_select: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_SP; nca.package = np->id; nca.channel = NCSI_RESERVED_CHANNEL; if (ndp->flags & NCSI_DEV_HWA) nca.bytes[0] = 0; else nca.bytes[0] = 1; /* To retrieve the last link states of channels in current * package when current active channel needs fail over to * another one. It means we will possibly select another * channel as next active one. The link states of channels * are most important factor of the selection. So we need * accurate link states. Unfortunately, the link states on * inactive channels can't be updated with LSC AEN in time. */ if (ndp->flags & NCSI_DEV_RESHUFFLE) nd->state = ncsi_dev_state_suspend_gls; else nd->state = ncsi_dev_state_suspend_dcnt; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; break; case ncsi_dev_state_suspend_gls: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_GLS; nca.package = np->id; nca.channel = ndp->channel_probe_id; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; ndp->channel_probe_id++; if (ndp->channel_probe_id == ndp->channel_count) { ndp->channel_probe_id = 0; nd->state = ncsi_dev_state_suspend_dcnt; } break; case ncsi_dev_state_suspend_dcnt: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_DCNT; nca.package = np->id; nca.channel = nc->id; nd->state = ncsi_dev_state_suspend_dc; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; break; case ncsi_dev_state_suspend_dc: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_DC; nca.package = np->id; nca.channel = nc->id; nca.bytes[0] = 1; nd->state = ncsi_dev_state_suspend_deselect; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; NCSI_FOR_EACH_CHANNEL(np, tmp) { /* If there is another channel active on this package * do not deselect the package. */ if (tmp != nc && tmp->state == NCSI_CHANNEL_ACTIVE) { nd->state = ncsi_dev_state_suspend_done; break; } } break; case ncsi_dev_state_suspend_deselect: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_DP; nca.package = np->id; nca.channel = NCSI_RESERVED_CHANNEL; nd->state = ncsi_dev_state_suspend_done; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; break; case ncsi_dev_state_suspend_done: spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); if (ndp->flags & NCSI_DEV_RESET) ncsi_reset_dev(nd); else ncsi_process_next_channel(ndp); break; default: netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n", nd->state); } return; error: nd->state = ncsi_dev_state_functional; } /* Check the VLAN filter bitmap for a set filter, and construct a * "Set VLAN Filter - Disable" packet if found. */ static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, struct ncsi_cmd_arg *nca) { struct ncsi_channel_vlan_filter *ncf; unsigned long flags; void *bitmap; int index; u16 vid; ncf = &nc->vlan_filter; bitmap = &ncf->bitmap; spin_lock_irqsave(&nc->lock, flags); index = find_first_bit(bitmap, ncf->n_vids); if (index >= ncf->n_vids) { spin_unlock_irqrestore(&nc->lock, flags); return -1; } vid = ncf->vids[index]; clear_bit(index, bitmap); ncf->vids[index] = 0; spin_unlock_irqrestore(&nc->lock, flags); nca->type = NCSI_PKT_CMD_SVF; nca->words[1] = vid; /* HW filter index starts at 1 */ nca->bytes[6] = index + 1; nca->bytes[7] = 0x00; return 0; } /* Find an outstanding VLAN tag and construct a "Set VLAN Filter - Enable" * packet. */ static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, struct ncsi_cmd_arg *nca) { struct ncsi_channel_vlan_filter *ncf; struct vlan_vid *vlan = NULL; unsigned long flags; int i, index; void *bitmap; u16 vid; if (list_empty(&ndp->vlan_vids)) return -1; ncf = &nc->vlan_filter; bitmap = &ncf->bitmap; spin_lock_irqsave(&nc->lock, flags); rcu_read_lock(); list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { vid = vlan->vid; for (i = 0; i < ncf->n_vids; i++) if (ncf->vids[i] == vid) { vid = 0; break; } if (vid) break; } rcu_read_unlock(); if (!vid) { /* No VLAN ID is not set */ spin_unlock_irqrestore(&nc->lock, flags); return -1; } index = find_first_zero_bit(bitmap, ncf->n_vids); if (index < 0 || index >= ncf->n_vids) { netdev_err(ndp->ndev.dev, "Channel %u already has all VLAN filters set\n", nc->id); spin_unlock_irqrestore(&nc->lock, flags); return -1; } ncf->vids[index] = vid; set_bit(index, bitmap); spin_unlock_irqrestore(&nc->lock, flags); nca->type = NCSI_PKT_CMD_SVF; nca->words[1] = vid; /* HW filter index starts at 1 */ nca->bytes[6] = index + 1; nca->bytes[7] = 0x01; return 0; } static int ncsi_oem_keep_phy_intel(struct ncsi_cmd_arg *nca) { unsigned char data[NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN]; int ret = 0; nca->payload = NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN; memset(data, 0, NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN); *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID); data[4] = NCSI_OEM_INTEL_CMD_KEEP_PHY; /* PHY Link up attribute */ data[6] = 0x1; nca->data = data; ret = ncsi_xmit_cmd(nca); if (ret) netdev_err(nca->ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during configure\n", nca->type); return ret; } /* NCSI OEM Command APIs */ static int ncsi_oem_gma_handler_bcm(struct ncsi_cmd_arg *nca) { unsigned char data[NCSI_OEM_BCM_CMD_GMA_LEN]; int ret = 0; nca->payload = NCSI_OEM_BCM_CMD_GMA_LEN; memset(data, 0, NCSI_OEM_BCM_CMD_GMA_LEN); *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_BCM_ID); data[5] = NCSI_OEM_BCM_CMD_GMA; nca->data = data; ret = ncsi_xmit_cmd(nca); if (ret) netdev_err(nca->ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during configure\n", nca->type); return ret; } static int ncsi_oem_gma_handler_mlx(struct ncsi_cmd_arg *nca) { union { u8 data_u8[NCSI_OEM_MLX_CMD_GMA_LEN]; u32 data_u32[NCSI_OEM_MLX_CMD_GMA_LEN / sizeof(u32)]; } u; int ret = 0; nca->payload = NCSI_OEM_MLX_CMD_GMA_LEN; memset(&u, 0, sizeof(u)); u.data_u32[0] = ntohl((__force __be32)NCSI_OEM_MFR_MLX_ID); u.data_u8[5] = NCSI_OEM_MLX_CMD_GMA; u.data_u8[6] = NCSI_OEM_MLX_CMD_GMA_PARAM; nca->data = u.data_u8; ret = ncsi_xmit_cmd(nca); if (ret) netdev_err(nca->ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during configure\n", nca->type); return ret; } static int ncsi_oem_smaf_mlx(struct ncsi_cmd_arg *nca) { union { u8 data_u8[NCSI_OEM_MLX_CMD_SMAF_LEN]; u32 data_u32[NCSI_OEM_MLX_CMD_SMAF_LEN / sizeof(u32)]; } u; int ret = 0; memset(&u, 0, sizeof(u)); u.data_u32[0] = ntohl((__force __be32)NCSI_OEM_MFR_MLX_ID); u.data_u8[5] = NCSI_OEM_MLX_CMD_SMAF; u.data_u8[6] = NCSI_OEM_MLX_CMD_SMAF_PARAM; memcpy(&u.data_u8[MLX_SMAF_MAC_ADDR_OFFSET], nca->ndp->ndev.dev->dev_addr, ETH_ALEN); u.data_u8[MLX_SMAF_MED_SUPPORT_OFFSET] = (MLX_MC_RBT_AVL | MLX_MC_RBT_SUPPORT); nca->payload = NCSI_OEM_MLX_CMD_SMAF_LEN; nca->data = u.data_u8; ret = ncsi_xmit_cmd(nca); if (ret) netdev_err(nca->ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during probe\n", nca->type); return ret; } static int ncsi_oem_gma_handler_intel(struct ncsi_cmd_arg *nca) { unsigned char data[NCSI_OEM_INTEL_CMD_GMA_LEN]; int ret = 0; nca->payload = NCSI_OEM_INTEL_CMD_GMA_LEN; memset(data, 0, NCSI_OEM_INTEL_CMD_GMA_LEN); *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID); data[4] = NCSI_OEM_INTEL_CMD_GMA; nca->data = data; ret = ncsi_xmit_cmd(nca); if (ret) netdev_err(nca->ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during configure\n", nca->type); return ret; } /* OEM Command handlers initialization */ static struct ncsi_oem_gma_handler { unsigned int mfr_id; int (*handler)(struct ncsi_cmd_arg *nca); } ncsi_oem_gma_handlers[] = { { NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm }, { NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx }, { NCSI_OEM_MFR_INTEL_ID, ncsi_oem_gma_handler_intel } }; static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id) { struct ncsi_oem_gma_handler *nch = NULL; int i; /* This function should only be called once, return if flag set */ if (nca->ndp->gma_flag == 1) return -1; /* Find gma handler for given manufacturer id */ for (i = 0; i < ARRAY_SIZE(ncsi_oem_gma_handlers); i++) { if (ncsi_oem_gma_handlers[i].mfr_id == mf_id) { if (ncsi_oem_gma_handlers[i].handler) nch = &ncsi_oem_gma_handlers[i]; break; } } if (!nch) { netdev_err(nca->ndp->ndev.dev, "NCSI: No GMA handler available for MFR-ID (0x%x)\n", mf_id); return -1; } /* Get Mac address from NCSI device */ return nch->handler(nca); } /* Determine if a given channel from the channel_queue should be used for Tx */ static bool ncsi_channel_is_tx(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc) { struct ncsi_channel_mode *ncm; struct ncsi_channel *channel; struct ncsi_package *np; /* Check if any other channel has Tx enabled; a channel may have already * been configured and removed from the channel queue. */ NCSI_FOR_EACH_PACKAGE(ndp, np) { if (!ndp->multi_package && np != nc->package) continue; NCSI_FOR_EACH_CHANNEL(np, channel) { ncm = &channel->modes[NCSI_MODE_TX_ENABLE]; if (ncm->enable) return false; } } /* This channel is the preferred channel and has link */ list_for_each_entry_rcu(channel, &ndp->channel_queue, link) { np = channel->package; if (np->preferred_channel && ncsi_channel_has_link(np->preferred_channel)) { return np->preferred_channel == nc; } } /* This channel has link */ if (ncsi_channel_has_link(nc)) return true; list_for_each_entry_rcu(channel, &ndp->channel_queue, link) if (ncsi_channel_has_link(channel)) return false; /* No other channel has link; default to this one */ return true; } /* Change the active Tx channel in a multi-channel setup */ int ncsi_update_tx_channel(struct ncsi_dev_priv *ndp, struct ncsi_package *package, struct ncsi_channel *disable, struct ncsi_channel *enable) { struct ncsi_cmd_arg nca; struct ncsi_channel *nc; struct ncsi_package *np; int ret = 0; if (!package->multi_channel && !ndp->multi_package) netdev_warn(ndp->ndev.dev, "NCSI: Trying to update Tx channel in single-channel mode\n"); nca.ndp = ndp; nca.req_flags = 0; /* Find current channel with Tx enabled */ NCSI_FOR_EACH_PACKAGE(ndp, np) { if (disable) break; if (!ndp->multi_package && np != package) continue; NCSI_FOR_EACH_CHANNEL(np, nc) if (nc->modes[NCSI_MODE_TX_ENABLE].enable) { disable = nc; break; } } /* Find a suitable channel for Tx */ NCSI_FOR_EACH_PACKAGE(ndp, np) { if (enable) break; if (!ndp->multi_package && np != package) continue; if (!(ndp->package_whitelist & (0x1 << np->id))) continue; if (np->preferred_channel && ncsi_channel_has_link(np->preferred_channel)) { enable = np->preferred_channel; break; } NCSI_FOR_EACH_CHANNEL(np, nc) { if (!(np->channel_whitelist & 0x1 << nc->id)) continue; if (nc->state != NCSI_CHANNEL_ACTIVE) continue; if (ncsi_channel_has_link(nc)) { enable = nc; break; } } } if (disable == enable) return -1; if (!enable) return -1; if (disable) { nca.channel = disable->id; nca.package = disable->package->id; nca.type = NCSI_PKT_CMD_DCNT; ret = ncsi_xmit_cmd(&nca); if (ret) netdev_err(ndp->ndev.dev, "Error %d sending DCNT\n", ret); } netdev_info(ndp->ndev.dev, "NCSI: channel %u enables Tx\n", enable->id); nca.channel = enable->id; nca.package = enable->package->id; nca.type = NCSI_PKT_CMD_ECNT; ret = ncsi_xmit_cmd(&nca); if (ret) netdev_err(ndp->ndev.dev, "Error %d sending ECNT\n", ret); return ret; } static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) { struct ncsi_package *np = ndp->active_package; struct ncsi_channel *nc = ndp->active_channel; struct ncsi_channel *hot_nc = NULL; struct ncsi_dev *nd = &ndp->ndev; struct net_device *dev = nd->dev; struct ncsi_cmd_arg nca; unsigned char index; unsigned long flags; int ret; nca.ndp = ndp; nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_config: case ncsi_dev_state_config_sp: ndp->pending_req_num = 1; /* Select the specific package */ nca.type = NCSI_PKT_CMD_SP; if (ndp->flags & NCSI_DEV_HWA) nca.bytes[0] = 0; else nca.bytes[0] = 1; nca.package = np->id; nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) { netdev_err(ndp->ndev.dev, "NCSI: Failed to transmit CMD_SP\n"); goto error; } nd->state = ncsi_dev_state_config_cis; break; case ncsi_dev_state_config_cis: ndp->pending_req_num = 1; /* Clear initial state */ nca.type = NCSI_PKT_CMD_CIS; nca.package = np->id; nca.channel = nc->id; ret = ncsi_xmit_cmd(&nca); if (ret) { netdev_err(ndp->ndev.dev, "NCSI: Failed to transmit CMD_CIS\n"); goto error; } nd->state = IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) ? ncsi_dev_state_config_oem_gma : ncsi_dev_state_config_clear_vids; break; case ncsi_dev_state_config_oem_gma: nd->state = ncsi_dev_state_config_clear_vids; nca.package = np->id; nca.channel = nc->id; ndp->pending_req_num = 1; if (nc->version.major >= 1 && nc->version.minor >= 2) { nca.type = NCSI_PKT_CMD_GMCMA; ret = ncsi_xmit_cmd(&nca); } else { nca.type = NCSI_PKT_CMD_OEM; ret = ncsi_gma_handler(&nca, nc->version.mf_id); } if (ret < 0) schedule_work(&ndp->work); break; case ncsi_dev_state_config_clear_vids: case ncsi_dev_state_config_svf: case ncsi_dev_state_config_ev: case ncsi_dev_state_config_sma: case ncsi_dev_state_config_ebf: case ncsi_dev_state_config_dgmf: case ncsi_dev_state_config_ecnt: case ncsi_dev_state_config_ec: case ncsi_dev_state_config_ae: case ncsi_dev_state_config_gls: ndp->pending_req_num = 1; nca.package = np->id; nca.channel = nc->id; /* Clear any active filters on the channel before setting */ if (nd->state == ncsi_dev_state_config_clear_vids) { ret = clear_one_vid(ndp, nc, &nca); if (ret) { nd->state = ncsi_dev_state_config_svf; schedule_work(&ndp->work); break; } /* Repeat */ nd->state = ncsi_dev_state_config_clear_vids; /* Add known VLAN tags to the filter */ } else if (nd->state == ncsi_dev_state_config_svf) { ret = set_one_vid(ndp, nc, &nca); if (ret) { nd->state = ncsi_dev_state_config_ev; schedule_work(&ndp->work); break; } /* Repeat */ nd->state = ncsi_dev_state_config_svf; /* Enable/Disable the VLAN filter */ } else if (nd->state == ncsi_dev_state_config_ev) { if (list_empty(&ndp->vlan_vids)) { nca.type = NCSI_PKT_CMD_DV; } else { nca.type = NCSI_PKT_CMD_EV; nca.bytes[3] = NCSI_CAP_VLAN_NO; } nd->state = ncsi_dev_state_config_sma; } else if (nd->state == ncsi_dev_state_config_sma) { /* Use first entry in unicast filter table. Note that * the MAC filter table starts from entry 1 instead of * 0. */ nca.type = NCSI_PKT_CMD_SMA; for (index = 0; index < 6; index++) nca.bytes[index] = dev->dev_addr[index]; nca.bytes[6] = 0x1; nca.bytes[7] = 0x1; nd->state = ncsi_dev_state_config_ebf; } else if (nd->state == ncsi_dev_state_config_ebf) { nca.type = NCSI_PKT_CMD_EBF; nca.dwords[0] = nc->caps[NCSI_CAP_BC].cap; /* if multicast global filtering is supported then * disable it so that all multicast packet will be * forwarded to management controller */ if (nc->caps[NCSI_CAP_GENERIC].cap & NCSI_CAP_GENERIC_MC) nd->state = ncsi_dev_state_config_dgmf; else if (ncsi_channel_is_tx(ndp, nc)) nd->state = ncsi_dev_state_config_ecnt; else nd->state = ncsi_dev_state_config_ec; } else if (nd->state == ncsi_dev_state_config_dgmf) { nca.type = NCSI_PKT_CMD_DGMF; if (ncsi_channel_is_tx(ndp, nc)) nd->state = ncsi_dev_state_config_ecnt; else nd->state = ncsi_dev_state_config_ec; } else if (nd->state == ncsi_dev_state_config_ecnt) { if (np->preferred_channel && nc != np->preferred_channel) netdev_info(ndp->ndev.dev, "NCSI: Tx failed over to channel %u\n", nc->id); nca.type = NCSI_PKT_CMD_ECNT; nd->state = ncsi_dev_state_config_ec; } else if (nd->state == ncsi_dev_state_config_ec) { /* Enable AEN if it's supported */ nca.type = NCSI_PKT_CMD_EC; nd->state = ncsi_dev_state_config_ae; if (!(nc->caps[NCSI_CAP_AEN].cap & NCSI_CAP_AEN_MASK)) nd->state = ncsi_dev_state_config_gls; } else if (nd->state == ncsi_dev_state_config_ae) { nca.type = NCSI_PKT_CMD_AE; nca.bytes[0] = 0; nca.dwords[1] = nc->caps[NCSI_CAP_AEN].cap; nd->state = ncsi_dev_state_config_gls; } else if (nd->state == ncsi_dev_state_config_gls) { nca.type = NCSI_PKT_CMD_GLS; nd->state = ncsi_dev_state_config_done; } ret = ncsi_xmit_cmd(&nca); if (ret) { netdev_err(ndp->ndev.dev, "NCSI: Failed to transmit CMD %x\n", nca.type); goto error; } break; case ncsi_dev_state_config_done: netdev_dbg(ndp->ndev.dev, "NCSI: channel %u config done\n", nc->id); spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_ACTIVE; if (ndp->flags & NCSI_DEV_RESET) { /* A reset event happened during config, start it now */ nc->reconfigure_needed = false; spin_unlock_irqrestore(&nc->lock, flags); ncsi_reset_dev(nd); break; } if (nc->reconfigure_needed) { /* This channel's configuration has been updated * part-way during the config state - start the * channel configuration over */ nc->reconfigure_needed = false; nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); netdev_dbg(dev, "Dirty NCSI channel state reset\n"); ncsi_process_next_channel(ndp); break; } if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { hot_nc = nc; } else { hot_nc = NULL; netdev_dbg(ndp->ndev.dev, "NCSI: channel %u link down after config\n", nc->id); } spin_unlock_irqrestore(&nc->lock, flags); /* Update the hot channel */ spin_lock_irqsave(&ndp->lock, flags); ndp->hot_channel = hot_nc; spin_unlock_irqrestore(&ndp->lock, flags); ncsi_start_channel_monitor(nc); ncsi_process_next_channel(ndp); break; default: netdev_alert(dev, "Wrong NCSI state 0x%x in config\n", nd->state); } return; error: ncsi_report_link(ndp, true); } static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) { struct ncsi_channel *nc, *found, *hot_nc; struct ncsi_channel_mode *ncm; unsigned long flags, cflags; struct ncsi_package *np; bool with_link; spin_lock_irqsave(&ndp->lock, flags); hot_nc = ndp->hot_channel; spin_unlock_irqrestore(&ndp->lock, flags); /* By default the search is done once an inactive channel with up * link is found, unless a preferred channel is set. * If multi_package or multi_channel are configured all channels in the * whitelist are added to the channel queue. */ found = NULL; with_link = false; NCSI_FOR_EACH_PACKAGE(ndp, np) { if (!(ndp->package_whitelist & (0x1 << np->id))) continue; NCSI_FOR_EACH_CHANNEL(np, nc) { if (!(np->channel_whitelist & (0x1 << nc->id))) continue; spin_lock_irqsave(&nc->lock, cflags); if (!list_empty(&nc->link) || nc->state != NCSI_CHANNEL_INACTIVE) { spin_unlock_irqrestore(&nc->lock, cflags); continue; } if (!found) found = nc; if (nc == hot_nc) found = nc; ncm = &nc->modes[NCSI_MODE_LINK]; if (ncm->data[2] & 0x1) { found = nc; with_link = true; } /* If multi_channel is enabled configure all valid * channels whether or not they currently have link * so they will have AENs enabled. */ if (with_link || np->multi_channel) { spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); netdev_dbg(ndp->ndev.dev, "NCSI: Channel %u added to queue (link %s)\n", nc->id, ncm->data[2] & 0x1 ? "up" : "down"); } spin_unlock_irqrestore(&nc->lock, cflags); if (with_link && !np->multi_channel) break; } if (with_link && !ndp->multi_package) break; } if (list_empty(&ndp->channel_queue) && found) { netdev_info(ndp->ndev.dev, "NCSI: No channel with link found, configuring channel %u\n", found->id); spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&found->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); } else if (!found) { netdev_warn(ndp->ndev.dev, "NCSI: No channel found to configure!\n"); ncsi_report_link(ndp, true); return -ENODEV; } return ncsi_process_next_channel(ndp); } static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp) { struct ncsi_package *np; struct ncsi_channel *nc; unsigned int cap; bool has_channel = false; /* The hardware arbitration is disabled if any one channel * doesn't support explicitly. */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { has_channel = true; cap = nc->caps[NCSI_CAP_GENERIC].cap; if (!(cap & NCSI_CAP_GENERIC_HWA) || (cap & NCSI_CAP_GENERIC_HWA_MASK) != NCSI_CAP_GENERIC_HWA_SUPPORT) { ndp->flags &= ~NCSI_DEV_HWA; return false; } } } if (has_channel) { ndp->flags |= NCSI_DEV_HWA; return true; } ndp->flags &= ~NCSI_DEV_HWA; return false; } static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; struct ncsi_cmd_arg nca; unsigned char index; int ret; nca.ndp = ndp; nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_probe: nd->state = ncsi_dev_state_probe_deselect; fallthrough; case ncsi_dev_state_probe_deselect: ndp->pending_req_num = 8; /* Deselect all possible packages */ nca.type = NCSI_PKT_CMD_DP; nca.channel = NCSI_RESERVED_CHANNEL; for (index = 0; index < 8; index++) { nca.package = index; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; } nd->state = ncsi_dev_state_probe_package; break; case ncsi_dev_state_probe_package: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_SP; nca.bytes[0] = 1; nca.package = ndp->package_probe_id; nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; nd->state = ncsi_dev_state_probe_channel; break; case ncsi_dev_state_probe_channel: ndp->active_package = ncsi_find_package(ndp, ndp->package_probe_id); if (!ndp->active_package) { /* No response */ nd->state = ncsi_dev_state_probe_dp; schedule_work(&ndp->work); break; } nd->state = ncsi_dev_state_probe_cis; if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_GET_MAC) && ndp->mlx_multi_host) nd->state = ncsi_dev_state_probe_mlx_gma; schedule_work(&ndp->work); break; case ncsi_dev_state_probe_mlx_gma: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_OEM; nca.package = ndp->active_package->id; nca.channel = 0; ret = ncsi_oem_gma_handler_mlx(&nca); if (ret) goto error; nd->state = ncsi_dev_state_probe_mlx_smaf; break; case ncsi_dev_state_probe_mlx_smaf: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_OEM; nca.package = ndp->active_package->id; nca.channel = 0; ret = ncsi_oem_smaf_mlx(&nca); if (ret) goto error; nd->state = ncsi_dev_state_probe_cis; break; case ncsi_dev_state_probe_keep_phy: ndp->pending_req_num = 1; nca.type = NCSI_PKT_CMD_OEM; nca.package = ndp->active_package->id; nca.channel = 0; ret = ncsi_oem_keep_phy_intel(&nca); if (ret) goto error; nd->state = ncsi_dev_state_probe_gvi; break; case ncsi_dev_state_probe_cis: case ncsi_dev_state_probe_gvi: case ncsi_dev_state_probe_gc: case ncsi_dev_state_probe_gls: np = ndp->active_package; ndp->pending_req_num = 1; /* Clear initial state Retrieve version, capability or link status */ if (nd->state == ncsi_dev_state_probe_cis) nca.type = NCSI_PKT_CMD_CIS; else if (nd->state == ncsi_dev_state_probe_gvi) nca.type = NCSI_PKT_CMD_GVI; else if (nd->state == ncsi_dev_state_probe_gc) nca.type = NCSI_PKT_CMD_GC; else nca.type = NCSI_PKT_CMD_GLS; nca.package = np->id; nca.channel = ndp->channel_probe_id; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; if (nd->state == ncsi_dev_state_probe_cis) { nd->state = ncsi_dev_state_probe_gvi; if (IS_ENABLED(CONFIG_NCSI_OEM_CMD_KEEP_PHY) && ndp->channel_probe_id == 0) nd->state = ncsi_dev_state_probe_keep_phy; } else if (nd->state == ncsi_dev_state_probe_gvi) { nd->state = ncsi_dev_state_probe_gc; } else if (nd->state == ncsi_dev_state_probe_gc) { nd->state = ncsi_dev_state_probe_gls; } else { nd->state = ncsi_dev_state_probe_cis; ndp->channel_probe_id++; } if (ndp->channel_probe_id == ndp->channel_count) { ndp->channel_probe_id = 0; nd->state = ncsi_dev_state_probe_dp; } break; case ncsi_dev_state_probe_dp: ndp->pending_req_num = 1; /* Deselect the current package */ nca.type = NCSI_PKT_CMD_DP; nca.package = ndp->package_probe_id; nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; /* Probe next package */ ndp->package_probe_id++; if (ndp->package_probe_id >= 8) { /* Probe finished */ ndp->flags |= NCSI_DEV_PROBED; break; } nd->state = ncsi_dev_state_probe_package; ndp->active_package = NULL; break; default: netdev_warn(nd->dev, "Wrong NCSI state 0x%0x in enumeration\n", nd->state); } if (ndp->flags & NCSI_DEV_PROBED) { /* Check if all packages have HWA support */ ncsi_check_hwa(ndp); ncsi_choose_active_channel(ndp); } return; error: netdev_err(ndp->ndev.dev, "NCSI: Failed to transmit cmd 0x%x during probe\n", nca.type); ncsi_report_link(ndp, true); } static void ncsi_dev_work(struct work_struct *work) { struct ncsi_dev_priv *ndp = container_of(work, struct ncsi_dev_priv, work); struct ncsi_dev *nd = &ndp->ndev; switch (nd->state & ncsi_dev_state_major) { case ncsi_dev_state_probe: ncsi_probe_channel(ndp); break; case ncsi_dev_state_suspend: ncsi_suspend_channel(ndp); break; case ncsi_dev_state_config: ncsi_configure_channel(ndp); break; default: netdev_warn(nd->dev, "Wrong NCSI state 0x%x in workqueue\n", nd->state); } } int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) { struct ncsi_channel *nc; int old_state; unsigned long flags; spin_lock_irqsave(&ndp->lock, flags); nc = list_first_or_null_rcu(&ndp->channel_queue, struct ncsi_channel, link); if (!nc) { spin_unlock_irqrestore(&ndp->lock, flags); goto out; } list_del_init(&nc->link); spin_unlock_irqrestore(&ndp->lock, flags); spin_lock_irqsave(&nc->lock, flags); old_state = nc->state; nc->state = NCSI_CHANNEL_INVISIBLE; spin_unlock_irqrestore(&nc->lock, flags); ndp->active_channel = nc; ndp->active_package = nc->package; switch (old_state) { case NCSI_CHANNEL_INACTIVE: ndp->ndev.state = ncsi_dev_state_config; netdev_dbg(ndp->ndev.dev, "NCSI: configuring channel %u\n", nc->id); ncsi_configure_channel(ndp); break; case NCSI_CHANNEL_ACTIVE: ndp->ndev.state = ncsi_dev_state_suspend; netdev_dbg(ndp->ndev.dev, "NCSI: suspending channel %u\n", nc->id); ncsi_suspend_channel(ndp); break; default: netdev_err(ndp->ndev.dev, "Invalid state 0x%x on %d:%d\n", old_state, nc->package->id, nc->id); ncsi_report_link(ndp, false); return -EINVAL; } return 0; out: ndp->active_channel = NULL; ndp->active_package = NULL; if (ndp->flags & NCSI_DEV_RESHUFFLE) { ndp->flags &= ~NCSI_DEV_RESHUFFLE; return ncsi_choose_active_channel(ndp); } ncsi_report_link(ndp, false); return -ENODEV; } static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; struct ncsi_channel *nc; struct ncsi_package *np; unsigned long flags; unsigned int n = 0; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { spin_lock_irqsave(&nc->lock, flags); /* Channels may be busy, mark dirty instead of * kicking if; * a) not ACTIVE (configured) * b) in the channel_queue (to be configured) * c) it's ndev is in the config state */ if (nc->state != NCSI_CHANNEL_ACTIVE) { if ((ndp->ndev.state & 0xff00) == ncsi_dev_state_config || !list_empty(&nc->link)) { netdev_dbg(nd->dev, "NCSI: channel %p marked dirty\n", nc); nc->reconfigure_needed = true; } spin_unlock_irqrestore(&nc->lock, flags); continue; } spin_unlock_irqrestore(&nc->lock, flags); ncsi_stop_channel_monitor(nc); spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INACTIVE; spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); netdev_dbg(nd->dev, "NCSI: kicked channel %p\n", nc); n++; } } return n; } int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) { struct ncsi_dev_priv *ndp; unsigned int n_vids = 0; struct vlan_vid *vlan; struct ncsi_dev *nd; bool found = false; if (vid == 0) return 0; nd = ncsi_find_dev(dev); if (!nd) { netdev_warn(dev, "NCSI: No net_device?\n"); return 0; } ndp = TO_NCSI_DEV_PRIV(nd); /* Add the VLAN id to our internal list */ list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { n_vids++; if (vlan->vid == vid) { netdev_dbg(dev, "NCSI: vid %u already registered\n", vid); return 0; } } if (n_vids >= NCSI_MAX_VLAN_VIDS) { netdev_warn(dev, "tried to add vlan id %u but NCSI max already registered (%u)\n", vid, NCSI_MAX_VLAN_VIDS); return -ENOSPC; } vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); if (!vlan) return -ENOMEM; vlan->proto = proto; vlan->vid = vid; list_add_rcu(&vlan->list, &ndp->vlan_vids); netdev_dbg(dev, "NCSI: Added new vid %u\n", vid); found = ncsi_kick_channels(ndp) != 0; return found ? ncsi_process_next_channel(ndp) : 0; } EXPORT_SYMBOL_GPL(ncsi_vlan_rx_add_vid); int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) { struct vlan_vid *vlan, *tmp; struct ncsi_dev_priv *ndp; struct ncsi_dev *nd; bool found = false; if (vid == 0) return 0; nd = ncsi_find_dev(dev); if (!nd) { netdev_warn(dev, "NCSI: no net_device?\n"); return 0; } ndp = TO_NCSI_DEV_PRIV(nd); /* Remove the VLAN id from our internal list */ list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list) if (vlan->vid == vid) { netdev_dbg(dev, "NCSI: vid %u found, removing\n", vid); list_del_rcu(&vlan->list); found = true; kfree(vlan); } if (!found) { netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid); return -EINVAL; } found = ncsi_kick_channels(ndp) != 0; return found ? ncsi_process_next_channel(ndp) : 0; } EXPORT_SYMBOL_GPL(ncsi_vlan_rx_kill_vid); struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*handler)(struct ncsi_dev *ndev)) { struct ncsi_dev_priv *ndp; struct ncsi_dev *nd; struct platform_device *pdev; struct device_node *np; unsigned long flags; int i; /* Check if the device has been registered or not */ nd = ncsi_find_dev(dev); if (nd) return nd; /* Create NCSI device */ ndp = kzalloc(sizeof(*ndp), GFP_ATOMIC); if (!ndp) return NULL; nd = &ndp->ndev; nd->state = ncsi_dev_state_registered; nd->dev = dev; nd->handler = handler; ndp->pending_req_num = 0; INIT_LIST_HEAD(&ndp->channel_queue); INIT_LIST_HEAD(&ndp->vlan_vids); INIT_WORK(&ndp->work, ncsi_dev_work); ndp->package_whitelist = UINT_MAX; /* Initialize private NCSI device */ spin_lock_init(&ndp->lock); INIT_LIST_HEAD(&ndp->packages); ndp->request_id = NCSI_REQ_START_IDX; for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) { ndp->requests[i].id = i; ndp->requests[i].ndp = ndp; timer_setup(&ndp->requests[i].timer, ncsi_request_timeout, 0); } ndp->channel_count = NCSI_RESERVED_CHANNEL; spin_lock_irqsave(&ncsi_dev_lock, flags); list_add_tail_rcu(&ndp->node, &ncsi_dev_list); spin_unlock_irqrestore(&ncsi_dev_lock, flags); /* Register NCSI packet Rx handler */ ndp->ptype.type = cpu_to_be16(ETH_P_NCSI); ndp->ptype.func = ncsi_rcv_rsp; ndp->ptype.dev = dev; dev_add_pack(&ndp->ptype); pdev = to_platform_device(dev->dev.parent); if (pdev) { np = pdev->dev.of_node; if (np && (of_property_read_bool(np, "mellanox,multi-host") || of_property_read_bool(np, "mlx,multi-host"))) ndp->mlx_multi_host = true; } return nd; } EXPORT_SYMBOL_GPL(ncsi_register_dev); int ncsi_start_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); if (nd->state != ncsi_dev_state_registered && nd->state != ncsi_dev_state_functional) return -ENOTTY; if (!(ndp->flags & NCSI_DEV_PROBED)) { ndp->package_probe_id = 0; ndp->channel_probe_id = 0; nd->state = ncsi_dev_state_probe; schedule_work(&ndp->work); return 0; } return ncsi_reset_dev(nd); } EXPORT_SYMBOL_GPL(ncsi_start_dev); void ncsi_stop_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); struct ncsi_package *np; struct ncsi_channel *nc; bool chained; int old_state; unsigned long flags; /* Stop the channel monitor on any active channels. Don't reset the * channel state so we know which were active when ncsi_start_dev() * is next called. */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { ncsi_stop_channel_monitor(nc); spin_lock_irqsave(&nc->lock, flags); chained = !list_empty(&nc->link); old_state = nc->state; spin_unlock_irqrestore(&nc->lock, flags); WARN_ON_ONCE(chained || old_state == NCSI_CHANNEL_INVISIBLE); } } netdev_dbg(ndp->ndev.dev, "NCSI: Stopping device\n"); ncsi_report_link(ndp, true); } EXPORT_SYMBOL_GPL(ncsi_stop_dev); int ncsi_reset_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); struct ncsi_channel *nc, *active, *tmp; struct ncsi_package *np; unsigned long flags; spin_lock_irqsave(&ndp->lock, flags); if (!(ndp->flags & NCSI_DEV_RESET)) { /* Haven't been called yet, check states */ switch (nd->state & ncsi_dev_state_major) { case ncsi_dev_state_registered: case ncsi_dev_state_probe: /* Not even probed yet - do nothing */ spin_unlock_irqrestore(&ndp->lock, flags); return 0; case ncsi_dev_state_suspend: case ncsi_dev_state_config: /* Wait for the channel to finish its suspend/config * operation; once it finishes it will check for * NCSI_DEV_RESET and reset the state. */ ndp->flags |= NCSI_DEV_RESET; spin_unlock_irqrestore(&ndp->lock, flags); return 0; } } else { switch (nd->state) { case ncsi_dev_state_suspend_done: case ncsi_dev_state_config_done: case ncsi_dev_state_functional: /* Ok */ break; default: /* Current reset operation happening */ spin_unlock_irqrestore(&ndp->lock, flags); return 0; } } if (!list_empty(&ndp->channel_queue)) { /* Clear any channel queue we may have interrupted */ list_for_each_entry_safe(nc, tmp, &ndp->channel_queue, link) list_del_init(&nc->link); } spin_unlock_irqrestore(&ndp->lock, flags); active = NULL; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { spin_lock_irqsave(&nc->lock, flags); if (nc->state == NCSI_CHANNEL_ACTIVE) { active = nc; nc->state = NCSI_CHANNEL_INVISIBLE; spin_unlock_irqrestore(&nc->lock, flags); ncsi_stop_channel_monitor(nc); break; } spin_unlock_irqrestore(&nc->lock, flags); } if (active) break; } if (!active) { /* Done */ spin_lock_irqsave(&ndp->lock, flags); ndp->flags &= ~NCSI_DEV_RESET; spin_unlock_irqrestore(&ndp->lock, flags); return ncsi_choose_active_channel(ndp); } spin_lock_irqsave(&ndp->lock, flags); ndp->flags |= NCSI_DEV_RESET; ndp->active_channel = active; ndp->active_package = active->package; spin_unlock_irqrestore(&ndp->lock, flags); nd->state = ncsi_dev_state_suspend; schedule_work(&ndp->work); return 0; } void ncsi_unregister_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); struct ncsi_package *np, *tmp; unsigned long flags; dev_remove_pack(&ndp->ptype); list_for_each_entry_safe(np, tmp, &ndp->packages, node) ncsi_remove_package(np); spin_lock_irqsave(&ncsi_dev_lock, flags); list_del_rcu(&ndp->node); spin_unlock_irqrestore(&ncsi_dev_lock, flags); kfree(ndp); } EXPORT_SYMBOL_GPL(ncsi_unregister_dev);
8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 // SPDX-License-Identifier: GPL-2.0-only /* * * Author Karsten Keil <kkeil@novell.com> * * Copyright 2008 by Karsten Keil <kkeil@novell.com> */ #include <linux/slab.h> #include <linux/mISDNif.h> #include <linux/kthread.h> #include <linux/sched.h> #include <linux/sched/cputime.h> #include <linux/signal.h> #include "core.h" static u_int *debug; static inline void _queue_message(struct mISDNstack *st, struct sk_buff *skb) { struct mISDNhead *hh = mISDN_HEAD_P(skb); if (*debug & DEBUG_QUEUE_FUNC) printk(KERN_DEBUG "%s prim(%x) id(%x) %p\n", __func__, hh->prim, hh->id, skb); skb_queue_tail(&st->msgq, skb); if (likely(!test_bit(mISDN_STACK_STOPPED, &st->status))) { test_and_set_bit(mISDN_STACK_WORK, &st->status); wake_up_interruptible(&st->workq); } } static int mISDN_queue_message(struct mISDNchannel *ch, struct sk_buff *skb) { _queue_message(ch->st, skb); return 0; } static struct mISDNchannel * get_channel4id(struct mISDNstack *st, u_int id) { struct mISDNchannel *ch; mutex_lock(&st->lmutex); list_for_each_entry(ch, &st->layer2, list) { if (id == ch->nr) goto unlock; } ch = NULL; unlock: mutex_unlock(&st->lmutex); return ch; } static void send_socklist(struct mISDN_sock_list *sl, struct sk_buff *skb) { struct sock *sk; struct sk_buff *cskb = NULL; read_lock(&sl->lock); sk_for_each(sk, &sl->head) { if (sk->sk_state != MISDN_BOUND) continue; if (!cskb) cskb = skb_copy(skb, GFP_ATOMIC); if (!cskb) { printk(KERN_WARNING "%s no skb\n", __func__); break; } if (!sock_queue_rcv_skb(sk, cskb)) cskb = NULL; } read_unlock(&sl->lock); dev_kfree_skb(cskb); } static void send_layer2(struct mISDNstack *st, struct sk_buff *skb) { struct sk_buff *cskb; struct mISDNhead *hh = mISDN_HEAD_P(skb); struct mISDNchannel *ch; int ret; if (!st) return; mutex_lock(&st->lmutex); if ((hh->id & MISDN_ID_ADDR_MASK) == MISDN_ID_ANY) { /* L2 for all */ list_for_each_entry(ch, &st->layer2, list) { if (list_is_last(&ch->list, &st->layer2)) { cskb = skb; skb = NULL; } else { cskb = skb_copy(skb, GFP_KERNEL); } if (cskb) { ret = ch->send(ch, cskb); if (ret) { if (*debug & DEBUG_SEND_ERR) printk(KERN_DEBUG "%s ch%d prim(%x) addr(%x)" " err %d\n", __func__, ch->nr, hh->prim, ch->addr, ret); dev_kfree_skb(cskb); } } else { printk(KERN_WARNING "%s ch%d addr %x no mem\n", __func__, ch->nr, ch->addr); goto out; } } } else { list_for_each_entry(ch, &st->layer2, list) { if ((hh->id & MISDN_ID_ADDR_MASK) == ch->addr) { ret = ch->send(ch, skb); if (!ret) skb = NULL; goto out; } } ret = st->dev->teimgr->ctrl(st->dev->teimgr, CHECK_DATA, skb); if (!ret) skb = NULL; else if (*debug & DEBUG_SEND_ERR) printk(KERN_DEBUG "%s mgr prim(%x) err %d\n", __func__, hh->prim, ret); } out: mutex_unlock(&st->lmutex); dev_kfree_skb(skb); } static inline int send_msg_to_layer(struct mISDNstack *st, struct sk_buff *skb) { struct mISDNhead *hh = mISDN_HEAD_P(skb); struct mISDNchannel *ch; int lm; lm = hh->prim & MISDN_LAYERMASK; if (*debug & DEBUG_QUEUE_FUNC) printk(KERN_DEBUG "%s prim(%x) id(%x) %p\n", __func__, hh->prim, hh->id, skb); if (lm == 0x1) { if (!hlist_empty(&st->l1sock.head)) { __net_timestamp(skb); send_socklist(&st->l1sock, skb); } return st->layer1->send(st->layer1, skb); } else if (lm == 0x2) { if (!hlist_empty(&st->l1sock.head)) send_socklist(&st->l1sock, skb); send_layer2(st, skb); return 0; } else if (lm == 0x4) { ch = get_channel4id(st, hh->id); if (ch) return ch->send(ch, skb); else printk(KERN_WARNING "%s: dev(%s) prim(%x) id(%x) no channel\n", __func__, dev_name(&st->dev->dev), hh->prim, hh->id); } else if (lm == 0x8) { WARN_ON(lm == 0x8); ch = get_channel4id(st, hh->id); if (ch) return ch->send(ch, skb); else printk(KERN_WARNING "%s: dev(%s) prim(%x) id(%x) no channel\n", __func__, dev_name(&st->dev->dev), hh->prim, hh->id); } else { /* broadcast not handled yet */ printk(KERN_WARNING "%s: dev(%s) prim %x not delivered\n", __func__, dev_name(&st->dev->dev), hh->prim); } return -ESRCH; } static void do_clear_stack(struct mISDNstack *st) { } static int mISDNStackd(void *data) { struct mISDNstack *st = data; #ifdef MISDN_MSG_STATS u64 utime, stime; #endif int err = 0; sigfillset(&current->blocked); if (*debug & DEBUG_MSG_THREAD) printk(KERN_DEBUG "mISDNStackd %s started\n", dev_name(&st->dev->dev)); if (st->notify != NULL) { complete(st->notify); st->notify = NULL; } for (;;) { struct sk_buff *skb; if (unlikely(test_bit(mISDN_STACK_STOPPED, &st->status))) { test_and_clear_bit(mISDN_STACK_WORK, &st->status); test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); } else test_and_set_bit(mISDN_STACK_RUNNING, &st->status); while (test_bit(mISDN_STACK_WORK, &st->status)) { skb = skb_dequeue(&st->msgq); if (!skb) { test_and_clear_bit(mISDN_STACK_WORK, &st->status); /* test if a race happens */ skb = skb_dequeue(&st->msgq); if (!skb) continue; test_and_set_bit(mISDN_STACK_WORK, &st->status); } #ifdef MISDN_MSG_STATS st->msg_cnt++; #endif err = send_msg_to_layer(st, skb); if (unlikely(err)) { if (*debug & DEBUG_SEND_ERR) printk(KERN_DEBUG "%s: %s prim(%x) id(%x) " "send call(%d)\n", __func__, dev_name(&st->dev->dev), mISDN_HEAD_PRIM(skb), mISDN_HEAD_ID(skb), err); dev_kfree_skb(skb); continue; } if (unlikely(test_bit(mISDN_STACK_STOPPED, &st->status))) { test_and_clear_bit(mISDN_STACK_WORK, &st->status); test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); break; } } if (test_bit(mISDN_STACK_CLEARING, &st->status)) { test_and_set_bit(mISDN_STACK_STOPPED, &st->status); test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); do_clear_stack(st); test_and_clear_bit(mISDN_STACK_CLEARING, &st->status); test_and_set_bit(mISDN_STACK_RESTART, &st->status); } if (test_and_clear_bit(mISDN_STACK_RESTART, &st->status)) { test_and_clear_bit(mISDN_STACK_STOPPED, &st->status); test_and_set_bit(mISDN_STACK_RUNNING, &st->status); if (!skb_queue_empty(&st->msgq)) test_and_set_bit(mISDN_STACK_WORK, &st->status); } if (test_bit(mISDN_STACK_ABORT, &st->status)) break; if (st->notify != NULL) { complete(st->notify); st->notify = NULL; } #ifdef MISDN_MSG_STATS st->sleep_cnt++; #endif test_and_clear_bit(mISDN_STACK_ACTIVE, &st->status); wait_event_interruptible(st->workq, (st->status & mISDN_STACK_ACTION_MASK)); if (*debug & DEBUG_MSG_THREAD) printk(KERN_DEBUG "%s: %s wake status %08lx\n", __func__, dev_name(&st->dev->dev), st->status); test_and_set_bit(mISDN_STACK_ACTIVE, &st->status); test_and_clear_bit(mISDN_STACK_WAKEUP, &st->status); if (test_bit(mISDN_STACK_STOPPED, &st->status)) { test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); #ifdef MISDN_MSG_STATS st->stopped_cnt++; #endif } } #ifdef MISDN_MSG_STATS printk(KERN_DEBUG "mISDNStackd daemon for %s proceed %d " "msg %d sleep %d stopped\n", dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt, st->stopped_cnt); task_cputime(st->thread, &utime, &stime); printk(KERN_DEBUG "mISDNStackd daemon for %s utime(%llu) stime(%llu)\n", dev_name(&st->dev->dev), utime, stime); printk(KERN_DEBUG "mISDNStackd daemon for %s nvcsw(%ld) nivcsw(%ld)\n", dev_name(&st->dev->dev), st->thread->nvcsw, st->thread->nivcsw); printk(KERN_DEBUG "mISDNStackd daemon for %s killed now\n", dev_name(&st->dev->dev)); #endif test_and_set_bit(mISDN_STACK_KILLED, &st->status); test_and_clear_bit(mISDN_STACK_RUNNING, &st->status); test_and_clear_bit(mISDN_STACK_ACTIVE, &st->status); test_and_clear_bit(mISDN_STACK_ABORT, &st->status); skb_queue_purge(&st->msgq); st->thread = NULL; if (st->notify != NULL) { complete(st->notify); st->notify = NULL; } return 0; } static int l1_receive(struct mISDNchannel *ch, struct sk_buff *skb) { if (!ch->st) return -ENODEV; __net_timestamp(skb); _queue_message(ch->st, skb); return 0; } void set_channel_address(struct mISDNchannel *ch, u_int sapi, u_int tei) { ch->addr = sapi | (tei << 8); } void __add_layer2(struct mISDNchannel *ch, struct mISDNstack *st) { list_add_tail(&ch->list, &st->layer2); } void add_layer2(struct mISDNchannel *ch, struct mISDNstack *st) { mutex_lock(&st->lmutex); __add_layer2(ch, st); mutex_unlock(&st->lmutex); } static int st_own_ctrl(struct mISDNchannel *ch, u_int cmd, void *arg) { if (!ch->st || !ch->st->layer1) return -EINVAL; return ch->st->layer1->ctrl(ch->st->layer1, cmd, arg); } int create_stack(struct mISDNdevice *dev) { struct mISDNstack *newst; int err; DECLARE_COMPLETION_ONSTACK(done); newst = kzalloc(sizeof(struct mISDNstack), GFP_KERNEL); if (!newst) { printk(KERN_ERR "kmalloc mISDN_stack failed\n"); return -ENOMEM; } newst->dev = dev; INIT_LIST_HEAD(&newst->layer2); INIT_HLIST_HEAD(&newst->l1sock.head); rwlock_init(&newst->l1sock.lock); init_waitqueue_head(&newst->workq); skb_queue_head_init(&newst->msgq); mutex_init(&newst->lmutex); dev->D.st = newst; err = create_teimanager(dev); if (err) { printk(KERN_ERR "kmalloc teimanager failed\n"); kfree(newst); return err; } dev->teimgr->peer = &newst->own; dev->teimgr->recv = mISDN_queue_message; dev->teimgr->st = newst; newst->layer1 = &dev->D; dev->D.recv = l1_receive; dev->D.peer = &newst->own; newst->own.st = newst; newst->own.ctrl = st_own_ctrl; newst->own.send = mISDN_queue_message; newst->own.recv = mISDN_queue_message; if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: st(%s)\n", __func__, dev_name(&newst->dev->dev)); newst->notify = &done; newst->thread = kthread_run(mISDNStackd, (void *)newst, "mISDN_%s", dev_name(&newst->dev->dev)); if (IS_ERR(newst->thread)) { err = PTR_ERR(newst->thread); printk(KERN_ERR "mISDN:cannot create kernel thread for %s (%d)\n", dev_name(&newst->dev->dev), err); delete_teimanager(dev->teimgr); kfree(newst); } else wait_for_completion(&done); return err; } int connect_layer1(struct mISDNdevice *dev, struct mISDNchannel *ch, u_int protocol, struct sockaddr_mISDN *adr) { struct mISDN_sock *msk = container_of(ch, struct mISDN_sock, ch); struct channel_req rq; int err; if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n", __func__, dev_name(&dev->dev), protocol, adr->dev, adr->channel, adr->sapi, adr->tei); switch (protocol) { case ISDN_P_NT_S0: case ISDN_P_NT_E1: case ISDN_P_TE_S0: case ISDN_P_TE_E1: ch->recv = mISDN_queue_message; ch->peer = &dev->D.st->own; ch->st = dev->D.st; rq.protocol = protocol; rq.adr.channel = adr->channel; err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); printk(KERN_DEBUG "%s: ret %d (dev %d)\n", __func__, err, dev->id); if (err) return err; write_lock_bh(&dev->D.st->l1sock.lock); sk_add_node(&msk->sk, &dev->D.st->l1sock.head); write_unlock_bh(&dev->D.st->l1sock.lock); break; default: return -ENOPROTOOPT; } return 0; } int connect_Bstack(struct mISDNdevice *dev, struct mISDNchannel *ch, u_int protocol, struct sockaddr_mISDN *adr) { struct channel_req rq, rq2; int pmask, err; struct Bprotocol *bp; if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n", __func__, dev_name(&dev->dev), protocol, adr->dev, adr->channel, adr->sapi, adr->tei); ch->st = dev->D.st; pmask = 1 << (protocol & ISDN_P_B_MASK); if (pmask & dev->Bprotocols) { rq.protocol = protocol; rq.adr = *adr; err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); if (err) return err; ch->recv = rq.ch->send; ch->peer = rq.ch; rq.ch->recv = ch->send; rq.ch->peer = ch; rq.ch->st = dev->D.st; } else { bp = get_Bprotocol4mask(pmask); if (!bp) return -ENOPROTOOPT; rq2.protocol = protocol; rq2.adr = *adr; rq2.ch = ch; err = bp->create(&rq2); if (err) return err; ch->recv = rq2.ch->send; ch->peer = rq2.ch; rq2.ch->st = dev->D.st; rq.protocol = rq2.protocol; rq.adr = *adr; err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); if (err) { rq2.ch->ctrl(rq2.ch, CLOSE_CHANNEL, NULL); return err; } rq2.ch->recv = rq.ch->send; rq2.ch->peer = rq.ch; rq.ch->recv = rq2.ch->send; rq.ch->peer = rq2.ch; rq.ch->st = dev->D.st; } ch->protocol = protocol; ch->nr = rq.ch->nr; return 0; } int create_l2entity(struct mISDNdevice *dev, struct mISDNchannel *ch, u_int protocol, struct sockaddr_mISDN *adr) { struct channel_req rq; int err; if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: %s proto(%x) adr(%d %d %d %d)\n", __func__, dev_name(&dev->dev), protocol, adr->dev, adr->channel, adr->sapi, adr->tei); rq.protocol = ISDN_P_TE_S0; if (dev->Dprotocols & (1 << ISDN_P_TE_E1)) rq.protocol = ISDN_P_TE_E1; switch (protocol) { case ISDN_P_LAPD_NT: rq.protocol = ISDN_P_NT_S0; if (dev->Dprotocols & (1 << ISDN_P_NT_E1)) rq.protocol = ISDN_P_NT_E1; fallthrough; case ISDN_P_LAPD_TE: ch->recv = mISDN_queue_message; ch->peer = &dev->D.st->own; ch->st = dev->D.st; rq.adr.channel = 0; err = dev->D.ctrl(&dev->D, OPEN_CHANNEL, &rq); printk(KERN_DEBUG "%s: ret 1 %d\n", __func__, err); if (err) break; rq.protocol = protocol; rq.adr = *adr; rq.ch = ch; err = dev->teimgr->ctrl(dev->teimgr, OPEN_CHANNEL, &rq); printk(KERN_DEBUG "%s: ret 2 %d\n", __func__, err); if (!err) { if ((protocol == ISDN_P_LAPD_NT) && !rq.ch) break; add_layer2(rq.ch, dev->D.st); rq.ch->recv = mISDN_queue_message; rq.ch->peer = &dev->D.st->own; rq.ch->ctrl(rq.ch, OPEN_CHANNEL, NULL); /* can't fail */ } break; default: err = -EPROTONOSUPPORT; } return err; } void delete_channel(struct mISDNchannel *ch) { struct mISDN_sock *msk = container_of(ch, struct mISDN_sock, ch); struct mISDNchannel *pch; if (!ch->st) { printk(KERN_WARNING "%s: no stack\n", __func__); return; } if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: st(%s) protocol(%x)\n", __func__, dev_name(&ch->st->dev->dev), ch->protocol); if (ch->protocol >= ISDN_P_B_START) { if (ch->peer) { ch->peer->ctrl(ch->peer, CLOSE_CHANNEL, NULL); ch->peer = NULL; } return; } switch (ch->protocol) { case ISDN_P_NT_S0: case ISDN_P_TE_S0: case ISDN_P_NT_E1: case ISDN_P_TE_E1: write_lock_bh(&ch->st->l1sock.lock); sk_del_node_init(&msk->sk); write_unlock_bh(&ch->st->l1sock.lock); ch->st->dev->D.ctrl(&ch->st->dev->D, CLOSE_CHANNEL, NULL); break; case ISDN_P_LAPD_TE: pch = get_channel4id(ch->st, ch->nr); if (pch) { mutex_lock(&ch->st->lmutex); list_del(&pch->list); mutex_unlock(&ch->st->lmutex); pch->ctrl(pch, CLOSE_CHANNEL, NULL); pch = ch->st->dev->teimgr; pch->ctrl(pch, CLOSE_CHANNEL, NULL); } else printk(KERN_WARNING "%s: no l2 channel\n", __func__); break; case ISDN_P_LAPD_NT: pch = ch->st->dev->teimgr; if (pch) { pch->ctrl(pch, CLOSE_CHANNEL, NULL); } else printk(KERN_WARNING "%s: no l2 channel\n", __func__); break; default: break; } return; } void delete_stack(struct mISDNdevice *dev) { struct mISDNstack *st = dev->D.st; DECLARE_COMPLETION_ONSTACK(done); if (*debug & DEBUG_CORE_FUNC) printk(KERN_DEBUG "%s: st(%s)\n", __func__, dev_name(&st->dev->dev)); if (dev->teimgr) delete_teimanager(dev->teimgr); if (st->thread) { if (st->notify) { printk(KERN_WARNING "%s: notifier in use\n", __func__); complete(st->notify); } st->notify = &done; test_and_set_bit(mISDN_STACK_ABORT, &st->status); test_and_set_bit(mISDN_STACK_WAKEUP, &st->status); wake_up_interruptible(&st->workq); wait_for_completion(&done); } if (!list_empty(&st->layer2)) printk(KERN_WARNING "%s: layer2 list not empty\n", __func__); if (!hlist_empty(&st->l1sock.head)) printk(KERN_WARNING "%s: layer1 list not empty\n", __func__); kfree(st); } void mISDN_initstack(u_int *dp) { debug = dp; }
4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PERCPU_COUNTER_H #define _LINUX_PERCPU_COUNTER_H /* * A simple "approximate counter" for use in ext2 and ext3 superblocks. * * WARNING: these things are HUGE. 4 kbytes per counter on 32-way P4. */ #include <linux/spinlock.h> #include <linux/smp.h> #include <linux/list.h> #include <linux/threads.h> #include <linux/percpu.h> #include <linux/types.h> /* percpu_counter batch for local add or sub */ #define PERCPU_COUNTER_LOCAL_BATCH INT_MAX #ifdef CONFIG_SMP struct percpu_counter { raw_spinlock_t lock; s64 count; #ifdef CONFIG_HOTPLUG_CPU struct list_head list; /* All percpu_counters are on a list */ #endif s32 __percpu *counters; }; extern int percpu_counter_batch; int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, u32 nr_counters, struct lock_class_key *key); #define percpu_counter_init_many(fbc, value, gfp, nr_counters) \ ({ \ static struct lock_class_key __key; \ \ __percpu_counter_init_many(fbc, value, gfp, nr_counters,\ &__key); \ }) #define percpu_counter_init(fbc, value, gfp) \ percpu_counter_init_many(fbc, value, gfp, 1) void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters); static inline void percpu_counter_destroy(struct percpu_counter *fbc) { percpu_counter_destroy_many(fbc, 1); } void percpu_counter_set(struct percpu_counter *fbc, s64 amount); void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch); s64 __percpu_counter_sum(struct percpu_counter *fbc); int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch); bool __percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount, s32 batch); void percpu_counter_sync(struct percpu_counter *fbc); static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) { return __percpu_counter_compare(fbc, rhs, percpu_counter_batch); } static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { percpu_counter_add_batch(fbc, amount, percpu_counter_batch); } static inline bool percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount) { return __percpu_counter_limited_add(fbc, limit, amount, percpu_counter_batch); } /* * With percpu_counter_add_local() and percpu_counter_sub_local(), counts * are accumulated in local per cpu counter and not in fbc->count until * local count overflows PERCPU_COUNTER_LOCAL_BATCH. This makes counter * write efficient. * But percpu_counter_sum(), instead of percpu_counter_read(), needs to be * used to add up the counts from each CPU to account for all the local * counts. So percpu_counter_add_local() and percpu_counter_sub_local() * should be used when a counter is updated frequently and read rarely. */ static inline void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) { percpu_counter_add_batch(fbc, amount, PERCPU_COUNTER_LOCAL_BATCH); } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { s64 ret = __percpu_counter_sum(fbc); return ret < 0 ? 0 : ret; } static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { return __percpu_counter_sum(fbc); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) { return fbc->count; } /* * It is possible for the percpu_counter_read() to return a small negative * number for some counter which should never be negative. * */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { /* Prevent reloads of fbc->count */ s64 ret = READ_ONCE(fbc->count); if (ret >= 0) return ret; return 0; } static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return (fbc->counters != NULL); } #else /* !CONFIG_SMP */ struct percpu_counter { s64 count; }; static inline int percpu_counter_init_many(struct percpu_counter *fbc, s64 amount, gfp_t gfp, u32 nr_counters) { u32 i; for (i = 0; i < nr_counters; i++) fbc[i].count = amount; return 0; } static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp) { return percpu_counter_init_many(fbc, amount, gfp, 1); } static inline void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters) { } static inline void percpu_counter_destroy(struct percpu_counter *fbc) { } static inline void percpu_counter_set(struct percpu_counter *fbc, s64 amount) { fbc->count = amount; } static inline int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs) { if (fbc->count > rhs) return 1; else if (fbc->count < rhs) return -1; else return 0; } static inline int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch) { return percpu_counter_compare(fbc, rhs); } static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount) { unsigned long flags; local_irq_save(flags); fbc->count += amount; local_irq_restore(flags); } static inline bool percpu_counter_limited_add(struct percpu_counter *fbc, s64 limit, s64 amount) { unsigned long flags; bool good = false; s64 count; if (amount == 0) return true; local_irq_save(flags); count = fbc->count + amount; if ((amount > 0 && count <= limit) || (amount < 0 && count >= limit)) { fbc->count = count; good = true; } local_irq_restore(flags); return good; } /* non-SMP percpu_counter_add_local is the same with percpu_counter_add */ static inline void percpu_counter_add_local(struct percpu_counter *fbc, s64 amount) { percpu_counter_add(fbc, amount); } static inline void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { percpu_counter_add(fbc, amount); } static inline s64 percpu_counter_read(struct percpu_counter *fbc) { return fbc->count; } /* * percpu_counter is intended to track positive numbers. In the UP case the * number should never be negative. */ static inline s64 percpu_counter_read_positive(struct percpu_counter *fbc) { return fbc->count; } static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc) { return percpu_counter_read_positive(fbc); } static inline s64 percpu_counter_sum(struct percpu_counter *fbc) { return percpu_counter_read(fbc); } static inline bool percpu_counter_initialized(struct percpu_counter *fbc) { return true; } static inline void percpu_counter_sync(struct percpu_counter *fbc) { } #endif /* CONFIG_SMP */ static inline void percpu_counter_inc(struct percpu_counter *fbc) { percpu_counter_add(fbc, 1); } static inline void percpu_counter_dec(struct percpu_counter *fbc) { percpu_counter_add(fbc, -1); } static inline void percpu_counter_sub(struct percpu_counter *fbc, s64 amount) { percpu_counter_add(fbc, -amount); } static inline void percpu_counter_sub_local(struct percpu_counter *fbc, s64 amount) { percpu_counter_add_local(fbc, -amount); } #endif /* _LINUX_PERCPU_COUNTER_H */
3 3 3 3 3 8 1 7 8 8 8 8 8 8 8 8 8 8 8 8 8 1 2 3 3 3 3 3 3 2 2 1 1 2 2 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 // SPDX-License-Identifier: GPL-2.0-or-later /* netfs cookie management * * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * See Documentation/filesystems/caching/netfs-api.rst for more information on * the netfs API. */ #define FSCACHE_DEBUG_LEVEL COOKIE #include <linux/module.h> #include <linux/slab.h> #include "internal.h" struct kmem_cache *fscache_cookie_jar; static void fscache_cookie_lru_timed_out(struct timer_list *timer); static void fscache_cookie_lru_worker(struct work_struct *work); static void fscache_cookie_worker(struct work_struct *work); static void fscache_unhash_cookie(struct fscache_cookie *cookie); static void fscache_perform_invalidation(struct fscache_cookie *cookie); #define fscache_cookie_hash_shift 15 static struct hlist_bl_head fscache_cookie_hash[1 << fscache_cookie_hash_shift]; static LIST_HEAD(fscache_cookies); static DEFINE_RWLOCK(fscache_cookies_lock); static LIST_HEAD(fscache_cookie_lru); static DEFINE_SPINLOCK(fscache_cookie_lru_lock); DEFINE_TIMER(fscache_cookie_lru_timer, fscache_cookie_lru_timed_out); static DECLARE_WORK(fscache_cookie_lru_work, fscache_cookie_lru_worker); static const char fscache_cookie_states[FSCACHE_COOKIE_STATE__NR] = "-LCAIFUWRD"; static unsigned int fscache_lru_cookie_timeout = 10 * HZ; void fscache_print_cookie(struct fscache_cookie *cookie, char prefix) { const u8 *k; pr_err("%c-cookie c=%08x [fl=%lx na=%u nA=%u s=%c]\n", prefix, cookie->debug_id, cookie->flags, atomic_read(&cookie->n_active), atomic_read(&cookie->n_accesses), fscache_cookie_states[cookie->state]); pr_err("%c-cookie V=%08x [%s]\n", prefix, cookie->volume->debug_id, cookie->volume->key); k = (cookie->key_len <= sizeof(cookie->inline_key)) ? cookie->inline_key : cookie->key; pr_err("%c-key=[%u] '%*phN'\n", prefix, cookie->key_len, cookie->key_len, k); } static void fscache_free_cookie(struct fscache_cookie *cookie) { if (WARN_ON_ONCE(!list_empty(&cookie->commit_link))) { spin_lock(&fscache_cookie_lru_lock); list_del_init(&cookie->commit_link); spin_unlock(&fscache_cookie_lru_lock); fscache_stat_d(&fscache_n_cookies_lru); fscache_stat(&fscache_n_cookies_lru_removed); } if (WARN_ON_ONCE(test_bit(FSCACHE_COOKIE_IS_HASHED, &cookie->flags))) { fscache_print_cookie(cookie, 'F'); return; } write_lock(&fscache_cookies_lock); list_del(&cookie->proc_link); write_unlock(&fscache_cookies_lock); if (cookie->aux_len > sizeof(cookie->inline_aux)) kfree(cookie->aux); if (cookie->key_len > sizeof(cookie->inline_key)) kfree(cookie->key); fscache_stat_d(&fscache_n_cookies); kmem_cache_free(fscache_cookie_jar, cookie); } static void __fscache_queue_cookie(struct fscache_cookie *cookie) { if (!queue_work(fscache_wq, &cookie->work)) fscache_put_cookie(cookie, fscache_cookie_put_over_queued); } static void fscache_queue_cookie(struct fscache_cookie *cookie, enum fscache_cookie_trace where) { fscache_get_cookie(cookie, where); __fscache_queue_cookie(cookie); } /* * Initialise the access gate on a cookie by setting a flag to prevent the * state machine from being queued when the access counter transitions to 0. * We're only interested in this when we withdraw caching services from the * cookie. */ static void fscache_init_access_gate(struct fscache_cookie *cookie) { int n_accesses; n_accesses = atomic_read(&cookie->n_accesses); trace_fscache_access(cookie->debug_id, refcount_read(&cookie->ref), n_accesses, fscache_access_cache_pin); set_bit(FSCACHE_COOKIE_NO_ACCESS_WAKE, &cookie->flags); } /** * fscache_end_cookie_access - Unpin a cache at the end of an access. * @cookie: A data file cookie * @why: An indication of the circumstances of the access for tracing * * Unpin a cache cookie after we've accessed it and bring a deferred * relinquishment or withdrawal state into effect. * * The @why indicator is provided for tracing purposes. */ void fscache_end_cookie_access(struct fscache_cookie *cookie, enum fscache_access_trace why) { int n_accesses; smp_mb__before_atomic(); n_accesses = atomic_dec_return(&cookie->n_accesses); trace_fscache_access(cookie->debug_id, refcount_read(&cookie->ref), n_accesses, why); if (n_accesses == 0 && !test_bit(FSCACHE_COOKIE_NO_ACCESS_WAKE, &cookie->flags)) fscache_queue_cookie(cookie, fscache_cookie_get_end_access); } EXPORT_SYMBOL(fscache_end_cookie_access); /* * Pin the cache behind a cookie so that we can access it. */ static void __fscache_begin_cookie_access(struct fscache_cookie *cookie, enum fscache_access_trace why) { int n_accesses; n_accesses = atomic_inc_return(&cookie->n_accesses); smp_mb__after_atomic(); /* (Future) read state after is-caching. * Reread n_accesses after is-caching */ trace_fscache_access(cookie->debug_id, refcount_read(&cookie->ref), n_accesses, why); } /** * fscache_begin_cookie_access - Pin a cache so data can be accessed * @cookie: A data file cookie * @why: An indication of the circumstances of the access for tracing * * Attempt to pin the cache to prevent it from going away whilst we're * accessing data and returns true if successful. This works as follows: * * (1) If the cookie is not being cached (ie. FSCACHE_COOKIE_IS_CACHING is not * set), we return false to indicate access was not permitted. * * (2) If the cookie is being cached, we increment its n_accesses count and * then recheck the IS_CACHING flag, ending the access if it got cleared. * * (3) When we end the access, we decrement the cookie's n_accesses and wake * up the any waiters if it reaches 0. * * (4) Whilst the cookie is actively being cached, its n_accesses is kept * artificially incremented to prevent wakeups from happening. * * (5) When the cache is taken offline or if the cookie is culled, the flag is * cleared to prevent new accesses, the cookie's n_accesses is decremented * and we wait for it to become 0. * * The @why indicator are merely provided for tracing purposes. */ bool fscache_begin_cookie_access(struct fscache_cookie *cookie, enum fscache_access_trace why) { if (!test_bit(FSCACHE_COOKIE_IS_CACHING, &cookie->flags)) return false; __fscache_begin_cookie_access(cookie, why); if (!test_bit(FSCACHE_COOKIE_IS_CACHING, &cookie->flags) || !fscache_cache_is_live(cookie->volume->cache)) { fscache_end_cookie_access(cookie, fscache_access_unlive); return false; } return true; } static inline void wake_up_cookie_state(struct fscache_cookie *cookie) { /* Use a barrier to ensure that waiters see the state variable * change, as spin_unlock doesn't guarantee a barrier. * * See comments over wake_up_bit() and waitqueue_active(). */ smp_mb(); wake_up_var(&cookie->state); } /* * Change the state a cookie is at and wake up anyone waiting for that. Impose * an ordering between the stuff stored in the cookie and the state member. * Paired with fscache_cookie_state(). */ static void __fscache_set_cookie_state(struct fscache_cookie *cookie, enum fscache_cookie_state state) { smp_store_release(&cookie->state, state); } static void fscache_set_cookie_state(struct fscache_cookie *cookie, enum fscache_cookie_state state) { spin_lock(&cookie->lock); __fscache_set_cookie_state(cookie, state); spin_unlock(&cookie->lock); wake_up_cookie_state(cookie); } /** * fscache_cookie_lookup_negative - Note negative lookup * @cookie: The cookie that was being looked up * * Note that some part of the metadata path in the cache doesn't exist and so * we can release any waiting readers in the certain knowledge that there's * nothing for them to actually read. * * This function uses no locking and must only be called from the state machine. */ void fscache_cookie_lookup_negative(struct fscache_cookie *cookie) { set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_CREATING); } EXPORT_SYMBOL(fscache_cookie_lookup_negative); /** * fscache_resume_after_invalidation - Allow I/O to resume after invalidation * @cookie: The cookie that was invalidated * * Tell fscache that invalidation is sufficiently complete that I/O can be * allowed again. */ void fscache_resume_after_invalidation(struct fscache_cookie *cookie) { fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_ACTIVE); } EXPORT_SYMBOL(fscache_resume_after_invalidation); /** * fscache_caching_failed - Report that a failure stopped caching on a cookie * @cookie: The cookie that was affected * * Tell fscache that caching on a cookie needs to be stopped due to some sort * of failure. * * This function uses no locking and must only be called from the state machine. */ void fscache_caching_failed(struct fscache_cookie *cookie) { clear_bit(FSCACHE_COOKIE_IS_CACHING, &cookie->flags); fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_FAILED); trace_fscache_cookie(cookie->debug_id, refcount_read(&cookie->ref), fscache_cookie_failed); } EXPORT_SYMBOL(fscache_caching_failed); /* * Set the index key in a cookie. The cookie struct has space for a 16-byte * key plus length and hash, but if that's not big enough, it's instead a * pointer to a buffer containing 3 bytes of hash, 1 byte of length and then * the key data. */ static int fscache_set_key(struct fscache_cookie *cookie, const void *index_key, size_t index_key_len) { void *buf; size_t buf_size; buf_size = round_up(index_key_len, sizeof(__le32)); if (index_key_len > sizeof(cookie->inline_key)) { buf = kzalloc(buf_size, GFP_KERNEL); if (!buf) return -ENOMEM; cookie->key = buf; } else { buf = cookie->inline_key; } memcpy(buf, index_key, index_key_len); cookie->key_hash = fscache_hash(cookie->volume->key_hash, buf, buf_size); return 0; } static bool fscache_cookie_same(const struct fscache_cookie *a, const struct fscache_cookie *b) { const void *ka, *kb; if (a->key_hash != b->key_hash || a->volume != b->volume || a->key_len != b->key_len) return false; if (a->key_len <= sizeof(a->inline_key)) { ka = &a->inline_key; kb = &b->inline_key; } else { ka = a->key; kb = b->key; } return memcmp(ka, kb, a->key_len) == 0; } static atomic_t fscache_cookie_debug_id = ATOMIC_INIT(1); /* * Allocate a cookie. */ static struct fscache_cookie *fscache_alloc_cookie( struct fscache_volume *volume, u8 advice, const void *index_key, size_t index_key_len, const void *aux_data, size_t aux_data_len, loff_t object_size) { struct fscache_cookie *cookie; /* allocate and initialise a cookie */ cookie = kmem_cache_zalloc(fscache_cookie_jar, GFP_KERNEL); if (!cookie) return NULL; fscache_stat(&fscache_n_cookies); cookie->volume = volume; cookie->advice = advice; cookie->key_len = index_key_len; cookie->aux_len = aux_data_len; cookie->object_size = object_size; if (object_size == 0) __set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); if (fscache_set_key(cookie, index_key, index_key_len) < 0) goto nomem; if (cookie->aux_len <= sizeof(cookie->inline_aux)) { memcpy(cookie->inline_aux, aux_data, cookie->aux_len); } else { cookie->aux = kmemdup(aux_data, cookie->aux_len, GFP_KERNEL); if (!cookie->aux) goto nomem; } refcount_set(&cookie->ref, 1); cookie->debug_id = atomic_inc_return(&fscache_cookie_debug_id); spin_lock_init(&cookie->lock); INIT_LIST_HEAD(&cookie->commit_link); INIT_WORK(&cookie->work, fscache_cookie_worker); __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT); write_lock(&fscache_cookies_lock); list_add_tail(&cookie->proc_link, &fscache_cookies); write_unlock(&fscache_cookies_lock); fscache_see_cookie(cookie, fscache_cookie_new_acquire); return cookie; nomem: fscache_free_cookie(cookie); return NULL; } static inline bool fscache_cookie_is_dropped(struct fscache_cookie *cookie) { return READ_ONCE(cookie->state) == FSCACHE_COOKIE_STATE_DROPPED; } static void fscache_wait_on_collision(struct fscache_cookie *candidate, struct fscache_cookie *wait_for) { enum fscache_cookie_state *statep = &wait_for->state; wait_var_event_timeout(statep, fscache_cookie_is_dropped(wait_for), 20 * HZ); if (!fscache_cookie_is_dropped(wait_for)) { pr_notice("Potential collision c=%08x old: c=%08x", candidate->debug_id, wait_for->debug_id); wait_var_event(statep, fscache_cookie_is_dropped(wait_for)); } } /* * Attempt to insert the new cookie into the hash. If there's a collision, we * wait for the old cookie to complete if it's being relinquished and an error * otherwise. */ static bool fscache_hash_cookie(struct fscache_cookie *candidate) { struct fscache_cookie *cursor, *wait_for = NULL; struct hlist_bl_head *h; struct hlist_bl_node *p; unsigned int bucket; bucket = candidate->key_hash & (ARRAY_SIZE(fscache_cookie_hash) - 1); h = &fscache_cookie_hash[bucket]; hlist_bl_lock(h); hlist_bl_for_each_entry(cursor, p, h, hash_link) { if (fscache_cookie_same(candidate, cursor)) { if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cursor->flags)) goto collision; wait_for = fscache_get_cookie(cursor, fscache_cookie_get_hash_collision); break; } } fscache_get_volume(candidate->volume, fscache_volume_get_cookie); atomic_inc(&candidate->volume->n_cookies); hlist_bl_add_head(&candidate->hash_link, h); set_bit(FSCACHE_COOKIE_IS_HASHED, &candidate->flags); hlist_bl_unlock(h); if (wait_for) { fscache_wait_on_collision(candidate, wait_for); fscache_put_cookie(wait_for, fscache_cookie_put_hash_collision); } return true; collision: trace_fscache_cookie(cursor->debug_id, refcount_read(&cursor->ref), fscache_cookie_collision); pr_err("Duplicate cookie detected\n"); fscache_print_cookie(cursor, 'O'); fscache_print_cookie(candidate, 'N'); hlist_bl_unlock(h); return false; } /* * Request a cookie to represent a data storage object within a volume. * * We never let on to the netfs about errors. We may set a negative cookie * pointer, but that's okay */ struct fscache_cookie *__fscache_acquire_cookie( struct fscache_volume *volume, u8 advice, const void *index_key, size_t index_key_len, const void *aux_data, size_t aux_data_len, loff_t object_size) { struct fscache_cookie *cookie; kenter("V=%x", volume->debug_id); if (!index_key || !index_key_len || index_key_len > 255 || aux_data_len > 255) return NULL; if (!aux_data || !aux_data_len) { aux_data = NULL; aux_data_len = 0; } fscache_stat(&fscache_n_acquires); cookie = fscache_alloc_cookie(volume, advice, index_key, index_key_len, aux_data, aux_data_len, object_size); if (!cookie) { fscache_stat(&fscache_n_acquires_oom); return NULL; } if (!fscache_hash_cookie(cookie)) { fscache_see_cookie(cookie, fscache_cookie_discard); fscache_free_cookie(cookie); return NULL; } trace_fscache_acquire(cookie); fscache_stat(&fscache_n_acquires_ok); kleave(" = c=%08x", cookie->debug_id); return cookie; } EXPORT_SYMBOL(__fscache_acquire_cookie); /* * Prepare a cache object to be written to. */ static void fscache_prepare_to_write(struct fscache_cookie *cookie) { cookie->volume->cache->ops->prepare_to_write(cookie); } /* * Look up a cookie in the cache. */ static void fscache_perform_lookup(struct fscache_cookie *cookie) { enum fscache_access_trace trace = fscache_access_lookup_cookie_end_failed; bool need_withdraw = false; kenter(""); if (!cookie->volume->cache_priv) { fscache_create_volume(cookie->volume, true); if (!cookie->volume->cache_priv) { fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT); goto out; } } if (!cookie->volume->cache->ops->lookup_cookie(cookie)) { if (cookie->state != FSCACHE_COOKIE_STATE_FAILED) fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT); need_withdraw = true; kleave(" [fail]"); goto out; } fscache_see_cookie(cookie, fscache_cookie_see_active); spin_lock(&cookie->lock); if (test_and_clear_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags)) __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_INVALIDATING); else __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_ACTIVE); spin_unlock(&cookie->lock); wake_up_cookie_state(cookie); trace = fscache_access_lookup_cookie_end; out: fscache_end_cookie_access(cookie, trace); if (need_withdraw) fscache_withdraw_cookie(cookie); fscache_end_volume_access(cookie->volume, cookie, trace); } /* * Begin the process of looking up a cookie. We offload the actual process to * a worker thread. */ static bool fscache_begin_lookup(struct fscache_cookie *cookie, bool will_modify) { if (will_modify) { set_bit(FSCACHE_COOKIE_LOCAL_WRITE, &cookie->flags); set_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags); } if (!fscache_begin_volume_access(cookie->volume, cookie, fscache_access_lookup_cookie)) return false; __fscache_begin_cookie_access(cookie, fscache_access_lookup_cookie); __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_LOOKING_UP); set_bit(FSCACHE_COOKIE_IS_CACHING, &cookie->flags); set_bit(FSCACHE_COOKIE_HAS_BEEN_CACHED, &cookie->flags); return true; } /* * Start using the cookie for I/O. This prevents the backing object from being * reaped by VM pressure. */ void __fscache_use_cookie(struct fscache_cookie *cookie, bool will_modify) { enum fscache_cookie_state state; bool queue = false; int n_active; kenter("c=%08x", cookie->debug_id); if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), "Trying to use relinquished cookie\n")) return; spin_lock(&cookie->lock); n_active = atomic_inc_return(&cookie->n_active); trace_fscache_active(cookie->debug_id, refcount_read(&cookie->ref), n_active, atomic_read(&cookie->n_accesses), will_modify ? fscache_active_use_modify : fscache_active_use); again: state = fscache_cookie_state(cookie); switch (state) { case FSCACHE_COOKIE_STATE_QUIESCENT: queue = fscache_begin_lookup(cookie, will_modify); break; case FSCACHE_COOKIE_STATE_LOOKING_UP: case FSCACHE_COOKIE_STATE_CREATING: if (will_modify) set_bit(FSCACHE_COOKIE_LOCAL_WRITE, &cookie->flags); break; case FSCACHE_COOKIE_STATE_ACTIVE: case FSCACHE_COOKIE_STATE_INVALIDATING: if (will_modify && !test_and_set_bit(FSCACHE_COOKIE_LOCAL_WRITE, &cookie->flags)) { set_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags); queue = true; } /* * We could race with cookie_lru which may set LRU_DISCARD bit * but has yet to run the cookie state machine. If this happens * and another thread tries to use the cookie, clear LRU_DISCARD * so we don't end up withdrawing the cookie while in use. */ if (test_and_clear_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) fscache_see_cookie(cookie, fscache_cookie_see_lru_discard_clear); break; case FSCACHE_COOKIE_STATE_FAILED: case FSCACHE_COOKIE_STATE_WITHDRAWING: break; case FSCACHE_COOKIE_STATE_LRU_DISCARDING: spin_unlock(&cookie->lock); wait_var_event(&cookie->state, fscache_cookie_state(cookie) != FSCACHE_COOKIE_STATE_LRU_DISCARDING); spin_lock(&cookie->lock); goto again; case FSCACHE_COOKIE_STATE_DROPPED: case FSCACHE_COOKIE_STATE_RELINQUISHING: WARN(1, "Can't use cookie in state %u\n", state); break; } spin_unlock(&cookie->lock); if (queue) fscache_queue_cookie(cookie, fscache_cookie_get_use_work); kleave(""); } EXPORT_SYMBOL(__fscache_use_cookie); static void fscache_unuse_cookie_locked(struct fscache_cookie *cookie) { clear_bit(FSCACHE_COOKIE_DISABLED, &cookie->flags); if (!test_bit(FSCACHE_COOKIE_IS_CACHING, &cookie->flags)) return; cookie->unused_at = jiffies; spin_lock(&fscache_cookie_lru_lock); if (list_empty(&cookie->commit_link)) { fscache_get_cookie(cookie, fscache_cookie_get_lru); fscache_stat(&fscache_n_cookies_lru); } list_move_tail(&cookie->commit_link, &fscache_cookie_lru); spin_unlock(&fscache_cookie_lru_lock); timer_reduce(&fscache_cookie_lru_timer, jiffies + fscache_lru_cookie_timeout); } /* * Stop using the cookie for I/O. */ void __fscache_unuse_cookie(struct fscache_cookie *cookie, const void *aux_data, const loff_t *object_size) { unsigned int debug_id = cookie->debug_id; unsigned int r = refcount_read(&cookie->ref); unsigned int a = atomic_read(&cookie->n_accesses); unsigned int c; if (aux_data || object_size) __fscache_update_cookie(cookie, aux_data, object_size); /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ c = atomic_fetch_add_unless(&cookie->n_active, -1, 1); if (c != 1) { trace_fscache_active(debug_id, r, c - 1, a, fscache_active_unuse); return; } spin_lock(&cookie->lock); r = refcount_read(&cookie->ref); a = atomic_read(&cookie->n_accesses); c = atomic_dec_return(&cookie->n_active); trace_fscache_active(debug_id, r, c, a, fscache_active_unuse); if (c == 0) fscache_unuse_cookie_locked(cookie); spin_unlock(&cookie->lock); } EXPORT_SYMBOL(__fscache_unuse_cookie); /* * Perform work upon the cookie, such as committing its cache state, * relinquishing it or withdrawing the backing cache. We're protected from the * cache going away under us as object withdrawal must come through this * non-reentrant work item. */ static void fscache_cookie_state_machine(struct fscache_cookie *cookie) { enum fscache_cookie_state state; bool wake = false; kenter("c=%x", cookie->debug_id); again: spin_lock(&cookie->lock); again_locked: state = cookie->state; switch (state) { case FSCACHE_COOKIE_STATE_QUIESCENT: /* The QUIESCENT state is jumped to the LOOKING_UP state by * fscache_use_cookie(). */ if (atomic_read(&cookie->n_accesses) == 0 && test_bit(FSCACHE_COOKIE_DO_RELINQUISH, &cookie->flags)) { __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_RELINQUISHING); wake = true; goto again_locked; } break; case FSCACHE_COOKIE_STATE_LOOKING_UP: spin_unlock(&cookie->lock); fscache_init_access_gate(cookie); fscache_perform_lookup(cookie); goto again; case FSCACHE_COOKIE_STATE_INVALIDATING: spin_unlock(&cookie->lock); fscache_perform_invalidation(cookie); goto again; case FSCACHE_COOKIE_STATE_ACTIVE: if (test_and_clear_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags)) { spin_unlock(&cookie->lock); fscache_prepare_to_write(cookie); spin_lock(&cookie->lock); } if (test_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) { __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_LRU_DISCARDING); wake = true; goto again_locked; } fallthrough; case FSCACHE_COOKIE_STATE_FAILED: if (test_and_clear_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags)) fscache_end_cookie_access(cookie, fscache_access_invalidate_cookie_end); if (atomic_read(&cookie->n_accesses) != 0) break; if (test_bit(FSCACHE_COOKIE_DO_RELINQUISH, &cookie->flags)) { __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_RELINQUISHING); wake = true; goto again_locked; } if (test_bit(FSCACHE_COOKIE_DO_WITHDRAW, &cookie->flags)) { __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_WITHDRAWING); wake = true; goto again_locked; } break; case FSCACHE_COOKIE_STATE_LRU_DISCARDING: case FSCACHE_COOKIE_STATE_RELINQUISHING: case FSCACHE_COOKIE_STATE_WITHDRAWING: if (cookie->cache_priv) { spin_unlock(&cookie->lock); cookie->volume->cache->ops->withdraw_cookie(cookie); spin_lock(&cookie->lock); } if (test_and_clear_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags)) fscache_end_cookie_access(cookie, fscache_access_invalidate_cookie_end); switch (state) { case FSCACHE_COOKIE_STATE_RELINQUISHING: fscache_see_cookie(cookie, fscache_cookie_see_relinquish); fscache_unhash_cookie(cookie); __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_DROPPED); wake = true; goto out; case FSCACHE_COOKIE_STATE_LRU_DISCARDING: fscache_see_cookie(cookie, fscache_cookie_see_lru_discard); break; case FSCACHE_COOKIE_STATE_WITHDRAWING: fscache_see_cookie(cookie, fscache_cookie_see_withdraw); break; default: BUG(); } clear_bit(FSCACHE_COOKIE_NEEDS_UPDATE, &cookie->flags); clear_bit(FSCACHE_COOKIE_DO_WITHDRAW, &cookie->flags); clear_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags); clear_bit(FSCACHE_COOKIE_DO_PREP_TO_WRITE, &cookie->flags); set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_QUIESCENT); wake = true; goto again_locked; case FSCACHE_COOKIE_STATE_DROPPED: break; default: WARN_ONCE(1, "Cookie %x in unexpected state %u\n", cookie->debug_id, state); break; } out: spin_unlock(&cookie->lock); if (wake) wake_up_cookie_state(cookie); kleave(""); } static void fscache_cookie_worker(struct work_struct *work) { struct fscache_cookie *cookie = container_of(work, struct fscache_cookie, work); fscache_see_cookie(cookie, fscache_cookie_see_work); fscache_cookie_state_machine(cookie); fscache_put_cookie(cookie, fscache_cookie_put_work); } /* * Wait for the object to become inactive. The cookie's work item will be * scheduled when someone transitions n_accesses to 0 - but if someone's * already done that, schedule it anyway. */ static void __fscache_withdraw_cookie(struct fscache_cookie *cookie) { int n_accesses; bool unpinned; unpinned = test_and_clear_bit(FSCACHE_COOKIE_NO_ACCESS_WAKE, &cookie->flags); /* Need to read the access count after unpinning */ n_accesses = atomic_read(&cookie->n_accesses); if (unpinned) trace_fscache_access(cookie->debug_id, refcount_read(&cookie->ref), n_accesses, fscache_access_cache_unpin); if (n_accesses == 0) fscache_queue_cookie(cookie, fscache_cookie_get_end_access); } static void fscache_cookie_lru_do_one(struct fscache_cookie *cookie) { fscache_see_cookie(cookie, fscache_cookie_see_lru_do_one); spin_lock(&cookie->lock); if (cookie->state != FSCACHE_COOKIE_STATE_ACTIVE || time_before(jiffies, cookie->unused_at + fscache_lru_cookie_timeout) || atomic_read(&cookie->n_active) > 0) { spin_unlock(&cookie->lock); fscache_stat(&fscache_n_cookies_lru_removed); } else { set_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags); spin_unlock(&cookie->lock); fscache_stat(&fscache_n_cookies_lru_expired); kdebug("lru c=%x", cookie->debug_id); __fscache_withdraw_cookie(cookie); } fscache_put_cookie(cookie, fscache_cookie_put_lru); } static void fscache_cookie_lru_worker(struct work_struct *work) { struct fscache_cookie *cookie; unsigned long unused_at; spin_lock(&fscache_cookie_lru_lock); while (!list_empty(&fscache_cookie_lru)) { cookie = list_first_entry(&fscache_cookie_lru, struct fscache_cookie, commit_link); unused_at = cookie->unused_at + fscache_lru_cookie_timeout; if (time_before(jiffies, unused_at)) { timer_reduce(&fscache_cookie_lru_timer, unused_at); break; } list_del_init(&cookie->commit_link); fscache_stat_d(&fscache_n_cookies_lru); spin_unlock(&fscache_cookie_lru_lock); fscache_cookie_lru_do_one(cookie); spin_lock(&fscache_cookie_lru_lock); } spin_unlock(&fscache_cookie_lru_lock); } static void fscache_cookie_lru_timed_out(struct timer_list *timer) { queue_work(fscache_wq, &fscache_cookie_lru_work); } static void fscache_cookie_drop_from_lru(struct fscache_cookie *cookie) { bool need_put = false; if (!list_empty(&cookie->commit_link)) { spin_lock(&fscache_cookie_lru_lock); if (!list_empty(&cookie->commit_link)) { list_del_init(&cookie->commit_link); fscache_stat_d(&fscache_n_cookies_lru); fscache_stat(&fscache_n_cookies_lru_dropped); need_put = true; } spin_unlock(&fscache_cookie_lru_lock); if (need_put) fscache_put_cookie(cookie, fscache_cookie_put_lru); } } /* * Remove a cookie from the hash table. */ static void fscache_unhash_cookie(struct fscache_cookie *cookie) { struct hlist_bl_head *h; unsigned int bucket; bucket = cookie->key_hash & (ARRAY_SIZE(fscache_cookie_hash) - 1); h = &fscache_cookie_hash[bucket]; hlist_bl_lock(h); hlist_bl_del(&cookie->hash_link); clear_bit(FSCACHE_COOKIE_IS_HASHED, &cookie->flags); hlist_bl_unlock(h); fscache_stat(&fscache_n_relinquishes_dropped); } static void fscache_drop_withdraw_cookie(struct fscache_cookie *cookie) { fscache_cookie_drop_from_lru(cookie); __fscache_withdraw_cookie(cookie); } /** * fscache_withdraw_cookie - Mark a cookie for withdrawal * @cookie: The cookie to be withdrawn. * * Allow the cache backend to withdraw the backing for a cookie for its own * reasons, even if that cookie is in active use. */ void fscache_withdraw_cookie(struct fscache_cookie *cookie) { set_bit(FSCACHE_COOKIE_DO_WITHDRAW, &cookie->flags); fscache_drop_withdraw_cookie(cookie); } EXPORT_SYMBOL(fscache_withdraw_cookie); /* * Allow the netfs to release a cookie back to the cache. * - the object will be marked as recyclable on disk if retire is true */ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) { fscache_stat(&fscache_n_relinquishes); if (retire) fscache_stat(&fscache_n_relinquishes_retire); kenter("c=%08x{%d},%d", cookie->debug_id, atomic_read(&cookie->n_active), retire); if (WARN(test_and_set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), "Cookie c=%x already relinquished\n", cookie->debug_id)) return; if (retire) set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags); trace_fscache_relinquish(cookie, retire); ASSERTCMP(atomic_read(&cookie->n_active), ==, 0); ASSERTCMP(atomic_read(&cookie->volume->n_cookies), >, 0); atomic_dec(&cookie->volume->n_cookies); if (test_bit(FSCACHE_COOKIE_HAS_BEEN_CACHED, &cookie->flags)) { set_bit(FSCACHE_COOKIE_DO_RELINQUISH, &cookie->flags); fscache_drop_withdraw_cookie(cookie); } else { fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_DROPPED); fscache_unhash_cookie(cookie); } fscache_put_cookie(cookie, fscache_cookie_put_relinquish); } EXPORT_SYMBOL(__fscache_relinquish_cookie); /* * Drop a reference to a cookie. */ void fscache_put_cookie(struct fscache_cookie *cookie, enum fscache_cookie_trace where) { struct fscache_volume *volume = cookie->volume; unsigned int cookie_debug_id = cookie->debug_id; bool zero; int ref; zero = __refcount_dec_and_test(&cookie->ref, &ref); trace_fscache_cookie(cookie_debug_id, ref - 1, where); if (zero) { fscache_free_cookie(cookie); fscache_put_volume(volume, fscache_volume_put_cookie); } } EXPORT_SYMBOL(fscache_put_cookie); /* * Get a reference to a cookie. */ struct fscache_cookie *fscache_get_cookie(struct fscache_cookie *cookie, enum fscache_cookie_trace where) { int ref; __refcount_inc(&cookie->ref, &ref); trace_fscache_cookie(cookie->debug_id, ref + 1, where); return cookie; } EXPORT_SYMBOL(fscache_get_cookie); /* * Ask the cache to effect invalidation of a cookie. */ static void fscache_perform_invalidation(struct fscache_cookie *cookie) { if (!cookie->volume->cache->ops->invalidate_cookie(cookie)) fscache_caching_failed(cookie); fscache_end_cookie_access(cookie, fscache_access_invalidate_cookie_end); } /* * Invalidate an object. */ void __fscache_invalidate(struct fscache_cookie *cookie, const void *aux_data, loff_t new_size, unsigned int flags) { bool is_caching; kenter("c=%x", cookie->debug_id); fscache_stat(&fscache_n_invalidates); if (WARN(test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags), "Trying to invalidate relinquished cookie\n")) return; if ((flags & FSCACHE_INVAL_DIO_WRITE) && test_and_set_bit(FSCACHE_COOKIE_DISABLED, &cookie->flags)) return; spin_lock(&cookie->lock); set_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags); fscache_update_aux(cookie, aux_data, &new_size); cookie->inval_counter++; trace_fscache_invalidate(cookie, new_size); switch (cookie->state) { case FSCACHE_COOKIE_STATE_INVALIDATING: /* is_still_valid will catch it */ default: spin_unlock(&cookie->lock); kleave(" [no %u]", cookie->state); return; case FSCACHE_COOKIE_STATE_LOOKING_UP: if (!test_and_set_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags)) __fscache_begin_cookie_access(cookie, fscache_access_invalidate_cookie); fallthrough; case FSCACHE_COOKIE_STATE_CREATING: spin_unlock(&cookie->lock); kleave(" [look %x]", cookie->inval_counter); return; case FSCACHE_COOKIE_STATE_ACTIVE: is_caching = fscache_begin_cookie_access( cookie, fscache_access_invalidate_cookie); if (is_caching) __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_INVALIDATING); spin_unlock(&cookie->lock); wake_up_cookie_state(cookie); if (is_caching) fscache_queue_cookie(cookie, fscache_cookie_get_inval_work); kleave(" [inv]"); return; } } EXPORT_SYMBOL(__fscache_invalidate); #ifdef CONFIG_PROC_FS /* * Generate a list of extant cookies in /proc/fs/fscache/cookies */ static int fscache_cookies_seq_show(struct seq_file *m, void *v) { struct fscache_cookie *cookie; unsigned int keylen = 0, auxlen = 0; u8 *p; if (v == &fscache_cookies) { seq_puts(m, "COOKIE VOLUME REF ACT ACC S FL DEF \n" "======== ======== === === === = == ================\n" ); return 0; } cookie = list_entry(v, struct fscache_cookie, proc_link); seq_printf(m, "%08x %08x %3d %3d %3d %c %02lx", cookie->debug_id, cookie->volume->debug_id, refcount_read(&cookie->ref), atomic_read(&cookie->n_active), atomic_read(&cookie->n_accesses), fscache_cookie_states[cookie->state], cookie->flags); keylen = cookie->key_len; auxlen = cookie->aux_len; if (keylen > 0 || auxlen > 0) { seq_puts(m, " "); p = keylen <= sizeof(cookie->inline_key) ? cookie->inline_key : cookie->key; for (; keylen > 0; keylen--) seq_printf(m, "%02x", *p++); if (auxlen > 0) { seq_puts(m, ", "); p = auxlen <= sizeof(cookie->inline_aux) ? cookie->inline_aux : cookie->aux; for (; auxlen > 0; auxlen--) seq_printf(m, "%02x", *p++); } } seq_puts(m, "\n"); return 0; } static void *fscache_cookies_seq_start(struct seq_file *m, loff_t *_pos) __acquires(fscache_cookies_lock) { read_lock(&fscache_cookies_lock); return seq_list_start_head(&fscache_cookies, *_pos); } static void *fscache_cookies_seq_next(struct seq_file *m, void *v, loff_t *_pos) { return seq_list_next(v, &fscache_cookies, _pos); } static void fscache_cookies_seq_stop(struct seq_file *m, void *v) __releases(rcu) { read_unlock(&fscache_cookies_lock); } const struct seq_operations fscache_cookies_seq_ops = { .start = fscache_cookies_seq_start, .next = fscache_cookies_seq_next, .stop = fscache_cookies_seq_stop, .show = fscache_cookies_seq_show, }; #endif
1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 // SPDX-License-Identifier: GPL-2.0+ /* * usbdux.c * Copyright (C) 2003-2014 Bernd Porr, mail@berndporr.me.uk */ /* * Driver: usbdux * Description: University of Stirling USB DAQ & INCITE Technology Limited * Devices: [ITL] USB-DUX (usbdux) * Author: Bernd Porr <mail@berndporr.me.uk> * Updated: 10 Oct 2014 * Status: Stable * * Connection scheme for the counter at the digital port: * 0=/CLK0, 1=UP/DOWN0, 2=RESET0, 4=/CLK1, 5=UP/DOWN1, 6=RESET1. * The sampling rate of the counter is approximately 500Hz. * * Note that under USB2.0 the length of the channel list determines * the max sampling rate. If you sample only one channel you get 8kHz * sampling rate. If you sample two channels you get 4kHz and so on. */ /* * I must give credit here to Chris Baugher who * wrote the driver for AT-MIO-16d. I used some parts of this * driver. I also must give credits to David Brownell * who supported me with the USB development. * * Bernd Porr * * * Revision history: * 0.94: D/A output should work now with any channel list combinations * 0.95: .owner commented out for kernel vers below 2.4.19 * sanity checks in ai/ao_cmd * 0.96: trying to get it working with 2.6, moved all memory alloc to comedi's * attach final USB IDs * moved memory allocation completely to the corresponding comedi * functions firmware upload is by fxload and no longer by comedi (due to * enumeration) * 0.97: USB IDs received, adjusted table * 0.98: SMP, locking, memory alloc: moved all usb memory alloc * to the usb subsystem and moved all comedi related memory * alloc to comedi. * | kernel | registration | usbdux-usb | usbdux-comedi | comedi | * 0.99: USB 2.0: changed protocol to isochronous transfer * IRQ transfer is too buggy and too risky in 2.0 * for the high speed ISO transfer is now a working version * available * 0.99b: Increased the iso transfer buffer for high sp.to 10 buffers. Some VIA * chipsets miss out IRQs. Deeper buffering is needed. * 1.00: full USB 2.0 support for the A/D converter. Now: max 8kHz sampling * rate. * Firmware vers 1.00 is needed for this. * Two 16 bit up/down/reset counter with a sampling rate of 1kHz * And loads of cleaning up, in particular streamlining the * bulk transfers. * 1.1: moved EP4 transfers to EP1 to make space for a PWM output on EP4 * 1.2: added PWM support via EP4 * 2.0: PWM seems to be stable and is not interfering with the other functions * 2.1: changed PWM API * 2.2: added firmware kernel request to fix an udev problem * 2.3: corrected a bug in bulk timeouts which were far too short * 2.4: fixed a bug which causes the driver to hang when it ran out of data. * Thanks to Jan-Matthias Braun and Ian to spot the bug and fix it. * */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/fcntl.h> #include <linux/compiler.h> #include <linux/comedi/comedi_usb.h> /* constants for firmware upload and download */ #define USBDUX_FIRMWARE "usbdux_firmware.bin" #define USBDUX_FIRMWARE_MAX_LEN 0x2000 #define USBDUX_FIRMWARE_CMD 0xa0 #define VENDOR_DIR_IN 0xc0 #define VENDOR_DIR_OUT 0x40 #define USBDUX_CPU_CS 0xe600 /* usbdux bulk transfer commands */ #define USBDUX_CMD_MULT_AI 0 #define USBDUX_CMD_AO 1 #define USBDUX_CMD_DIO_CFG 2 #define USBDUX_CMD_DIO_BITS 3 #define USBDUX_CMD_SINGLE_AI 4 #define USBDUX_CMD_TIMER_RD 5 #define USBDUX_CMD_TIMER_WR 6 #define USBDUX_CMD_PWM_ON 7 #define USBDUX_CMD_PWM_OFF 8 /* timeout for the USB-transfer in ms */ #define BULK_TIMEOUT 1000 /* 300Hz max frequ under PWM */ #define MIN_PWM_PERIOD ((long)(1E9 / 300)) /* Default PWM frequency */ #define PWM_DEFAULT_PERIOD ((long)(1E9 / 100)) /* Size of one A/D value */ #define SIZEADIN ((sizeof(u16))) /* * Size of the input-buffer IN BYTES * Always multiple of 8 for 8 microframes which is needed in the highspeed mode */ #define SIZEINBUF (8 * SIZEADIN) /* 16 bytes. */ #define SIZEINSNBUF 16 /* size of one value for the D/A converter: channel and value */ #define SIZEDAOUT ((sizeof(u8) + sizeof(u16))) /* * Size of the output-buffer in bytes * Actually only the first 4 triplets are used but for the * high speed mode we need to pad it to 8 (microframes). */ #define SIZEOUTBUF (8 * SIZEDAOUT) /* * Size of the buffer for the dux commands: just now max size is determined * by the analogue out + command byte + panic bytes... */ #define SIZEOFDUXBUFFER (8 * SIZEDAOUT + 2) /* Number of in-URBs which receive the data: min=2 */ #define NUMOFINBUFFERSFULL 5 /* Number of out-URBs which send the data: min=2 */ #define NUMOFOUTBUFFERSFULL 5 /* Number of in-URBs which receive the data: min=5 */ /* must have more buffers due to buggy USB ctr */ #define NUMOFINBUFFERSHIGH 10 /* Number of out-URBs which send the data: min=5 */ /* must have more buffers due to buggy USB ctr */ #define NUMOFOUTBUFFERSHIGH 10 /* number of retries to get the right dux command */ #define RETRIES 10 static const struct comedi_lrange range_usbdux_ai_range = { 4, { BIP_RANGE(4.096), BIP_RANGE(4.096 / 2), UNI_RANGE(4.096), UNI_RANGE(4.096 / 2) } }; static const struct comedi_lrange range_usbdux_ao_range = { 2, { BIP_RANGE(4.096), UNI_RANGE(4.096) } }; struct usbdux_private { /* actual number of in-buffers */ int n_ai_urbs; /* actual number of out-buffers */ int n_ao_urbs; /* ISO-transfer handling: buffers */ struct urb **ai_urbs; struct urb **ao_urbs; /* pwm-transfer handling */ struct urb *pwm_urb; /* PWM period */ unsigned int pwm_period; /* PWM internal delay for the GPIF in the FX2 */ u8 pwm_delay; /* size of the PWM buffer which holds the bit pattern */ int pwm_buf_sz; /* input buffer for the ISO-transfer */ __le16 *in_buf; /* input buffer for single insn */ __le16 *insn_buf; unsigned int high_speed:1; unsigned int ai_cmd_running:1; unsigned int ao_cmd_running:1; unsigned int pwm_cmd_running:1; /* time between samples in units of the timer */ unsigned int ai_timer; unsigned int ao_timer; /* counter between aquisitions */ unsigned int ai_counter; unsigned int ao_counter; /* interval in frames/uframes */ unsigned int ai_interval; /* commands */ u8 *dux_commands; struct mutex mut; }; static void usbdux_unlink_urbs(struct urb **urbs, int num_urbs) { int i; for (i = 0; i < num_urbs; i++) usb_kill_urb(urbs[i]); } static void usbdux_ai_stop(struct comedi_device *dev, int do_unlink) { struct usbdux_private *devpriv = dev->private; if (do_unlink && devpriv->ai_urbs) usbdux_unlink_urbs(devpriv->ai_urbs, devpriv->n_ai_urbs); devpriv->ai_cmd_running = 0; } static int usbdux_ai_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbdux_private *devpriv = dev->private; /* prevent other CPUs from submitting new commands just now */ mutex_lock(&devpriv->mut); /* unlink only if the urb really has been submitted */ usbdux_ai_stop(dev, devpriv->ai_cmd_running); mutex_unlock(&devpriv->mut); return 0; } static void usbduxsub_ai_handle_urb(struct comedi_device *dev, struct comedi_subdevice *s, struct urb *urb) { struct usbdux_private *devpriv = dev->private; struct comedi_async *async = s->async; struct comedi_cmd *cmd = &async->cmd; int ret; int i; devpriv->ai_counter--; if (devpriv->ai_counter == 0) { devpriv->ai_counter = devpriv->ai_timer; /* get the data from the USB bus and hand it over to comedi */ for (i = 0; i < cmd->chanlist_len; i++) { unsigned int range = CR_RANGE(cmd->chanlist[i]); u16 val = le16_to_cpu(devpriv->in_buf[i]); /* bipolar data is two's-complement */ if (comedi_range_is_bipolar(s, range)) val = comedi_offset_munge(s, val); /* transfer data */ if (!comedi_buf_write_samples(s, &val, 1)) return; } if (cmd->stop_src == TRIG_COUNT && async->scans_done >= cmd->stop_arg) async->events |= COMEDI_CB_EOA; } /* if command is still running, resubmit urb */ if (!(async->events & COMEDI_CB_CANCEL_MASK)) { urb->dev = comedi_to_usb_dev(dev); ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret < 0) { dev_err(dev->class_dev, "urb resubmit failed in int-context! err=%d\n", ret); if (ret == -EL2NSYNC) dev_err(dev->class_dev, "buggy USB host controller or bug in IRQ handler!\n"); async->events |= COMEDI_CB_ERROR; } } } static void usbduxsub_ai_isoc_irq(struct urb *urb) { struct comedi_device *dev = urb->context; struct comedi_subdevice *s = dev->read_subdev; struct comedi_async *async = s->async; struct usbdux_private *devpriv = dev->private; /* exit if not running a command, do not resubmit urb */ if (!devpriv->ai_cmd_running) return; switch (urb->status) { case 0: /* copy the result in the transfer buffer */ memcpy(devpriv->in_buf, urb->transfer_buffer, SIZEINBUF); usbduxsub_ai_handle_urb(dev, s, urb); break; case -EILSEQ: /* * error in the ISOchronous data * we don't copy the data into the transfer buffer * and recycle the last data byte */ dev_dbg(dev->class_dev, "CRC error in ISO IN stream\n"); usbduxsub_ai_handle_urb(dev, s, urb); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -ECONNABORTED: /* after an unlink command, unplug, ... etc */ async->events |= COMEDI_CB_ERROR; break; default: /* a real error */ dev_err(dev->class_dev, "Non-zero urb status received in ai intr context: %d\n", urb->status); async->events |= COMEDI_CB_ERROR; break; } /* * comedi_handle_events() cannot be used in this driver. The (*cancel) * operation would unlink the urb. */ if (async->events & COMEDI_CB_CANCEL_MASK) usbdux_ai_stop(dev, 0); comedi_event(dev, s); } static void usbdux_ao_stop(struct comedi_device *dev, int do_unlink) { struct usbdux_private *devpriv = dev->private; if (do_unlink && devpriv->ao_urbs) usbdux_unlink_urbs(devpriv->ao_urbs, devpriv->n_ao_urbs); devpriv->ao_cmd_running = 0; } static int usbdux_ao_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbdux_private *devpriv = dev->private; /* prevent other CPUs from submitting a command just now */ mutex_lock(&devpriv->mut); /* unlink only if it is really running */ usbdux_ao_stop(dev, devpriv->ao_cmd_running); mutex_unlock(&devpriv->mut); return 0; } static void usbduxsub_ao_handle_urb(struct comedi_device *dev, struct comedi_subdevice *s, struct urb *urb) { struct usbdux_private *devpriv = dev->private; struct comedi_async *async = s->async; struct comedi_cmd *cmd = &async->cmd; u8 *datap; int ret; int i; devpriv->ao_counter--; if (devpriv->ao_counter == 0) { devpriv->ao_counter = devpriv->ao_timer; if (cmd->stop_src == TRIG_COUNT && async->scans_done >= cmd->stop_arg) { async->events |= COMEDI_CB_EOA; return; } /* transmit data to the USB bus */ datap = urb->transfer_buffer; *datap++ = cmd->chanlist_len; for (i = 0; i < cmd->chanlist_len; i++) { unsigned int chan = CR_CHAN(cmd->chanlist[i]); unsigned short val; if (!comedi_buf_read_samples(s, &val, 1)) { dev_err(dev->class_dev, "buffer underflow\n"); async->events |= COMEDI_CB_OVERFLOW; return; } /* pointer to the DA */ *datap++ = val & 0xff; *datap++ = (val >> 8) & 0xff; *datap++ = chan << 6; s->readback[chan] = val; } } /* if command is still running, resubmit urb for BULK transfer */ if (!(async->events & COMEDI_CB_CANCEL_MASK)) { urb->transfer_buffer_length = SIZEOUTBUF; urb->dev = comedi_to_usb_dev(dev); urb->status = 0; if (devpriv->high_speed) urb->interval = 8; /* uframes */ else urb->interval = 1; /* frames */ urb->number_of_packets = 1; urb->iso_frame_desc[0].offset = 0; urb->iso_frame_desc[0].length = SIZEOUTBUF; urb->iso_frame_desc[0].status = 0; ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret < 0) { dev_err(dev->class_dev, "ao urb resubm failed in int-cont. ret=%d", ret); if (ret == -EL2NSYNC) dev_err(dev->class_dev, "buggy USB host controller or bug in IRQ handling!\n"); async->events |= COMEDI_CB_ERROR; } } } static void usbduxsub_ao_isoc_irq(struct urb *urb) { struct comedi_device *dev = urb->context; struct comedi_subdevice *s = dev->write_subdev; struct comedi_async *async = s->async; struct usbdux_private *devpriv = dev->private; /* exit if not running a command, do not resubmit urb */ if (!devpriv->ao_cmd_running) return; switch (urb->status) { case 0: usbduxsub_ao_handle_urb(dev, s, urb); break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -ECONNABORTED: /* after an unlink command, unplug, ... etc */ async->events |= COMEDI_CB_ERROR; break; default: /* a real error */ dev_err(dev->class_dev, "Non-zero urb status received in ao intr context: %d\n", urb->status); async->events |= COMEDI_CB_ERROR; break; } /* * comedi_handle_events() cannot be used in this driver. The (*cancel) * operation would unlink the urb. */ if (async->events & COMEDI_CB_CANCEL_MASK) usbdux_ao_stop(dev, 0); comedi_event(dev, s); } static int usbdux_submit_urbs(struct comedi_device *dev, struct urb **urbs, int num_urbs, int input_urb) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbdux_private *devpriv = dev->private; struct urb *urb; int ret; int i; /* Submit all URBs and start the transfer on the bus */ for (i = 0; i < num_urbs; i++) { urb = urbs[i]; /* in case of a resubmission after an unlink... */ if (input_urb) urb->interval = devpriv->ai_interval; urb->context = dev; urb->dev = usb; urb->status = 0; urb->transfer_flags = URB_ISO_ASAP; ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret) return ret; } return 0; } static int usbdux_ai_cmdtest(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_cmd *cmd) { struct usbdux_private *devpriv = dev->private; int err = 0; /* Step 1 : check if triggers are trivially valid */ err |= comedi_check_trigger_src(&cmd->start_src, TRIG_NOW | TRIG_INT); err |= comedi_check_trigger_src(&cmd->scan_begin_src, TRIG_TIMER); err |= comedi_check_trigger_src(&cmd->convert_src, TRIG_NOW); err |= comedi_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT); err |= comedi_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE); if (err) return 1; /* Step 2a : make sure trigger sources are unique */ err |= comedi_check_trigger_is_unique(cmd->start_src); err |= comedi_check_trigger_is_unique(cmd->stop_src); /* Step 2b : and mutually compatible */ if (err) return 2; /* Step 3: check if arguments are trivially valid */ err |= comedi_check_trigger_arg_is(&cmd->start_arg, 0); if (cmd->scan_begin_src == TRIG_FOLLOW) /* internal trigger */ err |= comedi_check_trigger_arg_is(&cmd->scan_begin_arg, 0); if (cmd->scan_begin_src == TRIG_TIMER) { /* full speed does 1kHz scans every USB frame */ unsigned int arg = 1000000; unsigned int min_arg = arg; if (devpriv->high_speed) { /* * In high speed mode microframes are possible. * However, during one microframe we can roughly * sample one channel. Thus, the more channels * are in the channel list the more time we need. */ int i = 1; /* find a power of 2 for the number of channels */ while (i < cmd->chanlist_len) i = i * 2; arg /= 8; min_arg = arg * i; } err |= comedi_check_trigger_arg_min(&cmd->scan_begin_arg, min_arg); /* calc the real sampling rate with the rounding errors */ arg = (cmd->scan_begin_arg / arg) * arg; err |= comedi_check_trigger_arg_is(&cmd->scan_begin_arg, arg); } err |= comedi_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len); if (cmd->stop_src == TRIG_COUNT) err |= comedi_check_trigger_arg_min(&cmd->stop_arg, 1); else /* TRIG_NONE */ err |= comedi_check_trigger_arg_is(&cmd->stop_arg, 0); if (err) return 3; return 0; } /* * creates the ADC command for the MAX1271 * range is the range value from comedi */ static u8 create_adc_command(unsigned int chan, unsigned int range) { u8 p = (range <= 1); u8 r = ((range % 2) == 0); return (chan << 4) | ((p == 1) << 2) | ((r == 1) << 3); } static int send_dux_commands(struct comedi_device *dev, unsigned int cmd_type) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbdux_private *devpriv = dev->private; int nsent; devpriv->dux_commands[0] = cmd_type; return usb_bulk_msg(usb, usb_sndbulkpipe(usb, 1), devpriv->dux_commands, SIZEOFDUXBUFFER, &nsent, BULK_TIMEOUT); } static int receive_dux_commands(struct comedi_device *dev, unsigned int command) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbdux_private *devpriv = dev->private; int ret; int nrec; int i; for (i = 0; i < RETRIES; i++) { ret = usb_bulk_msg(usb, usb_rcvbulkpipe(usb, 8), devpriv->insn_buf, SIZEINSNBUF, &nrec, BULK_TIMEOUT); if (ret < 0) return ret; if (le16_to_cpu(devpriv->insn_buf[0]) == command) return ret; } /* command not received */ return -EFAULT; } static int usbdux_ai_inttrig(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int trig_num) { struct usbdux_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; int ret; if (trig_num != cmd->start_arg) return -EINVAL; mutex_lock(&devpriv->mut); if (!devpriv->ai_cmd_running) { devpriv->ai_cmd_running = 1; ret = usbdux_submit_urbs(dev, devpriv->ai_urbs, devpriv->n_ai_urbs, 1); if (ret < 0) { devpriv->ai_cmd_running = 0; goto ai_trig_exit; } s->async->inttrig = NULL; } else { ret = -EBUSY; } ai_trig_exit: mutex_unlock(&devpriv->mut); return ret; } static int usbdux_ai_cmd(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbdux_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; int len = cmd->chanlist_len; int ret = -EBUSY; int i; /* block other CPUs from starting an ai_cmd */ mutex_lock(&devpriv->mut); if (devpriv->ai_cmd_running) goto ai_cmd_exit; devpriv->dux_commands[1] = len; for (i = 0; i < len; ++i) { unsigned int chan = CR_CHAN(cmd->chanlist[i]); unsigned int range = CR_RANGE(cmd->chanlist[i]); devpriv->dux_commands[i + 2] = create_adc_command(chan, range); } ret = send_dux_commands(dev, USBDUX_CMD_MULT_AI); if (ret < 0) goto ai_cmd_exit; if (devpriv->high_speed) { /* * every channel gets a time window of 125us. Thus, if we * sample all 8 channels we need 1ms. If we sample only one * channel we need only 125us */ devpriv->ai_interval = 1; /* find a power of 2 for the interval */ while (devpriv->ai_interval < len) devpriv->ai_interval *= 2; devpriv->ai_timer = cmd->scan_begin_arg / (125000 * devpriv->ai_interval); } else { /* interval always 1ms */ devpriv->ai_interval = 1; devpriv->ai_timer = cmd->scan_begin_arg / 1000000; } if (devpriv->ai_timer < 1) { ret = -EINVAL; goto ai_cmd_exit; } devpriv->ai_counter = devpriv->ai_timer; if (cmd->start_src == TRIG_NOW) { /* enable this acquisition operation */ devpriv->ai_cmd_running = 1; ret = usbdux_submit_urbs(dev, devpriv->ai_urbs, devpriv->n_ai_urbs, 1); if (ret < 0) { devpriv->ai_cmd_running = 0; /* fixme: unlink here?? */ goto ai_cmd_exit; } s->async->inttrig = NULL; } else { /* TRIG_INT */ /* don't enable the acquision operation */ /* wait for an internal signal */ s->async->inttrig = usbdux_ai_inttrig; } ai_cmd_exit: mutex_unlock(&devpriv->mut); return ret; } /* Mode 0 is used to get a single conversion on demand */ static int usbdux_ai_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbdux_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); unsigned int range = CR_RANGE(insn->chanspec); unsigned int val; int ret = -EBUSY; int i; mutex_lock(&devpriv->mut); if (devpriv->ai_cmd_running) goto ai_read_exit; /* set command for the first channel */ devpriv->dux_commands[1] = create_adc_command(chan, range); /* adc commands */ ret = send_dux_commands(dev, USBDUX_CMD_SINGLE_AI); if (ret < 0) goto ai_read_exit; for (i = 0; i < insn->n; i++) { ret = receive_dux_commands(dev, USBDUX_CMD_SINGLE_AI); if (ret < 0) goto ai_read_exit; val = le16_to_cpu(devpriv->insn_buf[1]); /* bipolar data is two's-complement */ if (comedi_range_is_bipolar(s, range)) val = comedi_offset_munge(s, val); data[i] = val; } ai_read_exit: mutex_unlock(&devpriv->mut); return ret ? ret : insn->n; } static int usbdux_ao_insn_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbdux_private *devpriv = dev->private; int ret; mutex_lock(&devpriv->mut); ret = comedi_readback_insn_read(dev, s, insn, data); mutex_unlock(&devpriv->mut); return ret; } static int usbdux_ao_insn_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbdux_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); __le16 *p = (__le16 *)&devpriv->dux_commands[2]; int ret = -EBUSY; int i; mutex_lock(&devpriv->mut); if (devpriv->ao_cmd_running) goto ao_write_exit; /* number of channels: 1 */ devpriv->dux_commands[1] = 1; /* channel number */ devpriv->dux_commands[4] = chan << 6; for (i = 0; i < insn->n; i++) { unsigned int val = data[i]; /* one 16 bit value */ *p = cpu_to_le16(val); ret = send_dux_commands(dev, USBDUX_CMD_AO); if (ret < 0) goto ao_write_exit; s->readback[chan] = val; } ao_write_exit: mutex_unlock(&devpriv->mut); return ret ? ret : insn->n; } static int usbdux_ao_inttrig(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int trig_num) { struct usbdux_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; int ret; if (trig_num != cmd->start_arg) return -EINVAL; mutex_lock(&devpriv->mut); if (!devpriv->ao_cmd_running) { devpriv->ao_cmd_running = 1; ret = usbdux_submit_urbs(dev, devpriv->ao_urbs, devpriv->n_ao_urbs, 0); if (ret < 0) { devpriv->ao_cmd_running = 0; goto ao_trig_exit; } s->async->inttrig = NULL; } else { ret = -EBUSY; } ao_trig_exit: mutex_unlock(&devpriv->mut); return ret; } static int usbdux_ao_cmdtest(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_cmd *cmd) { int err = 0; unsigned int flags; /* Step 1 : check if triggers are trivially valid */ err |= comedi_check_trigger_src(&cmd->start_src, TRIG_NOW | TRIG_INT); if (0) { /* (devpriv->high_speed) */ /* the sampling rate is set by the coversion rate */ flags = TRIG_FOLLOW; } else { /* start a new scan (output at once) with a timer */ flags = TRIG_TIMER; } err |= comedi_check_trigger_src(&cmd->scan_begin_src, flags); if (0) { /* (devpriv->high_speed) */ /* * in usb-2.0 only one conversion it transmitted * but with 8kHz/n */ flags = TRIG_TIMER; } else { /* * all conversion events happen simultaneously with * a rate of 1kHz/n */ flags = TRIG_NOW; } err |= comedi_check_trigger_src(&cmd->convert_src, flags); err |= comedi_check_trigger_src(&cmd->scan_end_src, TRIG_COUNT); err |= comedi_check_trigger_src(&cmd->stop_src, TRIG_COUNT | TRIG_NONE); if (err) return 1; /* Step 2a : make sure trigger sources are unique */ err |= comedi_check_trigger_is_unique(cmd->start_src); err |= comedi_check_trigger_is_unique(cmd->stop_src); /* Step 2b : and mutually compatible */ if (err) return 2; /* Step 3: check if arguments are trivially valid */ err |= comedi_check_trigger_arg_is(&cmd->start_arg, 0); if (cmd->scan_begin_src == TRIG_FOLLOW) /* internal trigger */ err |= comedi_check_trigger_arg_is(&cmd->scan_begin_arg, 0); if (cmd->scan_begin_src == TRIG_TIMER) { err |= comedi_check_trigger_arg_min(&cmd->scan_begin_arg, 1000000); } /* not used now, is for later use */ if (cmd->convert_src == TRIG_TIMER) err |= comedi_check_trigger_arg_min(&cmd->convert_arg, 125000); err |= comedi_check_trigger_arg_is(&cmd->scan_end_arg, cmd->chanlist_len); if (cmd->stop_src == TRIG_COUNT) err |= comedi_check_trigger_arg_min(&cmd->stop_arg, 1); else /* TRIG_NONE */ err |= comedi_check_trigger_arg_is(&cmd->stop_arg, 0); if (err) return 3; return 0; } static int usbdux_ao_cmd(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbdux_private *devpriv = dev->private; struct comedi_cmd *cmd = &s->async->cmd; int ret = -EBUSY; mutex_lock(&devpriv->mut); if (devpriv->ao_cmd_running) goto ao_cmd_exit; /* we count in steps of 1ms (125us) */ /* 125us mode not used yet */ if (0) { /* (devpriv->high_speed) */ /* 125us */ /* timing of the conversion itself: every 125 us */ devpriv->ao_timer = cmd->convert_arg / 125000; } else { /* 1ms */ /* timing of the scan: we get all channels at once */ devpriv->ao_timer = cmd->scan_begin_arg / 1000000; if (devpriv->ao_timer < 1) { ret = -EINVAL; goto ao_cmd_exit; } } devpriv->ao_counter = devpriv->ao_timer; if (cmd->start_src == TRIG_NOW) { /* enable this acquisition operation */ devpriv->ao_cmd_running = 1; ret = usbdux_submit_urbs(dev, devpriv->ao_urbs, devpriv->n_ao_urbs, 0); if (ret < 0) { devpriv->ao_cmd_running = 0; /* fixme: unlink here?? */ goto ao_cmd_exit; } s->async->inttrig = NULL; } else { /* TRIG_INT */ /* submit the urbs later */ /* wait for an internal signal */ s->async->inttrig = usbdux_ao_inttrig; } ao_cmd_exit: mutex_unlock(&devpriv->mut); return ret; } static int usbdux_dio_insn_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { int ret; ret = comedi_dio_insn_config(dev, s, insn, data, 0); if (ret) return ret; /* * We don't tell the firmware here as it would take 8 frames * to submit the information. We do it in the insn_bits. */ return insn->n; } static int usbdux_dio_insn_bits(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbdux_private *devpriv = dev->private; int ret; mutex_lock(&devpriv->mut); comedi_dio_update_state(s, data); /* Always update the hardware. See the (*insn_config). */ devpriv->dux_commands[1] = s->io_bits; devpriv->dux_commands[2] = s->state; /* * This command also tells the firmware to return * the digital input lines. */ ret = send_dux_commands(dev, USBDUX_CMD_DIO_BITS); if (ret < 0) goto dio_exit; ret = receive_dux_commands(dev, USBDUX_CMD_DIO_BITS); if (ret < 0) goto dio_exit; data[1] = le16_to_cpu(devpriv->insn_buf[1]); dio_exit: mutex_unlock(&devpriv->mut); return ret ? ret : insn->n; } static int usbdux_counter_read(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbdux_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); int ret = 0; int i; mutex_lock(&devpriv->mut); for (i = 0; i < insn->n; i++) { ret = send_dux_commands(dev, USBDUX_CMD_TIMER_RD); if (ret < 0) goto counter_read_exit; ret = receive_dux_commands(dev, USBDUX_CMD_TIMER_RD); if (ret < 0) goto counter_read_exit; data[i] = le16_to_cpu(devpriv->insn_buf[chan + 1]); } counter_read_exit: mutex_unlock(&devpriv->mut); return ret ? ret : insn->n; } static int usbdux_counter_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbdux_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); __le16 *p = (__le16 *)&devpriv->dux_commands[2]; int ret = 0; int i; mutex_lock(&devpriv->mut); devpriv->dux_commands[1] = chan; for (i = 0; i < insn->n; i++) { *p = cpu_to_le16(data[i]); ret = send_dux_commands(dev, USBDUX_CMD_TIMER_WR); if (ret < 0) break; } mutex_unlock(&devpriv->mut); return ret ? ret : insn->n; } static int usbdux_counter_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { /* nothing to do so far */ return 2; } static void usbduxsub_unlink_pwm_urbs(struct comedi_device *dev) { struct usbdux_private *devpriv = dev->private; usb_kill_urb(devpriv->pwm_urb); } static void usbdux_pwm_stop(struct comedi_device *dev, int do_unlink) { struct usbdux_private *devpriv = dev->private; if (do_unlink) usbduxsub_unlink_pwm_urbs(dev); devpriv->pwm_cmd_running = 0; } static int usbdux_pwm_cancel(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbdux_private *devpriv = dev->private; int ret; mutex_lock(&devpriv->mut); /* unlink only if it is really running */ usbdux_pwm_stop(dev, devpriv->pwm_cmd_running); ret = send_dux_commands(dev, USBDUX_CMD_PWM_OFF); mutex_unlock(&devpriv->mut); return ret; } static void usbduxsub_pwm_irq(struct urb *urb) { struct comedi_device *dev = urb->context; struct usbdux_private *devpriv = dev->private; int ret; switch (urb->status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -ECONNABORTED: /* * after an unlink command, unplug, ... etc * no unlink needed here. Already shutting down. */ if (devpriv->pwm_cmd_running) usbdux_pwm_stop(dev, 0); return; default: /* a real error */ if (devpriv->pwm_cmd_running) { dev_err(dev->class_dev, "Non-zero urb status received in pwm intr context: %d\n", urb->status); usbdux_pwm_stop(dev, 0); } return; } /* are we actually running? */ if (!devpriv->pwm_cmd_running) return; urb->transfer_buffer_length = devpriv->pwm_buf_sz; urb->dev = comedi_to_usb_dev(dev); urb->status = 0; if (devpriv->pwm_cmd_running) { ret = usb_submit_urb(urb, GFP_ATOMIC); if (ret < 0) { dev_err(dev->class_dev, "pwm urb resubm failed in int-cont. ret=%d", ret); if (ret == -EL2NSYNC) dev_err(dev->class_dev, "buggy USB host controller or bug in IRQ handling!\n"); /* don't do an unlink here */ usbdux_pwm_stop(dev, 0); } } } static int usbduxsub_submit_pwm_urbs(struct comedi_device *dev) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbdux_private *devpriv = dev->private; struct urb *urb = devpriv->pwm_urb; /* in case of a resubmission after an unlink... */ usb_fill_bulk_urb(urb, usb, usb_sndbulkpipe(usb, 4), urb->transfer_buffer, devpriv->pwm_buf_sz, usbduxsub_pwm_irq, dev); return usb_submit_urb(urb, GFP_ATOMIC); } static int usbdux_pwm_period(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int period) { struct usbdux_private *devpriv = dev->private; int fx2delay; if (period < MIN_PWM_PERIOD) return -EAGAIN; fx2delay = (period / (6 * 512 * 1000 / 33)) - 6; if (fx2delay > 255) return -EAGAIN; devpriv->pwm_delay = fx2delay; devpriv->pwm_period = period; return 0; } static int usbdux_pwm_start(struct comedi_device *dev, struct comedi_subdevice *s) { struct usbdux_private *devpriv = dev->private; int ret = 0; mutex_lock(&devpriv->mut); if (devpriv->pwm_cmd_running) goto pwm_start_exit; devpriv->dux_commands[1] = devpriv->pwm_delay; ret = send_dux_commands(dev, USBDUX_CMD_PWM_ON); if (ret < 0) goto pwm_start_exit; /* initialise the buffer */ memset(devpriv->pwm_urb->transfer_buffer, 0, devpriv->pwm_buf_sz); devpriv->pwm_cmd_running = 1; ret = usbduxsub_submit_pwm_urbs(dev); if (ret < 0) devpriv->pwm_cmd_running = 0; pwm_start_exit: mutex_unlock(&devpriv->mut); return ret; } static void usbdux_pwm_pattern(struct comedi_device *dev, struct comedi_subdevice *s, unsigned int chan, unsigned int value, unsigned int sign) { struct usbdux_private *devpriv = dev->private; char pwm_mask = (1 << chan); /* DIO bit for the PWM data */ char sgn_mask = (16 << chan); /* DIO bit for the sign */ char *buf = (char *)(devpriv->pwm_urb->transfer_buffer); int szbuf = devpriv->pwm_buf_sz; int i; for (i = 0; i < szbuf; i++) { char c = *buf; c &= ~pwm_mask; if (i < value) c |= pwm_mask; if (!sign) c &= ~sgn_mask; else c |= sgn_mask; *buf++ = c; } } static int usbdux_pwm_write(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { unsigned int chan = CR_CHAN(insn->chanspec); /* * It doesn't make sense to support more than one value here * because it would just overwrite the PWM buffer. */ if (insn->n != 1) return -EINVAL; /* * The sign is set via a special INSN only, this gives us 8 bits * for normal operation, sign is 0 by default. */ usbdux_pwm_pattern(dev, s, chan, data[0], 0); return insn->n; } static int usbdux_pwm_config(struct comedi_device *dev, struct comedi_subdevice *s, struct comedi_insn *insn, unsigned int *data) { struct usbdux_private *devpriv = dev->private; unsigned int chan = CR_CHAN(insn->chanspec); switch (data[0]) { case INSN_CONFIG_ARM: /* * if not zero the PWM is limited to a certain time which is * not supported here */ if (data[1] != 0) return -EINVAL; return usbdux_pwm_start(dev, s); case INSN_CONFIG_DISARM: return usbdux_pwm_cancel(dev, s); case INSN_CONFIG_GET_PWM_STATUS: data[1] = devpriv->pwm_cmd_running; return 0; case INSN_CONFIG_PWM_SET_PERIOD: return usbdux_pwm_period(dev, s, data[1]); case INSN_CONFIG_PWM_GET_PERIOD: data[1] = devpriv->pwm_period; return 0; case INSN_CONFIG_PWM_SET_H_BRIDGE: /* * data[1] = value * data[2] = sign (for a relay) */ usbdux_pwm_pattern(dev, s, chan, data[1], (data[2] != 0)); return 0; case INSN_CONFIG_PWM_GET_H_BRIDGE: /* values are not kept in this driver, nothing to return here */ return -EINVAL; } return -EINVAL; } static int usbdux_firmware_upload(struct comedi_device *dev, const u8 *data, size_t size, unsigned long context) { struct usb_device *usb = comedi_to_usb_dev(dev); u8 *buf; u8 *tmp; int ret; if (!data) return 0; if (size > USBDUX_FIRMWARE_MAX_LEN) { dev_err(dev->class_dev, "usbdux firmware binary it too large for FX2.\n"); return -ENOMEM; } /* we generate a local buffer for the firmware */ buf = kmemdup(data, size, GFP_KERNEL); if (!buf) return -ENOMEM; /* we need a malloc'ed buffer for usb_control_msg() */ tmp = kmalloc(1, GFP_KERNEL); if (!tmp) { kfree(buf); return -ENOMEM; } /* stop the current firmware on the device */ *tmp = 1; /* 7f92 to one */ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0), USBDUX_FIRMWARE_CMD, VENDOR_DIR_OUT, USBDUX_CPU_CS, 0x0000, tmp, 1, BULK_TIMEOUT); if (ret < 0) { dev_err(dev->class_dev, "can not stop firmware\n"); goto done; } /* upload the new firmware to the device */ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0), USBDUX_FIRMWARE_CMD, VENDOR_DIR_OUT, 0, 0x0000, buf, size, BULK_TIMEOUT); if (ret < 0) { dev_err(dev->class_dev, "firmware upload failed\n"); goto done; } /* start the new firmware on the device */ *tmp = 0; /* 7f92 to zero */ ret = usb_control_msg(usb, usb_sndctrlpipe(usb, 0), USBDUX_FIRMWARE_CMD, VENDOR_DIR_OUT, USBDUX_CPU_CS, 0x0000, tmp, 1, BULK_TIMEOUT); if (ret < 0) dev_err(dev->class_dev, "can not start firmware\n"); done: kfree(tmp); kfree(buf); return ret; } static int usbdux_alloc_usb_buffers(struct comedi_device *dev) { struct usb_device *usb = comedi_to_usb_dev(dev); struct usbdux_private *devpriv = dev->private; struct urb *urb; int i; devpriv->dux_commands = kzalloc(SIZEOFDUXBUFFER, GFP_KERNEL); devpriv->in_buf = kzalloc(SIZEINBUF, GFP_KERNEL); devpriv->insn_buf = kzalloc(SIZEINSNBUF, GFP_KERNEL); devpriv->ai_urbs = kcalloc(devpriv->n_ai_urbs, sizeof(void *), GFP_KERNEL); devpriv->ao_urbs = kcalloc(devpriv->n_ao_urbs, sizeof(void *), GFP_KERNEL); if (!devpriv->dux_commands || !devpriv->in_buf || !devpriv->insn_buf || !devpriv->ai_urbs || !devpriv->ao_urbs) return -ENOMEM; for (i = 0; i < devpriv->n_ai_urbs; i++) { /* one frame: 1ms */ urb = usb_alloc_urb(1, GFP_KERNEL); if (!urb) return -ENOMEM; devpriv->ai_urbs[i] = urb; urb->dev = usb; urb->context = dev; urb->pipe = usb_rcvisocpipe(usb, 6); urb->transfer_flags = URB_ISO_ASAP; urb->transfer_buffer = kzalloc(SIZEINBUF, GFP_KERNEL); if (!urb->transfer_buffer) return -ENOMEM; urb->complete = usbduxsub_ai_isoc_irq; urb->number_of_packets = 1; urb->transfer_buffer_length = SIZEINBUF; urb->iso_frame_desc[0].offset = 0; urb->iso_frame_desc[0].length = SIZEINBUF; } for (i = 0; i < devpriv->n_ao_urbs; i++) { /* one frame: 1ms */ urb = usb_alloc_urb(1, GFP_KERNEL); if (!urb) return -ENOMEM; devpriv->ao_urbs[i] = urb; urb->dev = usb; urb->context = dev; urb->pipe = usb_sndisocpipe(usb, 2); urb->transfer_flags = URB_ISO_ASAP; urb->transfer_buffer = kzalloc(SIZEOUTBUF, GFP_KERNEL); if (!urb->transfer_buffer) return -ENOMEM; urb->complete = usbduxsub_ao_isoc_irq; urb->number_of_packets = 1; urb->transfer_buffer_length = SIZEOUTBUF; urb->iso_frame_desc[0].offset = 0; urb->iso_frame_desc[0].length = SIZEOUTBUF; if (devpriv->high_speed) urb->interval = 8; /* uframes */ else urb->interval = 1; /* frames */ } /* pwm */ if (devpriv->pwm_buf_sz) { urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) return -ENOMEM; devpriv->pwm_urb = urb; /* max bulk ep size in high speed */ urb->transfer_buffer = kzalloc(devpriv->pwm_buf_sz, GFP_KERNEL); if (!urb->transfer_buffer) return -ENOMEM; } return 0; } static void usbdux_free_usb_buffers(struct comedi_device *dev) { struct usbdux_private *devpriv = dev->private; struct urb *urb; int i; urb = devpriv->pwm_urb; if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } if (devpriv->ao_urbs) { for (i = 0; i < devpriv->n_ao_urbs; i++) { urb = devpriv->ao_urbs[i]; if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } } kfree(devpriv->ao_urbs); } if (devpriv->ai_urbs) { for (i = 0; i < devpriv->n_ai_urbs; i++) { urb = devpriv->ai_urbs[i]; if (urb) { kfree(urb->transfer_buffer); usb_free_urb(urb); } } kfree(devpriv->ai_urbs); } kfree(devpriv->insn_buf); kfree(devpriv->in_buf); kfree(devpriv->dux_commands); } static int usbdux_auto_attach(struct comedi_device *dev, unsigned long context_unused) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct usb_device *usb = comedi_to_usb_dev(dev); struct usbdux_private *devpriv; struct comedi_subdevice *s; int ret; devpriv = comedi_alloc_devpriv(dev, sizeof(*devpriv)); if (!devpriv) return -ENOMEM; mutex_init(&devpriv->mut); usb_set_intfdata(intf, devpriv); devpriv->high_speed = (usb->speed == USB_SPEED_HIGH); if (devpriv->high_speed) { devpriv->n_ai_urbs = NUMOFINBUFFERSHIGH; devpriv->n_ao_urbs = NUMOFOUTBUFFERSHIGH; devpriv->pwm_buf_sz = 512; } else { devpriv->n_ai_urbs = NUMOFINBUFFERSFULL; devpriv->n_ao_urbs = NUMOFOUTBUFFERSFULL; } ret = usbdux_alloc_usb_buffers(dev); if (ret) return ret; /* setting to alternate setting 3: enabling iso ep and bulk ep. */ ret = usb_set_interface(usb, intf->altsetting->desc.bInterfaceNumber, 3); if (ret < 0) { dev_err(dev->class_dev, "could not set alternate setting 3 in high speed\n"); return ret; } ret = comedi_load_firmware(dev, &usb->dev, USBDUX_FIRMWARE, usbdux_firmware_upload, 0); if (ret < 0) return ret; ret = comedi_alloc_subdevices(dev, (devpriv->high_speed) ? 5 : 4); if (ret) return ret; /* Analog Input subdevice */ s = &dev->subdevices[0]; dev->read_subdev = s; s->type = COMEDI_SUBD_AI; s->subdev_flags = SDF_READABLE | SDF_GROUND | SDF_CMD_READ; s->n_chan = 8; s->maxdata = 0x0fff; s->len_chanlist = 8; s->range_table = &range_usbdux_ai_range; s->insn_read = usbdux_ai_insn_read; s->do_cmdtest = usbdux_ai_cmdtest; s->do_cmd = usbdux_ai_cmd; s->cancel = usbdux_ai_cancel; /* Analog Output subdevice */ s = &dev->subdevices[1]; dev->write_subdev = s; s->type = COMEDI_SUBD_AO; s->subdev_flags = SDF_WRITABLE | SDF_GROUND | SDF_CMD_WRITE; s->n_chan = 4; s->maxdata = 0x0fff; s->len_chanlist = s->n_chan; s->range_table = &range_usbdux_ao_range; s->do_cmdtest = usbdux_ao_cmdtest; s->do_cmd = usbdux_ao_cmd; s->cancel = usbdux_ao_cancel; s->insn_read = usbdux_ao_insn_read; s->insn_write = usbdux_ao_insn_write; ret = comedi_alloc_subdev_readback(s); if (ret) return ret; /* Digital I/O subdevice */ s = &dev->subdevices[2]; s->type = COMEDI_SUBD_DIO; s->subdev_flags = SDF_READABLE | SDF_WRITABLE; s->n_chan = 8; s->maxdata = 1; s->range_table = &range_digital; s->insn_bits = usbdux_dio_insn_bits; s->insn_config = usbdux_dio_insn_config; /* Counter subdevice */ s = &dev->subdevices[3]; s->type = COMEDI_SUBD_COUNTER; s->subdev_flags = SDF_WRITABLE | SDF_READABLE; s->n_chan = 4; s->maxdata = 0xffff; s->insn_read = usbdux_counter_read; s->insn_write = usbdux_counter_write; s->insn_config = usbdux_counter_config; if (devpriv->high_speed) { /* PWM subdevice */ s = &dev->subdevices[4]; s->type = COMEDI_SUBD_PWM; s->subdev_flags = SDF_WRITABLE | SDF_PWM_HBRIDGE; s->n_chan = 8; s->maxdata = devpriv->pwm_buf_sz; s->insn_write = usbdux_pwm_write; s->insn_config = usbdux_pwm_config; usbdux_pwm_period(dev, s, PWM_DEFAULT_PERIOD); } return 0; } static void usbdux_detach(struct comedi_device *dev) { struct usb_interface *intf = comedi_to_usb_interface(dev); struct usbdux_private *devpriv = dev->private; usb_set_intfdata(intf, NULL); if (!devpriv) return; mutex_lock(&devpriv->mut); /* force unlink all urbs */ usbdux_pwm_stop(dev, 1); usbdux_ao_stop(dev, 1); usbdux_ai_stop(dev, 1); usbdux_free_usb_buffers(dev); mutex_unlock(&devpriv->mut); mutex_destroy(&devpriv->mut); } static struct comedi_driver usbdux_driver = { .driver_name = "usbdux", .module = THIS_MODULE, .auto_attach = usbdux_auto_attach, .detach = usbdux_detach, }; static int usbdux_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { return comedi_usb_auto_config(intf, &usbdux_driver, 0); } static const struct usb_device_id usbdux_usb_table[] = { { USB_DEVICE(0x13d8, 0x0001) }, { USB_DEVICE(0x13d8, 0x0002) }, { } }; MODULE_DEVICE_TABLE(usb, usbdux_usb_table); static struct usb_driver usbdux_usb_driver = { .name = "usbdux", .probe = usbdux_usb_probe, .disconnect = comedi_usb_auto_unconfig, .id_table = usbdux_usb_table, }; module_comedi_usb_driver(usbdux_driver, usbdux_usb_driver); MODULE_AUTHOR("Bernd Porr, BerndPorr@f2s.com"); MODULE_DESCRIPTION("Stirling/ITL USB-DUX -- Bernd.Porr@f2s.com"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(USBDUX_FIRMWARE);
13 1 1 2 1 1 2 18 18 18 10 3 5 14 8 2 12 3 2 8 4 1 1 1 1 16 16 10 2 2 18 11 11 9 1 8 9 9 9 4 5 1 1 2 16 4 2 1 3 5 4 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2017 Red Hat, Inc. */ #include "fuse_i.h" #include <linux/uio.h> #include <linux/compat.h> #include <linux/fileattr.h> #include <linux/fsverity.h> static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args, struct fuse_ioctl_out *outarg) { ssize_t ret; args->out_args[0].size = sizeof(*outarg); args->out_args[0].value = outarg; ret = fuse_simple_request(fm, args); /* Translate ENOSYS, which shouldn't be returned from fs */ if (ret == -ENOSYS) ret = -ENOTTY; if (ret >= 0 && outarg->result == -ENOSYS) outarg->result = -ENOTTY; return ret; } /* * CUSE servers compiled on 32bit broke on 64bit kernels because the * ABI was defined to be 'struct iovec' which is different on 32bit * and 64bit. Fortunately we can determine which structure the server * used from the size of the reply. */ static int fuse_copy_ioctl_iovec_old(struct iovec *dst, void *src, size_t transferred, unsigned count, bool is_compat) { #ifdef CONFIG_COMPAT if (count * sizeof(struct compat_iovec) == transferred) { struct compat_iovec *ciov = src; unsigned i; /* * With this interface a 32bit server cannot support * non-compat (i.e. ones coming from 64bit apps) ioctl * requests */ if (!is_compat) return -EINVAL; for (i = 0; i < count; i++) { dst[i].iov_base = compat_ptr(ciov[i].iov_base); dst[i].iov_len = ciov[i].iov_len; } return 0; } #endif if (count * sizeof(struct iovec) != transferred) return -EIO; memcpy(dst, src, transferred); return 0; } /* Make sure iov_length() won't overflow */ static int fuse_verify_ioctl_iov(struct fuse_conn *fc, struct iovec *iov, size_t count) { size_t n; u32 max = fc->max_pages << PAGE_SHIFT; for (n = 0; n < count; n++, iov++) { if (iov->iov_len > (size_t) max) return -ENOMEM; max -= iov->iov_len; } return 0; } static int fuse_copy_ioctl_iovec(struct fuse_conn *fc, struct iovec *dst, void *src, size_t transferred, unsigned count, bool is_compat) { unsigned i; struct fuse_ioctl_iovec *fiov = src; if (fc->minor < 16) { return fuse_copy_ioctl_iovec_old(dst, src, transferred, count, is_compat); } if (count * sizeof(struct fuse_ioctl_iovec) != transferred) return -EIO; for (i = 0; i < count; i++) { /* Did the server supply an inappropriate value? */ if (fiov[i].base != (unsigned long) fiov[i].base || fiov[i].len != (unsigned long) fiov[i].len) return -EIO; dst[i].iov_base = (void __user *) (unsigned long) fiov[i].base; dst[i].iov_len = (size_t) fiov[i].len; #ifdef CONFIG_COMPAT if (is_compat && (ptr_to_compat(dst[i].iov_base) != fiov[i].base || (compat_size_t) dst[i].iov_len != fiov[i].len)) return -EIO; #endif } return 0; } /* For fs-verity, determine iov lengths from input */ static int fuse_setup_measure_verity(unsigned long arg, struct iovec *iov) { __u16 digest_size; struct fsverity_digest __user *uarg = (void __user *)arg; if (copy_from_user(&digest_size, &uarg->digest_size, sizeof(digest_size))) return -EFAULT; if (digest_size > SIZE_MAX - sizeof(struct fsverity_digest)) return -EINVAL; iov->iov_len = sizeof(struct fsverity_digest) + digest_size; return 0; } static int fuse_setup_enable_verity(unsigned long arg, struct iovec *iov, unsigned int *in_iovs) { struct fsverity_enable_arg enable; struct fsverity_enable_arg __user *uarg = (void __user *)arg; const __u32 max_buffer_len = FUSE_MAX_MAX_PAGES * PAGE_SIZE; if (copy_from_user(&enable, uarg, sizeof(enable))) return -EFAULT; if (enable.salt_size > max_buffer_len || enable.sig_size > max_buffer_len) return -ENOMEM; if (enable.salt_size > 0) { iov++; (*in_iovs)++; iov->iov_base = u64_to_user_ptr(enable.salt_ptr); iov->iov_len = enable.salt_size; } if (enable.sig_size > 0) { iov++; (*in_iovs)++; iov->iov_base = u64_to_user_ptr(enable.sig_ptr); iov->iov_len = enable.sig_size; } return 0; } /* * For ioctls, there is no generic way to determine how much memory * needs to be read and/or written. Furthermore, ioctls are allowed * to dereference the passed pointer, so the parameter requires deep * copying but FUSE has no idea whatsoever about what to copy in or * out. * * This is solved by allowing FUSE server to retry ioctl with * necessary in/out iovecs. Let's assume the ioctl implementation * needs to read in the following structure. * * struct a { * char *buf; * size_t buflen; * } * * On the first callout to FUSE server, inarg->in_size and * inarg->out_size will be NULL; then, the server completes the ioctl * with FUSE_IOCTL_RETRY set in out->flags, out->in_iovs set to 1 and * the actual iov array to * * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) } } * * which tells FUSE to copy in the requested area and retry the ioctl. * On the second round, the server has access to the structure and * from that it can tell what to look for next, so on the invocation, * it sets FUSE_IOCTL_RETRY, out->in_iovs to 2 and iov array to * * { { .iov_base = inarg.arg, .iov_len = sizeof(struct a) }, * { .iov_base = a.buf, .iov_len = a.buflen } } * * FUSE will copy both struct a and the pointed buffer from the * process doing the ioctl and retry ioctl with both struct a and the * buffer. * * This time, FUSE server has everything it needs and completes ioctl * without FUSE_IOCTL_RETRY which finishes the ioctl call. * * Copying data out works the same way. * * Note that if FUSE_IOCTL_UNRESTRICTED is clear, the kernel * automatically initializes in and out iovs by decoding @cmd with * _IOC_* macros and the server is not allowed to request RETRY. This * limits ioctl data transfers to well-formed ioctls and is the forced * behavior for all FUSE servers. */ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags) { struct fuse_file *ff = file->private_data; struct fuse_mount *fm = ff->fm; struct fuse_ioctl_in inarg = { .fh = ff->fh, .cmd = cmd, .arg = arg, .flags = flags }; struct fuse_ioctl_out outarg; struct iovec *iov_page = NULL; struct iovec *in_iov = NULL, *out_iov = NULL; unsigned int in_iovs = 0, out_iovs = 0, max_pages; size_t in_size, out_size, c; ssize_t transferred; int err, i; struct iov_iter ii; struct fuse_args_pages ap = {}; #if BITS_PER_LONG == 32 inarg.flags |= FUSE_IOCTL_32BIT; #else if (flags & FUSE_IOCTL_COMPAT) { inarg.flags |= FUSE_IOCTL_32BIT; #ifdef CONFIG_X86_X32_ABI if (in_x32_syscall()) inarg.flags |= FUSE_IOCTL_COMPAT_X32; #endif } #endif /* assume all the iovs returned by client always fits in a page */ BUILD_BUG_ON(sizeof(struct fuse_ioctl_iovec) * FUSE_IOCTL_MAX_IOV > PAGE_SIZE); err = -ENOMEM; ap.pages = fuse_pages_alloc(fm->fc->max_pages, GFP_KERNEL, &ap.descs); iov_page = (struct iovec *) __get_free_page(GFP_KERNEL); if (!ap.pages || !iov_page) goto out; fuse_page_descs_length_init(ap.descs, 0, fm->fc->max_pages); /* * If restricted, initialize IO parameters as encoded in @cmd. * RETRY from server is not allowed. */ if (!(flags & FUSE_IOCTL_UNRESTRICTED)) { struct iovec *iov = iov_page; iov->iov_base = (void __user *)arg; iov->iov_len = _IOC_SIZE(cmd); if (_IOC_DIR(cmd) & _IOC_WRITE) { in_iov = iov; in_iovs = 1; } if (_IOC_DIR(cmd) & _IOC_READ) { out_iov = iov; out_iovs = 1; } err = 0; switch (cmd) { case FS_IOC_MEASURE_VERITY: err = fuse_setup_measure_verity(arg, iov); break; case FS_IOC_ENABLE_VERITY: err = fuse_setup_enable_verity(arg, iov, &in_iovs); break; } if (err) goto out; } retry: inarg.in_size = in_size = iov_length(in_iov, in_iovs); inarg.out_size = out_size = iov_length(out_iov, out_iovs); /* * Out data can be used either for actual out data or iovs, * make sure there always is at least one page. */ out_size = max_t(size_t, out_size, PAGE_SIZE); max_pages = DIV_ROUND_UP(max(in_size, out_size), PAGE_SIZE); /* make sure there are enough buffer pages and init request with them */ err = -ENOMEM; if (max_pages > fm->fc->max_pages) goto out; while (ap.num_pages < max_pages) { ap.pages[ap.num_pages] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); if (!ap.pages[ap.num_pages]) goto out; ap.num_pages++; } /* okay, let's send it to the client */ ap.args.opcode = FUSE_IOCTL; ap.args.nodeid = ff->nodeid; ap.args.in_numargs = 1; ap.args.in_args[0].size = sizeof(inarg); ap.args.in_args[0].value = &inarg; if (in_size) { ap.args.in_numargs++; ap.args.in_args[1].size = in_size; ap.args.in_pages = true; err = -EFAULT; iov_iter_init(&ii, ITER_SOURCE, in_iov, in_iovs, in_size); for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { c = copy_page_from_iter(ap.pages[i], 0, PAGE_SIZE, &ii); if (c != PAGE_SIZE && iov_iter_count(&ii)) goto out; } } ap.args.out_numargs = 2; ap.args.out_args[1].size = out_size; ap.args.out_pages = true; ap.args.out_argvar = true; transferred = fuse_send_ioctl(fm, &ap.args, &outarg); err = transferred; if (transferred < 0) goto out; /* did it ask for retry? */ if (outarg.flags & FUSE_IOCTL_RETRY) { void *vaddr; /* no retry if in restricted mode */ err = -EIO; if (!(flags & FUSE_IOCTL_UNRESTRICTED)) goto out; in_iovs = outarg.in_iovs; out_iovs = outarg.out_iovs; /* * Make sure things are in boundary, separate checks * are to protect against overflow. */ err = -ENOMEM; if (in_iovs > FUSE_IOCTL_MAX_IOV || out_iovs > FUSE_IOCTL_MAX_IOV || in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV) goto out; vaddr = kmap_local_page(ap.pages[0]); err = fuse_copy_ioctl_iovec(fm->fc, iov_page, vaddr, transferred, in_iovs + out_iovs, (flags & FUSE_IOCTL_COMPAT) != 0); kunmap_local(vaddr); if (err) goto out; in_iov = iov_page; out_iov = in_iov + in_iovs; err = fuse_verify_ioctl_iov(fm->fc, in_iov, in_iovs); if (err) goto out; err = fuse_verify_ioctl_iov(fm->fc, out_iov, out_iovs); if (err) goto out; goto retry; } err = -EIO; if (transferred > inarg.out_size) goto out; err = -EFAULT; iov_iter_init(&ii, ITER_DEST, out_iov, out_iovs, transferred); for (i = 0; iov_iter_count(&ii) && !WARN_ON(i >= ap.num_pages); i++) { c = copy_page_to_iter(ap.pages[i], 0, PAGE_SIZE, &ii); if (c != PAGE_SIZE && iov_iter_count(&ii)) goto out; } err = 0; out: free_page((unsigned long) iov_page); while (ap.num_pages) __free_page(ap.pages[--ap.num_pages]); kfree(ap.pages); return err ? err : outarg.result; } EXPORT_SYMBOL_GPL(fuse_do_ioctl); long fuse_ioctl_common(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags) { struct inode *inode = file_inode(file); struct fuse_conn *fc = get_fuse_conn(inode); if (!fuse_allow_current_process(fc)) return -EACCES; if (fuse_is_bad(inode)) return -EIO; return fuse_do_ioctl(file, cmd, arg, flags); } long fuse_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return fuse_ioctl_common(file, cmd, arg, 0); } long fuse_file_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return fuse_ioctl_common(file, cmd, arg, FUSE_IOCTL_COMPAT); } static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff, unsigned int cmd, void *ptr, size_t size) { struct fuse_mount *fm = ff->fm; struct fuse_ioctl_in inarg; struct fuse_ioctl_out outarg; FUSE_ARGS(args); int err; memset(&inarg, 0, sizeof(inarg)); inarg.fh = ff->fh; inarg.cmd = cmd; #if BITS_PER_LONG == 32 inarg.flags |= FUSE_IOCTL_32BIT; #endif if (S_ISDIR(inode->i_mode)) inarg.flags |= FUSE_IOCTL_DIR; if (_IOC_DIR(cmd) & _IOC_READ) inarg.out_size = size; if (_IOC_DIR(cmd) & _IOC_WRITE) inarg.in_size = size; args.opcode = FUSE_IOCTL; args.nodeid = ff->nodeid; args.in_numargs = 2; args.in_args[0].size = sizeof(inarg); args.in_args[0].value = &inarg; args.in_args[1].size = inarg.in_size; args.in_args[1].value = ptr; args.out_numargs = 2; args.out_args[1].size = inarg.out_size; args.out_args[1].value = ptr; err = fuse_send_ioctl(fm, &args, &outarg); if (!err) { if (outarg.result < 0) err = outarg.result; else if (outarg.flags & FUSE_IOCTL_RETRY) err = -EIO; } return err; } static struct fuse_file *fuse_priv_ioctl_prepare(struct inode *inode) { struct fuse_mount *fm = get_fuse_mount(inode); bool isdir = S_ISDIR(inode->i_mode); if (!fuse_allow_current_process(fm->fc)) return ERR_PTR(-EACCES); if (fuse_is_bad(inode)) return ERR_PTR(-EIO); if (!S_ISREG(inode->i_mode) && !isdir) return ERR_PTR(-ENOTTY); return fuse_file_open(fm, get_node_id(inode), O_RDONLY, isdir); } static void fuse_priv_ioctl_cleanup(struct inode *inode, struct fuse_file *ff) { fuse_file_release(inode, ff, O_RDONLY, NULL, S_ISDIR(inode->i_mode)); } int fuse_fileattr_get(struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct fuse_file *ff; unsigned int flags; struct fsxattr xfa; int err; ff = fuse_priv_ioctl_prepare(inode); if (IS_ERR(ff)) return PTR_ERR(ff); if (fa->flags_valid) { err = fuse_priv_ioctl(inode, ff, FS_IOC_GETFLAGS, &flags, sizeof(flags)); if (err) goto cleanup; fileattr_fill_flags(fa, flags); } else { err = fuse_priv_ioctl(inode, ff, FS_IOC_FSGETXATTR, &xfa, sizeof(xfa)); if (err) goto cleanup; fileattr_fill_xflags(fa, xfa.fsx_xflags); fa->fsx_extsize = xfa.fsx_extsize; fa->fsx_nextents = xfa.fsx_nextents; fa->fsx_projid = xfa.fsx_projid; fa->fsx_cowextsize = xfa.fsx_cowextsize; } cleanup: fuse_priv_ioctl_cleanup(inode, ff); return err; } int fuse_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry, struct fileattr *fa) { struct inode *inode = d_inode(dentry); struct fuse_file *ff; unsigned int flags = fa->flags; struct fsxattr xfa; int err; ff = fuse_priv_ioctl_prepare(inode); if (IS_ERR(ff)) return PTR_ERR(ff); if (fa->flags_valid) { err = fuse_priv_ioctl(inode, ff, FS_IOC_SETFLAGS, &flags, sizeof(flags)); if (err) goto cleanup; } else { memset(&xfa, 0, sizeof(xfa)); xfa.fsx_xflags = fa->fsx_xflags; xfa.fsx_extsize = fa->fsx_extsize; xfa.fsx_nextents = fa->fsx_nextents; xfa.fsx_projid = fa->fsx_projid; xfa.fsx_cowextsize = fa->fsx_cowextsize; err = fuse_priv_ioctl(inode, ff, FS_IOC_FSSETXATTR, &xfa, sizeof(xfa)); } cleanup: fuse_priv_ioctl_cleanup(inode, ff); return err; }
2 16 18 18 18 18 2 18 28 25 32 21 7 41 41 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 /* * Linear conversion Plug-In * Copyright (c) 1999 by Jaroslav Kysela <perex@perex.cz>, * Abramo Bagnara <abramo@alsa-project.org> * * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include <linux/time.h> #include <sound/core.h> #include <sound/pcm.h> #include "pcm_plugin.h" /* * Basic linear conversion plugin */ struct linear_priv { int cvt_endian; /* need endian conversion? */ unsigned int src_ofs; /* byte offset in source format */ unsigned int dst_ofs; /* byte soffset in destination format */ unsigned int copy_ofs; /* byte offset in temporary u32 data */ unsigned int dst_bytes; /* byte size of destination format */ unsigned int copy_bytes; /* bytes to copy per conversion */ unsigned int flip; /* MSB flip for signeness, done after endian conv */ }; static inline void do_convert(struct linear_priv *data, unsigned char *dst, unsigned char *src) { unsigned int tmp = 0; unsigned char *p = (unsigned char *)&tmp; memcpy(p + data->copy_ofs, src + data->src_ofs, data->copy_bytes); if (data->cvt_endian) tmp = swab32(tmp); tmp ^= data->flip; memcpy(dst, p + data->dst_ofs, data->dst_bytes); } static void convert(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { struct linear_priv *data = (struct linear_priv *)plugin->extra_data; int channel; int nchannels = plugin->src_format.channels; for (channel = 0; channel < nchannels; ++channel) { char *src; char *dst; int src_step, dst_step; snd_pcm_uframes_t frames1; if (!src_channels[channel].enabled) { if (dst_channels[channel].wanted) snd_pcm_area_silence(&dst_channels[channel].area, 0, frames, plugin->dst_format.format); dst_channels[channel].enabled = 0; continue; } dst_channels[channel].enabled = 1; src = src_channels[channel].area.addr + src_channels[channel].area.first / 8; dst = dst_channels[channel].area.addr + dst_channels[channel].area.first / 8; src_step = src_channels[channel].area.step / 8; dst_step = dst_channels[channel].area.step / 8; frames1 = frames; while (frames1-- > 0) { do_convert(data, dst, src); src += src_step; dst += dst_step; } } } static snd_pcm_sframes_t linear_transfer(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { if (snd_BUG_ON(!plugin || !src_channels || !dst_channels)) return -ENXIO; if (frames == 0) return 0; #ifdef CONFIG_SND_DEBUG { unsigned int channel; for (channel = 0; channel < plugin->src_format.channels; channel++) { if (snd_BUG_ON(src_channels[channel].area.first % 8 || src_channels[channel].area.step % 8)) return -ENXIO; if (snd_BUG_ON(dst_channels[channel].area.first % 8 || dst_channels[channel].area.step % 8)) return -ENXIO; } } #endif if (frames > dst_channels[0].frames) frames = dst_channels[0].frames; convert(plugin, src_channels, dst_channels, frames); return frames; } static void init_data(struct linear_priv *data, snd_pcm_format_t src_format, snd_pcm_format_t dst_format) { int src_le, dst_le, src_bytes, dst_bytes; src_bytes = snd_pcm_format_width(src_format) / 8; dst_bytes = snd_pcm_format_width(dst_format) / 8; src_le = snd_pcm_format_little_endian(src_format) > 0; dst_le = snd_pcm_format_little_endian(dst_format) > 0; data->dst_bytes = dst_bytes; data->cvt_endian = src_le != dst_le; data->copy_bytes = src_bytes < dst_bytes ? src_bytes : dst_bytes; if (src_le) { data->copy_ofs = 4 - data->copy_bytes; data->src_ofs = src_bytes - data->copy_bytes; } else data->src_ofs = snd_pcm_format_physical_width(src_format) / 8 - src_bytes; if (dst_le) data->dst_ofs = 4 - data->dst_bytes; else data->dst_ofs = snd_pcm_format_physical_width(dst_format) / 8 - dst_bytes; if (snd_pcm_format_signed(src_format) != snd_pcm_format_signed(dst_format)) { if (dst_le) data->flip = (__force u32)cpu_to_le32(0x80000000); else data->flip = (__force u32)cpu_to_be32(0x80000000); } } int snd_pcm_plugin_build_linear(struct snd_pcm_substream *plug, struct snd_pcm_plugin_format *src_format, struct snd_pcm_plugin_format *dst_format, struct snd_pcm_plugin **r_plugin) { int err; struct linear_priv *data; struct snd_pcm_plugin *plugin; if (snd_BUG_ON(!r_plugin)) return -ENXIO; *r_plugin = NULL; if (snd_BUG_ON(src_format->rate != dst_format->rate)) return -ENXIO; if (snd_BUG_ON(src_format->channels != dst_format->channels)) return -ENXIO; if (snd_BUG_ON(!snd_pcm_format_linear(src_format->format) || !snd_pcm_format_linear(dst_format->format))) return -ENXIO; err = snd_pcm_plugin_build(plug, "linear format conversion", src_format, dst_format, sizeof(struct linear_priv), &plugin); if (err < 0) return err; data = (struct linear_priv *)plugin->extra_data; init_data(data, src_format->format, dst_format->format); plugin->transfer = linear_transfer; *r_plugin = plugin; return 0; }
6 6 6 5 1 4 2 1 1 2 2 12 1 2 7 5 3 6 1 5 5 5 5 5 5 5 5 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 // SPDX-License-Identifier: GPL-2.0-or-later /****************************************************************************** * usbtouchscreen.c * Driver for USB Touchscreens, supporting those devices: * - eGalax Touchkit * includes eTurboTouch CT-410/510/700 * - 3M/Microtouch EX II series * - ITM * - PanJit TouchSet * - eTurboTouch * - Gunze AHL61 * - DMC TSC-10/25 * - IRTOUCHSYSTEMS/UNITOP * - IdealTEK URTC1000 * - General Touch * - GoTop Super_Q2/GogoPen/PenPower tablets * - JASTEC USB touch controller/DigiTech DTR-02U * - Zytronic capacitive touchscreen * - NEXIO/iNexio * - Elo TouchSystems 2700 IntelliTouch * - EasyTouch USB Dual/Multi touch controller from Data Modul * * Copyright (C) 2004-2007 by Daniel Ritz <daniel.ritz@gmx.ch> * Copyright (C) by Todd E. Johnson (mtouchusb.c) * * Driver is based on touchkitusb.c * - ITM parts are from itmtouch.c * - 3M parts are from mtouchusb.c * - PanJit parts are from an unmerged driver by Lanslott Gish * - DMC TSC 10/25 are from Holger Schurig, with ideas from an unmerged * driver from Marius Vollmer * *****************************************************************************/ //#define DEBUG #include <linux/kernel.h> #include <linux/slab.h> #include <linux/input.h> #include <linux/module.h> #include <linux/usb.h> #include <linux/usb/input.h> #include <linux/hid.h> #include <linux/mutex.h> static bool swap_xy; module_param(swap_xy, bool, 0644); MODULE_PARM_DESC(swap_xy, "If set X and Y axes are swapped."); static bool hwcalib_xy; module_param(hwcalib_xy, bool, 0644); MODULE_PARM_DESC(hwcalib_xy, "If set hw-calibrated X/Y are used if available"); /* device specifc data/functions */ struct usbtouch_usb; struct usbtouch_device_info { int min_xc, max_xc; int min_yc, max_yc; int min_press, max_press; int rept_size; /* * Always service the USB devices irq not just when the input device is * open. This is useful when devices have a watchdog which prevents us * from periodically polling the device. Leave this unset unless your * touchscreen device requires it, as it does consume more of the USB * bandwidth. */ bool irq_always; void (*process_pkt) (struct usbtouch_usb *usbtouch, unsigned char *pkt, int len); /* * used to get the packet len. possible return values: * > 0: packet len * = 0: skip one byte * < 0: -return value more bytes needed */ int (*get_pkt_len) (unsigned char *pkt, int len); int (*read_data) (struct usbtouch_usb *usbtouch, unsigned char *pkt); int (*alloc) (struct usbtouch_usb *usbtouch); int (*init) (struct usbtouch_usb *usbtouch); void (*exit) (struct usbtouch_usb *usbtouch); }; /* a usbtouch device */ struct usbtouch_usb { unsigned char *data; dma_addr_t data_dma; int data_size; unsigned char *buffer; int buf_len; struct urb *irq; struct usb_interface *interface; struct input_dev *input; struct usbtouch_device_info *type; struct mutex pm_mutex; /* serialize access to open/suspend */ bool is_open; char name[128]; char phys[64]; void *priv; int x, y; int touch, press; }; /* device types */ enum { DEVTYPE_IGNORE = -1, DEVTYPE_EGALAX, DEVTYPE_PANJIT, DEVTYPE_3M, DEVTYPE_ITM, DEVTYPE_ETURBO, DEVTYPE_GUNZE, DEVTYPE_DMC_TSC10, DEVTYPE_IRTOUCH, DEVTYPE_IRTOUCH_HIRES, DEVTYPE_IDEALTEK, DEVTYPE_GENERAL_TOUCH, DEVTYPE_GOTOP, DEVTYPE_JASTEC, DEVTYPE_E2I, DEVTYPE_ZYTRONIC, DEVTYPE_TC45USB, DEVTYPE_NEXIO, DEVTYPE_ELO, DEVTYPE_ETOUCH, }; #define USB_DEVICE_HID_CLASS(vend, prod) \ .match_flags = USB_DEVICE_ID_MATCH_INT_CLASS \ | USB_DEVICE_ID_MATCH_DEVICE, \ .idVendor = (vend), \ .idProduct = (prod), \ .bInterfaceClass = USB_INTERFACE_CLASS_HID static const struct usb_device_id usbtouch_devices[] = { #ifdef CONFIG_TOUCHSCREEN_USB_EGALAX /* ignore the HID capable devices, handled by usbhid */ {USB_DEVICE_HID_CLASS(0x0eef, 0x0001), .driver_info = DEVTYPE_IGNORE}, {USB_DEVICE_HID_CLASS(0x0eef, 0x0002), .driver_info = DEVTYPE_IGNORE}, /* normal device IDs */ {USB_DEVICE(0x3823, 0x0001), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x3823, 0x0002), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x0123, 0x0001), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x0eef, 0x0001), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x0eef, 0x0002), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x1234, 0x0001), .driver_info = DEVTYPE_EGALAX}, {USB_DEVICE(0x1234, 0x0002), .driver_info = DEVTYPE_EGALAX}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_PANJIT {USB_DEVICE(0x134c, 0x0001), .driver_info = DEVTYPE_PANJIT}, {USB_DEVICE(0x134c, 0x0002), .driver_info = DEVTYPE_PANJIT}, {USB_DEVICE(0x134c, 0x0003), .driver_info = DEVTYPE_PANJIT}, {USB_DEVICE(0x134c, 0x0004), .driver_info = DEVTYPE_PANJIT}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_3M {USB_DEVICE(0x0596, 0x0001), .driver_info = DEVTYPE_3M}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ITM {USB_DEVICE(0x0403, 0xf9e9), .driver_info = DEVTYPE_ITM}, {USB_DEVICE(0x16e3, 0xf9e9), .driver_info = DEVTYPE_ITM}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ETURBO {USB_DEVICE(0x1234, 0x5678), .driver_info = DEVTYPE_ETURBO}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GUNZE {USB_DEVICE(0x0637, 0x0001), .driver_info = DEVTYPE_GUNZE}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_DMC_TSC10 {USB_DEVICE(0x0afa, 0x03e8), .driver_info = DEVTYPE_DMC_TSC10}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_IRTOUCH {USB_DEVICE(0x255e, 0x0001), .driver_info = DEVTYPE_IRTOUCH}, {USB_DEVICE(0x595a, 0x0001), .driver_info = DEVTYPE_IRTOUCH}, {USB_DEVICE(0x6615, 0x0001), .driver_info = DEVTYPE_IRTOUCH}, {USB_DEVICE(0x6615, 0x0012), .driver_info = DEVTYPE_IRTOUCH_HIRES}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK {USB_DEVICE(0x1391, 0x1000), .driver_info = DEVTYPE_IDEALTEK}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH {USB_DEVICE(0x0dfc, 0x0001), .driver_info = DEVTYPE_GENERAL_TOUCH}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GOTOP {USB_DEVICE(0x08f2, 0x007f), .driver_info = DEVTYPE_GOTOP}, {USB_DEVICE(0x08f2, 0x00ce), .driver_info = DEVTYPE_GOTOP}, {USB_DEVICE(0x08f2, 0x00f4), .driver_info = DEVTYPE_GOTOP}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_JASTEC {USB_DEVICE(0x0f92, 0x0001), .driver_info = DEVTYPE_JASTEC}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_E2I {USB_DEVICE(0x1ac7, 0x0001), .driver_info = DEVTYPE_E2I}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ZYTRONIC {USB_DEVICE(0x14c8, 0x0003), .driver_info = DEVTYPE_ZYTRONIC}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ETT_TC45USB /* TC5UH */ {USB_DEVICE(0x0664, 0x0309), .driver_info = DEVTYPE_TC45USB}, /* TC4UM */ {USB_DEVICE(0x0664, 0x0306), .driver_info = DEVTYPE_TC45USB}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_NEXIO /* data interface only */ {USB_DEVICE_AND_INTERFACE_INFO(0x10f0, 0x2002, 0x0a, 0x00, 0x00), .driver_info = DEVTYPE_NEXIO}, {USB_DEVICE_AND_INTERFACE_INFO(0x1870, 0x0001, 0x0a, 0x00, 0x00), .driver_info = DEVTYPE_NEXIO}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ELO {USB_DEVICE(0x04e7, 0x0020), .driver_info = DEVTYPE_ELO}, #endif #ifdef CONFIG_TOUCHSCREEN_USB_EASYTOUCH {USB_DEVICE(0x7374, 0x0001), .driver_info = DEVTYPE_ETOUCH}, #endif {} }; /***************************************************************************** * e2i Part */ #ifdef CONFIG_TOUCHSCREEN_USB_E2I static int e2i_init(struct usbtouch_usb *usbtouch) { int ret; struct usb_device *udev = interface_to_usbdev(usbtouch->interface); ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0x01, 0x02, 0x0000, 0x0081, NULL, 0, USB_CTRL_SET_TIMEOUT); dev_dbg(&usbtouch->interface->dev, "%s - usb_control_msg - E2I_RESET - bytes|err: %d\n", __func__, ret); return ret; } static int e2i_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { int tmp = (pkt[0] << 8) | pkt[1]; dev->x = (pkt[2] << 8) | pkt[3]; dev->y = (pkt[4] << 8) | pkt[5]; tmp = tmp - 0xA000; dev->touch = (tmp > 0); dev->press = (tmp > 0 ? tmp : 0); return 1; } #endif /***************************************************************************** * eGalax part */ #ifdef CONFIG_TOUCHSCREEN_USB_EGALAX #ifndef MULTI_PACKET #define MULTI_PACKET #endif #define EGALAX_PKT_TYPE_MASK 0xFE #define EGALAX_PKT_TYPE_REPT 0x80 #define EGALAX_PKT_TYPE_DIAG 0x0A static int egalax_init(struct usbtouch_usb *usbtouch) { int ret, i; unsigned char *buf; struct usb_device *udev = interface_to_usbdev(usbtouch->interface); /* * An eGalax diagnostic packet kicks the device into using the right * protocol. We send a "check active" packet. The response will be * read later and ignored. */ buf = kmalloc(3, GFP_KERNEL); if (!buf) return -ENOMEM; buf[0] = EGALAX_PKT_TYPE_DIAG; buf[1] = 1; /* length */ buf[2] = 'A'; /* command - check active */ for (i = 0; i < 3; i++) { ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), 0, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, buf, 3, USB_CTRL_SET_TIMEOUT); if (ret >= 0) { ret = 0; break; } if (ret != -EPIPE) break; } kfree(buf); return ret; } static int egalax_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { if ((pkt[0] & EGALAX_PKT_TYPE_MASK) != EGALAX_PKT_TYPE_REPT) return 0; dev->x = ((pkt[3] & 0x0F) << 7) | (pkt[4] & 0x7F); dev->y = ((pkt[1] & 0x0F) << 7) | (pkt[2] & 0x7F); dev->touch = pkt[0] & 0x01; return 1; } static int egalax_get_pkt_len(unsigned char *buf, int len) { switch (buf[0] & EGALAX_PKT_TYPE_MASK) { case EGALAX_PKT_TYPE_REPT: return 5; case EGALAX_PKT_TYPE_DIAG: if (len < 2) return -1; return buf[1] + 2; } return 0; } #endif /***************************************************************************** * EasyTouch part */ #ifdef CONFIG_TOUCHSCREEN_USB_EASYTOUCH #ifndef MULTI_PACKET #define MULTI_PACKET #endif #define ETOUCH_PKT_TYPE_MASK 0xFE #define ETOUCH_PKT_TYPE_REPT 0x80 #define ETOUCH_PKT_TYPE_REPT2 0xB0 #define ETOUCH_PKT_TYPE_DIAG 0x0A static int etouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { if ((pkt[0] & ETOUCH_PKT_TYPE_MASK) != ETOUCH_PKT_TYPE_REPT && (pkt[0] & ETOUCH_PKT_TYPE_MASK) != ETOUCH_PKT_TYPE_REPT2) return 0; dev->x = ((pkt[1] & 0x1F) << 7) | (pkt[2] & 0x7F); dev->y = ((pkt[3] & 0x1F) << 7) | (pkt[4] & 0x7F); dev->touch = pkt[0] & 0x01; return 1; } static int etouch_get_pkt_len(unsigned char *buf, int len) { switch (buf[0] & ETOUCH_PKT_TYPE_MASK) { case ETOUCH_PKT_TYPE_REPT: case ETOUCH_PKT_TYPE_REPT2: return 5; case ETOUCH_PKT_TYPE_DIAG: if (len < 2) return -1; return buf[1] + 2; } return 0; } #endif /***************************************************************************** * PanJit Part */ #ifdef CONFIG_TOUCHSCREEN_USB_PANJIT static int panjit_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1]; dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3]; dev->touch = pkt[0] & 0x01; return 1; } #endif /***************************************************************************** * 3M/Microtouch Part */ #ifdef CONFIG_TOUCHSCREEN_USB_3M #define MTOUCHUSB_ASYNC_REPORT 1 #define MTOUCHUSB_RESET 7 #define MTOUCHUSB_REQ_CTRLLR_ID 10 #define MTOUCHUSB_REQ_CTRLLR_ID_LEN 16 static int mtouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { if (hwcalib_xy) { dev->x = (pkt[4] << 8) | pkt[3]; dev->y = 0xffff - ((pkt[6] << 8) | pkt[5]); } else { dev->x = (pkt[8] << 8) | pkt[7]; dev->y = (pkt[10] << 8) | pkt[9]; } dev->touch = (pkt[2] & 0x40) ? 1 : 0; return 1; } struct mtouch_priv { u8 fw_rev_major; u8 fw_rev_minor; }; static ssize_t mtouch_firmware_rev_show(struct device *dev, struct device_attribute *attr, char *output) { struct usb_interface *intf = to_usb_interface(dev); struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); struct mtouch_priv *priv = usbtouch->priv; return sysfs_emit(output, "%1x.%1x\n", priv->fw_rev_major, priv->fw_rev_minor); } static DEVICE_ATTR(firmware_rev, 0444, mtouch_firmware_rev_show, NULL); static struct attribute *mtouch_attrs[] = { &dev_attr_firmware_rev.attr, NULL }; static const struct attribute_group mtouch_attr_group = { .attrs = mtouch_attrs, }; static int mtouch_get_fw_revision(struct usbtouch_usb *usbtouch) { struct usb_device *udev = interface_to_usbdev(usbtouch->interface); struct mtouch_priv *priv = usbtouch->priv; u8 *buf; int ret; buf = kzalloc(MTOUCHUSB_REQ_CTRLLR_ID_LEN, GFP_NOIO); if (!buf) return -ENOMEM; ret = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), MTOUCHUSB_REQ_CTRLLR_ID, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, buf, MTOUCHUSB_REQ_CTRLLR_ID_LEN, USB_CTRL_SET_TIMEOUT); if (ret != MTOUCHUSB_REQ_CTRLLR_ID_LEN) { dev_warn(&usbtouch->interface->dev, "Failed to read FW rev: %d\n", ret); ret = ret < 0 ? ret : -EIO; goto free; } priv->fw_rev_major = buf[3]; priv->fw_rev_minor = buf[4]; ret = 0; free: kfree(buf); return ret; } static int mtouch_alloc(struct usbtouch_usb *usbtouch) { struct mtouch_priv *priv; int ret; priv = kmalloc(sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; usbtouch->priv = priv; ret = sysfs_create_group(&usbtouch->interface->dev.kobj, &mtouch_attr_group); if (ret) { kfree(usbtouch->priv); usbtouch->priv = NULL; return ret; } return 0; } static int mtouch_init(struct usbtouch_usb *usbtouch) { int ret, i; struct usb_device *udev = interface_to_usbdev(usbtouch->interface); ret = mtouch_get_fw_revision(usbtouch); if (ret) return ret; ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), MTOUCHUSB_RESET, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 1, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); dev_dbg(&usbtouch->interface->dev, "%s - usb_control_msg - MTOUCHUSB_RESET - bytes|err: %d\n", __func__, ret); if (ret < 0) return ret; msleep(150); for (i = 0; i < 3; i++) { ret = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), MTOUCHUSB_ASYNC_REPORT, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 1, 1, NULL, 0, USB_CTRL_SET_TIMEOUT); dev_dbg(&usbtouch->interface->dev, "%s - usb_control_msg - MTOUCHUSB_ASYNC_REPORT - bytes|err: %d\n", __func__, ret); if (ret >= 0) break; if (ret != -EPIPE) return ret; } /* Default min/max xy are the raw values, override if using hw-calib */ if (hwcalib_xy) { input_set_abs_params(usbtouch->input, ABS_X, 0, 0xffff, 0, 0); input_set_abs_params(usbtouch->input, ABS_Y, 0, 0xffff, 0, 0); } return 0; } static void mtouch_exit(struct usbtouch_usb *usbtouch) { struct mtouch_priv *priv = usbtouch->priv; sysfs_remove_group(&usbtouch->interface->dev.kobj, &mtouch_attr_group); kfree(priv); } #endif /***************************************************************************** * ITM Part */ #ifdef CONFIG_TOUCHSCREEN_USB_ITM static int itm_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { int touch; /* * ITM devices report invalid x/y data if not touched. * if the screen was touched before but is not touched any more * report touch as 0 with the last valid x/y data once. then stop * reporting data until touched again. */ dev->press = ((pkt[2] & 0x01) << 7) | (pkt[5] & 0x7F); touch = ~pkt[7] & 0x20; if (!touch) { if (dev->touch) { dev->touch = 0; return 1; } return 0; } dev->x = ((pkt[0] & 0x1F) << 7) | (pkt[3] & 0x7F); dev->y = ((pkt[1] & 0x1F) << 7) | (pkt[4] & 0x7F); dev->touch = touch; return 1; } #endif /***************************************************************************** * eTurboTouch part */ #ifdef CONFIG_TOUCHSCREEN_USB_ETURBO #ifndef MULTI_PACKET #define MULTI_PACKET #endif static int eturbo_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { unsigned int shift; /* packets should start with sync */ if (!(pkt[0] & 0x80)) return 0; shift = (6 - (pkt[0] & 0x03)); dev->x = ((pkt[3] << 7) | pkt[4]) >> shift; dev->y = ((pkt[1] << 7) | pkt[2]) >> shift; dev->touch = (pkt[0] & 0x10) ? 1 : 0; return 1; } static int eturbo_get_pkt_len(unsigned char *buf, int len) { if (buf[0] & 0x80) return 5; if (buf[0] == 0x01) return 3; return 0; } #endif /***************************************************************************** * Gunze part */ #ifdef CONFIG_TOUCHSCREEN_USB_GUNZE static int gunze_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { if (!(pkt[0] & 0x80) || ((pkt[1] | pkt[2] | pkt[3]) & 0x80)) return 0; dev->x = ((pkt[0] & 0x1F) << 7) | (pkt[2] & 0x7F); dev->y = ((pkt[1] & 0x1F) << 7) | (pkt[3] & 0x7F); dev->touch = pkt[0] & 0x20; return 1; } #endif /***************************************************************************** * DMC TSC-10/25 Part * * Documentation about the controller and it's protocol can be found at * http://www.dmccoltd.com/files/controler/tsc10usb_pi_e.pdf * http://www.dmccoltd.com/files/controler/tsc25_usb_e.pdf */ #ifdef CONFIG_TOUCHSCREEN_USB_DMC_TSC10 /* supported data rates. currently using 130 */ #define TSC10_RATE_POINT 0x50 #define TSC10_RATE_30 0x40 #define TSC10_RATE_50 0x41 #define TSC10_RATE_80 0x42 #define TSC10_RATE_100 0x43 #define TSC10_RATE_130 0x44 #define TSC10_RATE_150 0x45 /* commands */ #define TSC10_CMD_RESET 0x55 #define TSC10_CMD_RATE 0x05 #define TSC10_CMD_DATA1 0x01 static int dmc_tsc10_init(struct usbtouch_usb *usbtouch) { struct usb_device *dev = interface_to_usbdev(usbtouch->interface); int ret = -ENOMEM; unsigned char *buf; buf = kmalloc(2, GFP_NOIO); if (!buf) goto err_nobuf; /* reset */ buf[0] = buf[1] = 0xFF; ret = usb_control_msg(dev, usb_rcvctrlpipe (dev, 0), TSC10_CMD_RESET, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, buf, 2, USB_CTRL_SET_TIMEOUT); if (ret < 0) goto err_out; if (buf[0] != 0x06) { ret = -ENODEV; goto err_out; } /* TSC-25 data sheet specifies a delay after the RESET command */ msleep(150); /* set coordinate output rate */ buf[0] = buf[1] = 0xFF; ret = usb_control_msg(dev, usb_rcvctrlpipe (dev, 0), TSC10_CMD_RATE, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, TSC10_RATE_150, 0, buf, 2, USB_CTRL_SET_TIMEOUT); if (ret < 0) goto err_out; if ((buf[0] != 0x06) && (buf[0] != 0x15 || buf[1] != 0x01)) { ret = -ENODEV; goto err_out; } /* start sending data */ ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0), TSC10_CMD_DATA1, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, 0, 0, NULL, 0, USB_CTRL_SET_TIMEOUT); err_out: kfree(buf); err_nobuf: return ret; } static int dmc_tsc10_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = ((pkt[2] & 0x03) << 8) | pkt[1]; dev->y = ((pkt[4] & 0x03) << 8) | pkt[3]; dev->touch = pkt[0] & 0x01; return 1; } #endif /***************************************************************************** * IRTOUCH Part */ #ifdef CONFIG_TOUCHSCREEN_USB_IRTOUCH static int irtouch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = (pkt[3] << 8) | pkt[2]; dev->y = (pkt[5] << 8) | pkt[4]; dev->touch = (pkt[1] & 0x03) ? 1 : 0; return 1; } #endif /***************************************************************************** * ET&T TC5UH/TC4UM part */ #ifdef CONFIG_TOUCHSCREEN_USB_ETT_TC45USB static int tc45usb_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = ((pkt[2] & 0x0F) << 8) | pkt[1]; dev->y = ((pkt[4] & 0x0F) << 8) | pkt[3]; dev->touch = pkt[0] & 0x01; return 1; } #endif /***************************************************************************** * IdealTEK URTC1000 Part */ #ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK #ifndef MULTI_PACKET #define MULTI_PACKET #endif static int idealtek_get_pkt_len(unsigned char *buf, int len) { if (buf[0] & 0x80) return 5; if (buf[0] == 0x01) return len; return 0; } static int idealtek_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { switch (pkt[0] & 0x98) { case 0x88: /* touch data in IdealTEK mode */ dev->x = (pkt[1] << 5) | (pkt[2] >> 2); dev->y = (pkt[3] << 5) | (pkt[4] >> 2); dev->touch = (pkt[0] & 0x40) ? 1 : 0; return 1; case 0x98: /* touch data in MT emulation mode */ dev->x = (pkt[2] << 5) | (pkt[1] >> 2); dev->y = (pkt[4] << 5) | (pkt[3] >> 2); dev->touch = (pkt[0] & 0x40) ? 1 : 0; return 1; default: return 0; } } #endif /***************************************************************************** * General Touch Part */ #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH static int general_touch_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = (pkt[2] << 8) | pkt[1]; dev->y = (pkt[4] << 8) | pkt[3]; dev->press = pkt[5] & 0xff; dev->touch = pkt[0] & 0x01; return 1; } #endif /***************************************************************************** * GoTop Part */ #ifdef CONFIG_TOUCHSCREEN_USB_GOTOP static int gotop_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = ((pkt[1] & 0x38) << 4) | pkt[2]; dev->y = ((pkt[1] & 0x07) << 7) | pkt[3]; dev->touch = pkt[0] & 0x01; return 1; } #endif /***************************************************************************** * JASTEC Part */ #ifdef CONFIG_TOUCHSCREEN_USB_JASTEC static int jastec_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = ((pkt[0] & 0x3f) << 6) | (pkt[2] & 0x3f); dev->y = ((pkt[1] & 0x3f) << 6) | (pkt[3] & 0x3f); dev->touch = (pkt[0] & 0x40) >> 6; return 1; } #endif /***************************************************************************** * Zytronic Part */ #ifdef CONFIG_TOUCHSCREEN_USB_ZYTRONIC static int zytronic_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { struct usb_interface *intf = dev->interface; switch (pkt[0]) { case 0x3A: /* command response */ dev_dbg(&intf->dev, "%s: Command response %d\n", __func__, pkt[1]); break; case 0xC0: /* down */ dev->x = (pkt[1] & 0x7f) | ((pkt[2] & 0x07) << 7); dev->y = (pkt[3] & 0x7f) | ((pkt[4] & 0x07) << 7); dev->touch = 1; dev_dbg(&intf->dev, "%s: down %d,%d\n", __func__, dev->x, dev->y); return 1; case 0x80: /* up */ dev->x = (pkt[1] & 0x7f) | ((pkt[2] & 0x07) << 7); dev->y = (pkt[3] & 0x7f) | ((pkt[4] & 0x07) << 7); dev->touch = 0; dev_dbg(&intf->dev, "%s: up %d,%d\n", __func__, dev->x, dev->y); return 1; default: dev_dbg(&intf->dev, "%s: Unknown return %d\n", __func__, pkt[0]); break; } return 0; } #endif /***************************************************************************** * NEXIO Part */ #ifdef CONFIG_TOUCHSCREEN_USB_NEXIO #define NEXIO_TIMEOUT 5000 #define NEXIO_BUFSIZE 1024 #define NEXIO_THRESHOLD 50 struct nexio_priv { struct urb *ack; unsigned char *ack_buf; }; struct nexio_touch_packet { u8 flags; /* 0xe1 = touch, 0xe1 = release */ __be16 data_len; /* total bytes of touch data */ __be16 x_len; /* bytes for X axis */ __be16 y_len; /* bytes for Y axis */ u8 data[]; } __attribute__ ((packed)); static unsigned char nexio_ack_pkt[2] = { 0xaa, 0x02 }; static unsigned char nexio_init_pkt[4] = { 0x82, 0x04, 0x0a, 0x0f }; static void nexio_ack_complete(struct urb *urb) { } static int nexio_alloc(struct usbtouch_usb *usbtouch) { struct nexio_priv *priv; int ret = -ENOMEM; priv = kmalloc(sizeof(*priv), GFP_KERNEL); if (!priv) goto out_buf; usbtouch->priv = priv; priv->ack_buf = kmemdup(nexio_ack_pkt, sizeof(nexio_ack_pkt), GFP_KERNEL); if (!priv->ack_buf) goto err_priv; priv->ack = usb_alloc_urb(0, GFP_KERNEL); if (!priv->ack) { dev_dbg(&usbtouch->interface->dev, "%s - usb_alloc_urb failed: usbtouch->ack\n", __func__); goto err_ack_buf; } return 0; err_ack_buf: kfree(priv->ack_buf); err_priv: kfree(priv); out_buf: return ret; } static int nexio_init(struct usbtouch_usb *usbtouch) { struct usb_device *dev = interface_to_usbdev(usbtouch->interface); struct usb_host_interface *interface = usbtouch->interface->cur_altsetting; struct nexio_priv *priv = usbtouch->priv; int ret = -ENOMEM; int actual_len, i; unsigned char *buf; char *firmware_ver = NULL, *device_name = NULL; int input_ep = 0, output_ep = 0; /* find first input and output endpoint */ for (i = 0; i < interface->desc.bNumEndpoints; i++) { if (!input_ep && usb_endpoint_dir_in(&interface->endpoint[i].desc)) input_ep = interface->endpoint[i].desc.bEndpointAddress; if (!output_ep && usb_endpoint_dir_out(&interface->endpoint[i].desc)) output_ep = interface->endpoint[i].desc.bEndpointAddress; } if (!input_ep || !output_ep) return -ENXIO; buf = kmalloc(NEXIO_BUFSIZE, GFP_NOIO); if (!buf) goto out_buf; /* two empty reads */ for (i = 0; i < 2; i++) { ret = usb_bulk_msg(dev, usb_rcvbulkpipe(dev, input_ep), buf, NEXIO_BUFSIZE, &actual_len, NEXIO_TIMEOUT); if (ret < 0) goto out_buf; } /* send init command */ memcpy(buf, nexio_init_pkt, sizeof(nexio_init_pkt)); ret = usb_bulk_msg(dev, usb_sndbulkpipe(dev, output_ep), buf, sizeof(nexio_init_pkt), &actual_len, NEXIO_TIMEOUT); if (ret < 0) goto out_buf; /* read replies */ for (i = 0; i < 3; i++) { memset(buf, 0, NEXIO_BUFSIZE); ret = usb_bulk_msg(dev, usb_rcvbulkpipe(dev, input_ep), buf, NEXIO_BUFSIZE, &actual_len, NEXIO_TIMEOUT); if (ret < 0 || actual_len < 1 || buf[1] != actual_len) continue; switch (buf[0]) { case 0x83: /* firmware version */ if (!firmware_ver) firmware_ver = kstrdup(&buf[2], GFP_NOIO); break; case 0x84: /* device name */ if (!device_name) device_name = kstrdup(&buf[2], GFP_NOIO); break; } } printk(KERN_INFO "Nexio device: %s, firmware version: %s\n", device_name, firmware_ver); kfree(firmware_ver); kfree(device_name); usb_fill_bulk_urb(priv->ack, dev, usb_sndbulkpipe(dev, output_ep), priv->ack_buf, sizeof(nexio_ack_pkt), nexio_ack_complete, usbtouch); ret = 0; out_buf: kfree(buf); return ret; } static void nexio_exit(struct usbtouch_usb *usbtouch) { struct nexio_priv *priv = usbtouch->priv; usb_kill_urb(priv->ack); usb_free_urb(priv->ack); kfree(priv->ack_buf); kfree(priv); } static int nexio_read_data(struct usbtouch_usb *usbtouch, unsigned char *pkt) { struct device *dev = &usbtouch->interface->dev; struct nexio_touch_packet *packet = (void *) pkt; struct nexio_priv *priv = usbtouch->priv; unsigned int data_len = be16_to_cpu(packet->data_len); unsigned int x_len = be16_to_cpu(packet->x_len); unsigned int y_len = be16_to_cpu(packet->y_len); int x, y, begin_x, begin_y, end_x, end_y, w, h, ret; /* got touch data? */ if ((pkt[0] & 0xe0) != 0xe0) return 0; if (data_len > 0xff) data_len -= 0x100; if (x_len > 0xff) x_len -= 0x80; /* send ACK */ ret = usb_submit_urb(priv->ack, GFP_ATOMIC); if (ret) dev_warn(dev, "Failed to submit ACK URB: %d\n", ret); if (!usbtouch->type->max_xc) { usbtouch->type->max_xc = 2 * x_len; input_set_abs_params(usbtouch->input, ABS_X, 0, usbtouch->type->max_xc, 0, 0); usbtouch->type->max_yc = 2 * y_len; input_set_abs_params(usbtouch->input, ABS_Y, 0, usbtouch->type->max_yc, 0, 0); } /* * The device reports state of IR sensors on X and Y axes. * Each byte represents "darkness" percentage (0-100) of one element. * 17" touchscreen reports only 64 x 52 bytes so the resolution is low. * This also means that there's a limited multi-touch capability but * it's disabled (and untested) here as there's no X driver for that. */ begin_x = end_x = begin_y = end_y = -1; for (x = 0; x < x_len; x++) { if (begin_x == -1 && packet->data[x] > NEXIO_THRESHOLD) { begin_x = x; continue; } if (end_x == -1 && begin_x != -1 && packet->data[x] < NEXIO_THRESHOLD) { end_x = x - 1; for (y = x_len; y < data_len; y++) { if (begin_y == -1 && packet->data[y] > NEXIO_THRESHOLD) { begin_y = y - x_len; continue; } if (end_y == -1 && begin_y != -1 && packet->data[y] < NEXIO_THRESHOLD) { end_y = y - 1 - x_len; w = end_x - begin_x; h = end_y - begin_y; #if 0 /* multi-touch */ input_report_abs(usbtouch->input, ABS_MT_TOUCH_MAJOR, max(w,h)); input_report_abs(usbtouch->input, ABS_MT_TOUCH_MINOR, min(x,h)); input_report_abs(usbtouch->input, ABS_MT_POSITION_X, 2*begin_x+w); input_report_abs(usbtouch->input, ABS_MT_POSITION_Y, 2*begin_y+h); input_report_abs(usbtouch->input, ABS_MT_ORIENTATION, w > h); input_mt_sync(usbtouch->input); #endif /* single touch */ usbtouch->x = 2 * begin_x + w; usbtouch->y = 2 * begin_y + h; usbtouch->touch = packet->flags & 0x01; begin_y = end_y = -1; return 1; } } begin_x = end_x = -1; } } return 0; } #endif /***************************************************************************** * ELO part */ #ifdef CONFIG_TOUCHSCREEN_USB_ELO static int elo_read_data(struct usbtouch_usb *dev, unsigned char *pkt) { dev->x = (pkt[3] << 8) | pkt[2]; dev->y = (pkt[5] << 8) | pkt[4]; dev->touch = pkt[6] > 0; dev->press = pkt[6]; return 1; } #endif /***************************************************************************** * the different device descriptors */ #ifdef MULTI_PACKET static void usbtouch_process_multi(struct usbtouch_usb *usbtouch, unsigned char *pkt, int len); #endif static struct usbtouch_device_info usbtouch_dev_info[] = { #ifdef CONFIG_TOUCHSCREEN_USB_ELO [DEVTYPE_ELO] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .max_press = 0xff, .rept_size = 8, .read_data = elo_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_EGALAX [DEVTYPE_EGALAX] = { .min_xc = 0x0, .max_xc = 0x07ff, .min_yc = 0x0, .max_yc = 0x07ff, .rept_size = 16, .process_pkt = usbtouch_process_multi, .get_pkt_len = egalax_get_pkt_len, .read_data = egalax_read_data, .init = egalax_init, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_PANJIT [DEVTYPE_PANJIT] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 8, .read_data = panjit_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_3M [DEVTYPE_3M] = { .min_xc = 0x0, .max_xc = 0x4000, .min_yc = 0x0, .max_yc = 0x4000, .rept_size = 11, .read_data = mtouch_read_data, .alloc = mtouch_alloc, .init = mtouch_init, .exit = mtouch_exit, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ITM [DEVTYPE_ITM] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .max_press = 0xff, .rept_size = 8, .read_data = itm_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ETURBO [DEVTYPE_ETURBO] = { .min_xc = 0x0, .max_xc = 0x07ff, .min_yc = 0x0, .max_yc = 0x07ff, .rept_size = 8, .process_pkt = usbtouch_process_multi, .get_pkt_len = eturbo_get_pkt_len, .read_data = eturbo_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GUNZE [DEVTYPE_GUNZE] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 4, .read_data = gunze_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_DMC_TSC10 [DEVTYPE_DMC_TSC10] = { .min_xc = 0x0, .max_xc = 0x03ff, .min_yc = 0x0, .max_yc = 0x03ff, .rept_size = 5, .init = dmc_tsc10_init, .read_data = dmc_tsc10_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_IRTOUCH [DEVTYPE_IRTOUCH] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 8, .read_data = irtouch_read_data, }, [DEVTYPE_IRTOUCH_HIRES] = { .min_xc = 0x0, .max_xc = 0x7fff, .min_yc = 0x0, .max_yc = 0x7fff, .rept_size = 8, .read_data = irtouch_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_IDEALTEK [DEVTYPE_IDEALTEK] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 8, .process_pkt = usbtouch_process_multi, .get_pkt_len = idealtek_get_pkt_len, .read_data = idealtek_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GENERAL_TOUCH [DEVTYPE_GENERAL_TOUCH] = { .min_xc = 0x0, .max_xc = 0x7fff, .min_yc = 0x0, .max_yc = 0x7fff, .rept_size = 7, .read_data = general_touch_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_GOTOP [DEVTYPE_GOTOP] = { .min_xc = 0x0, .max_xc = 0x03ff, .min_yc = 0x0, .max_yc = 0x03ff, .rept_size = 4, .read_data = gotop_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_JASTEC [DEVTYPE_JASTEC] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 4, .read_data = jastec_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_E2I [DEVTYPE_E2I] = { .min_xc = 0x0, .max_xc = 0x7fff, .min_yc = 0x0, .max_yc = 0x7fff, .rept_size = 6, .init = e2i_init, .read_data = e2i_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ZYTRONIC [DEVTYPE_ZYTRONIC] = { .min_xc = 0x0, .max_xc = 0x03ff, .min_yc = 0x0, .max_yc = 0x03ff, .rept_size = 5, .read_data = zytronic_read_data, .irq_always = true, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_ETT_TC45USB [DEVTYPE_TC45USB] = { .min_xc = 0x0, .max_xc = 0x0fff, .min_yc = 0x0, .max_yc = 0x0fff, .rept_size = 5, .read_data = tc45usb_read_data, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_NEXIO [DEVTYPE_NEXIO] = { .rept_size = 1024, .irq_always = true, .read_data = nexio_read_data, .alloc = nexio_alloc, .init = nexio_init, .exit = nexio_exit, }, #endif #ifdef CONFIG_TOUCHSCREEN_USB_EASYTOUCH [DEVTYPE_ETOUCH] = { .min_xc = 0x0, .max_xc = 0x07ff, .min_yc = 0x0, .max_yc = 0x07ff, .rept_size = 16, .process_pkt = usbtouch_process_multi, .get_pkt_len = etouch_get_pkt_len, .read_data = etouch_read_data, }, #endif }; /***************************************************************************** * Generic Part */ static void usbtouch_process_pkt(struct usbtouch_usb *usbtouch, unsigned char *pkt, int len) { struct usbtouch_device_info *type = usbtouch->type; if (!type->read_data(usbtouch, pkt)) return; input_report_key(usbtouch->input, BTN_TOUCH, usbtouch->touch); if (swap_xy) { input_report_abs(usbtouch->input, ABS_X, usbtouch->y); input_report_abs(usbtouch->input, ABS_Y, usbtouch->x); } else { input_report_abs(usbtouch->input, ABS_X, usbtouch->x); input_report_abs(usbtouch->input, ABS_Y, usbtouch->y); } if (type->max_press) input_report_abs(usbtouch->input, ABS_PRESSURE, usbtouch->press); input_sync(usbtouch->input); } #ifdef MULTI_PACKET static void usbtouch_process_multi(struct usbtouch_usb *usbtouch, unsigned char *pkt, int len) { unsigned char *buffer; int pkt_len, pos, buf_len, tmp; /* process buffer */ if (unlikely(usbtouch->buf_len)) { /* try to get size */ pkt_len = usbtouch->type->get_pkt_len( usbtouch->buffer, usbtouch->buf_len); /* drop? */ if (unlikely(!pkt_len)) goto out_flush_buf; /* need to append -pkt_len bytes before able to get size */ if (unlikely(pkt_len < 0)) { int append = -pkt_len; if (unlikely(append > len)) append = len; if (usbtouch->buf_len + append >= usbtouch->type->rept_size) goto out_flush_buf; memcpy(usbtouch->buffer + usbtouch->buf_len, pkt, append); usbtouch->buf_len += append; pkt_len = usbtouch->type->get_pkt_len( usbtouch->buffer, usbtouch->buf_len); if (pkt_len < 0) return; } /* append */ tmp = pkt_len - usbtouch->buf_len; if (usbtouch->buf_len + tmp >= usbtouch->type->rept_size) goto out_flush_buf; memcpy(usbtouch->buffer + usbtouch->buf_len, pkt, tmp); usbtouch_process_pkt(usbtouch, usbtouch->buffer, pkt_len); buffer = pkt + tmp; buf_len = len - tmp; } else { buffer = pkt; buf_len = len; } /* loop over the received packet, process */ pos = 0; while (pos < buf_len) { /* get packet len */ pkt_len = usbtouch->type->get_pkt_len(buffer + pos, buf_len - pos); /* unknown packet: skip one byte */ if (unlikely(!pkt_len)) { pos++; continue; } /* full packet: process */ if (likely((pkt_len > 0) && (pkt_len <= buf_len - pos))) { usbtouch_process_pkt(usbtouch, buffer + pos, pkt_len); } else { /* incomplete packet: save in buffer */ memcpy(usbtouch->buffer, buffer + pos, buf_len - pos); usbtouch->buf_len = buf_len - pos; return; } pos += pkt_len; } out_flush_buf: usbtouch->buf_len = 0; return; } #endif static void usbtouch_irq(struct urb *urb) { struct usbtouch_usb *usbtouch = urb->context; struct device *dev = &usbtouch->interface->dev; int retval; switch (urb->status) { case 0: /* success */ break; case -ETIME: /* this urb is timing out */ dev_dbg(dev, "%s - urb timed out - was the device unplugged?\n", __func__); return; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: case -EPIPE: /* this urb is terminated, clean up */ dev_dbg(dev, "%s - urb shutting down with status: %d\n", __func__, urb->status); return; default: dev_dbg(dev, "%s - nonzero urb status received: %d\n", __func__, urb->status); goto exit; } usbtouch->type->process_pkt(usbtouch, usbtouch->data, urb->actual_length); exit: usb_mark_last_busy(interface_to_usbdev(usbtouch->interface)); retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "%s - usb_submit_urb failed with result: %d\n", __func__, retval); } static int usbtouch_open(struct input_dev *input) { struct usbtouch_usb *usbtouch = input_get_drvdata(input); int r; usbtouch->irq->dev = interface_to_usbdev(usbtouch->interface); r = usb_autopm_get_interface(usbtouch->interface) ? -EIO : 0; if (r < 0) goto out; mutex_lock(&usbtouch->pm_mutex); if (!usbtouch->type->irq_always) { if (usb_submit_urb(usbtouch->irq, GFP_KERNEL)) { r = -EIO; goto out_put; } } usbtouch->interface->needs_remote_wakeup = 1; usbtouch->is_open = true; out_put: mutex_unlock(&usbtouch->pm_mutex); usb_autopm_put_interface(usbtouch->interface); out: return r; } static void usbtouch_close(struct input_dev *input) { struct usbtouch_usb *usbtouch = input_get_drvdata(input); int r; mutex_lock(&usbtouch->pm_mutex); if (!usbtouch->type->irq_always) usb_kill_urb(usbtouch->irq); usbtouch->is_open = false; mutex_unlock(&usbtouch->pm_mutex); r = usb_autopm_get_interface(usbtouch->interface); usbtouch->interface->needs_remote_wakeup = 0; if (!r) usb_autopm_put_interface(usbtouch->interface); } static int usbtouch_suspend (struct usb_interface *intf, pm_message_t message) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); usb_kill_urb(usbtouch->irq); return 0; } static int usbtouch_resume(struct usb_interface *intf) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); int result = 0; mutex_lock(&usbtouch->pm_mutex); if (usbtouch->is_open || usbtouch->type->irq_always) result = usb_submit_urb(usbtouch->irq, GFP_NOIO); mutex_unlock(&usbtouch->pm_mutex); return result; } static int usbtouch_reset_resume(struct usb_interface *intf) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); int err = 0; /* reinit the device */ if (usbtouch->type->init) { err = usbtouch->type->init(usbtouch); if (err) { dev_dbg(&intf->dev, "%s - type->init() failed, err: %d\n", __func__, err); return err; } } /* restart IO if needed */ mutex_lock(&usbtouch->pm_mutex); if (usbtouch->is_open) err = usb_submit_urb(usbtouch->irq, GFP_NOIO); mutex_unlock(&usbtouch->pm_mutex); return err; } static void usbtouch_free_buffers(struct usb_device *udev, struct usbtouch_usb *usbtouch) { usb_free_coherent(udev, usbtouch->data_size, usbtouch->data, usbtouch->data_dma); kfree(usbtouch->buffer); } static struct usb_endpoint_descriptor * usbtouch_get_input_endpoint(struct usb_host_interface *interface) { int i; for (i = 0; i < interface->desc.bNumEndpoints; i++) if (usb_endpoint_dir_in(&interface->endpoint[i].desc)) return &interface->endpoint[i].desc; return NULL; } static int usbtouch_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usbtouch_usb *usbtouch; struct input_dev *input_dev; struct usb_endpoint_descriptor *endpoint; struct usb_device *udev = interface_to_usbdev(intf); struct usbtouch_device_info *type; int err = -ENOMEM; /* some devices are ignored */ if (id->driver_info == DEVTYPE_IGNORE) return -ENODEV; if (id->driver_info >= ARRAY_SIZE(usbtouch_dev_info)) return -ENODEV; endpoint = usbtouch_get_input_endpoint(intf->cur_altsetting); if (!endpoint) return -ENXIO; usbtouch = kzalloc(sizeof(*usbtouch), GFP_KERNEL); input_dev = input_allocate_device(); if (!usbtouch || !input_dev) goto out_free; mutex_init(&usbtouch->pm_mutex); type = &usbtouch_dev_info[id->driver_info]; usbtouch->type = type; if (!type->process_pkt) type->process_pkt = usbtouch_process_pkt; usbtouch->data_size = type->rept_size; if (type->get_pkt_len) { /* * When dealing with variable-length packets we should * not request more than wMaxPacketSize bytes at once * as we do not know if there is more data coming or * we filled exactly wMaxPacketSize bytes and there is * nothing else. */ usbtouch->data_size = min(usbtouch->data_size, usb_endpoint_maxp(endpoint)); } usbtouch->data = usb_alloc_coherent(udev, usbtouch->data_size, GFP_KERNEL, &usbtouch->data_dma); if (!usbtouch->data) goto out_free; if (type->get_pkt_len) { usbtouch->buffer = kmalloc(type->rept_size, GFP_KERNEL); if (!usbtouch->buffer) goto out_free_buffers; } usbtouch->irq = usb_alloc_urb(0, GFP_KERNEL); if (!usbtouch->irq) { dev_dbg(&intf->dev, "%s - usb_alloc_urb failed: usbtouch->irq\n", __func__); goto out_free_buffers; } usbtouch->interface = intf; usbtouch->input = input_dev; if (udev->manufacturer) strscpy(usbtouch->name, udev->manufacturer, sizeof(usbtouch->name)); if (udev->product) { if (udev->manufacturer) strlcat(usbtouch->name, " ", sizeof(usbtouch->name)); strlcat(usbtouch->name, udev->product, sizeof(usbtouch->name)); } if (!strlen(usbtouch->name)) snprintf(usbtouch->name, sizeof(usbtouch->name), "USB Touchscreen %04x:%04x", le16_to_cpu(udev->descriptor.idVendor), le16_to_cpu(udev->descriptor.idProduct)); usb_make_path(udev, usbtouch->phys, sizeof(usbtouch->phys)); strlcat(usbtouch->phys, "/input0", sizeof(usbtouch->phys)); input_dev->name = usbtouch->name; input_dev->phys = usbtouch->phys; usb_to_input_id(udev, &input_dev->id); input_dev->dev.parent = &intf->dev; input_set_drvdata(input_dev, usbtouch); input_dev->open = usbtouch_open; input_dev->close = usbtouch_close; input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS); input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH); input_set_abs_params(input_dev, ABS_X, type->min_xc, type->max_xc, 0, 0); input_set_abs_params(input_dev, ABS_Y, type->min_yc, type->max_yc, 0, 0); if (type->max_press) input_set_abs_params(input_dev, ABS_PRESSURE, type->min_press, type->max_press, 0, 0); if (usb_endpoint_type(endpoint) == USB_ENDPOINT_XFER_INT) usb_fill_int_urb(usbtouch->irq, udev, usb_rcvintpipe(udev, endpoint->bEndpointAddress), usbtouch->data, usbtouch->data_size, usbtouch_irq, usbtouch, endpoint->bInterval); else usb_fill_bulk_urb(usbtouch->irq, udev, usb_rcvbulkpipe(udev, endpoint->bEndpointAddress), usbtouch->data, usbtouch->data_size, usbtouch_irq, usbtouch); usbtouch->irq->dev = udev; usbtouch->irq->transfer_dma = usbtouch->data_dma; usbtouch->irq->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; /* device specific allocations */ if (type->alloc) { err = type->alloc(usbtouch); if (err) { dev_dbg(&intf->dev, "%s - type->alloc() failed, err: %d\n", __func__, err); goto out_free_urb; } } /* device specific initialisation*/ if (type->init) { err = type->init(usbtouch); if (err) { dev_dbg(&intf->dev, "%s - type->init() failed, err: %d\n", __func__, err); goto out_do_exit; } } err = input_register_device(usbtouch->input); if (err) { dev_dbg(&intf->dev, "%s - input_register_device failed, err: %d\n", __func__, err); goto out_do_exit; } usb_set_intfdata(intf, usbtouch); if (usbtouch->type->irq_always) { /* this can't fail */ usb_autopm_get_interface(intf); err = usb_submit_urb(usbtouch->irq, GFP_KERNEL); if (err) { usb_autopm_put_interface(intf); dev_err(&intf->dev, "%s - usb_submit_urb failed with result: %d\n", __func__, err); goto out_unregister_input; } } return 0; out_unregister_input: input_unregister_device(input_dev); input_dev = NULL; out_do_exit: if (type->exit) type->exit(usbtouch); out_free_urb: usb_free_urb(usbtouch->irq); out_free_buffers: usbtouch_free_buffers(udev, usbtouch); out_free: input_free_device(input_dev); kfree(usbtouch); return err; } static void usbtouch_disconnect(struct usb_interface *intf) { struct usbtouch_usb *usbtouch = usb_get_intfdata(intf); if (!usbtouch) return; dev_dbg(&intf->dev, "%s - usbtouch is initialized, cleaning up\n", __func__); usb_set_intfdata(intf, NULL); /* this will stop IO via close */ input_unregister_device(usbtouch->input); usb_free_urb(usbtouch->irq); if (usbtouch->type->exit) usbtouch->type->exit(usbtouch); usbtouch_free_buffers(interface_to_usbdev(intf), usbtouch); kfree(usbtouch); } MODULE_DEVICE_TABLE(usb, usbtouch_devices); static struct usb_driver usbtouch_driver = { .name = "usbtouchscreen", .probe = usbtouch_probe, .disconnect = usbtouch_disconnect, .suspend = usbtouch_suspend, .resume = usbtouch_resume, .reset_resume = usbtouch_reset_resume, .id_table = usbtouch_devices, .supports_autosuspend = 1, }; module_usb_driver(usbtouch_driver); MODULE_AUTHOR("Daniel Ritz <daniel.ritz@gmx.ch>"); MODULE_DESCRIPTION("USB Touchscreen Driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("touchkitusb"); MODULE_ALIAS("itmtouch"); MODULE_ALIAS("mtouchusb");
73 73 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 /* SPDX-License-Identifier: GPL-2.0 */ /* Rewritten and vastly simplified by Rusty Russell for in-kernel * module loader: * Copyright 2002 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation */ #ifndef _LINUX_KALLSYMS_H #define _LINUX_KALLSYMS_H #include <linux/errno.h> #include <linux/buildid.h> #include <linux/kernel.h> #include <linux/stddef.h> #include <linux/mm.h> #include <linux/module.h> #include <asm/sections.h> #define KSYM_NAME_LEN 512 #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s %s]") + \ (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + \ (BUILD_ID_SIZE_MAX * 2) + 1) struct cred; struct module; static inline int is_kernel_text(unsigned long addr) { if (__is_kernel_text(addr)) return 1; return in_gate_area_no_mm(addr); } static inline int is_kernel(unsigned long addr) { if (__is_kernel(addr)) return 1; return in_gate_area_no_mm(addr); } static inline int is_ksym_addr(unsigned long addr) { if (IS_ENABLED(CONFIG_KALLSYMS_ALL)) return is_kernel(addr); return is_kernel_text(addr) || is_kernel_inittext(addr); } static inline void *dereference_symbol_descriptor(void *ptr) { #ifdef CONFIG_HAVE_FUNCTION_DESCRIPTORS struct module *mod; ptr = dereference_kernel_function_descriptor(ptr); if (is_ksym_addr((unsigned long)ptr)) return ptr; preempt_disable(); mod = __module_address((unsigned long)ptr); preempt_enable(); if (mod) ptr = dereference_module_function_descriptor(mod, ptr); #endif return ptr; } /* How and when do we show kallsyms values? */ extern bool kallsyms_show_value(const struct cred *cred); #ifdef CONFIG_KALLSYMS unsigned long kallsyms_sym_address(int idx); int kallsyms_on_each_symbol(int (*fn)(void *, const char *, unsigned long), void *data); int kallsyms_on_each_match_symbol(int (*fn)(void *, unsigned long), const char *name, void *data); /* Lookup the address for a symbol. Returns 0 if not found. */ unsigned long kallsyms_lookup_name(const char *name); extern int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset); /* Lookup an address. modname is set to NULL if it's in the kernel. */ const char *kallsyms_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf); /* Look up a kernel symbol and return it in a text buffer. */ extern int sprint_symbol(char *buffer, unsigned long address); extern int sprint_symbol_build_id(char *buffer, unsigned long address); extern int sprint_symbol_no_offset(char *buffer, unsigned long address); extern int sprint_backtrace(char *buffer, unsigned long address); extern int sprint_backtrace_build_id(char *buffer, unsigned long address); int lookup_symbol_name(unsigned long addr, char *symname); #else /* !CONFIG_KALLSYMS */ static inline unsigned long kallsyms_lookup_name(const char *name) { return 0; } static inline int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) { return 0; } static inline const char *kallsyms_lookup(unsigned long addr, unsigned long *symbolsize, unsigned long *offset, char **modname, char *namebuf) { return NULL; } static inline int sprint_symbol(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int sprint_symbol_build_id(char *buffer, unsigned long address) { *buffer = '\0'; return 0; } static inline int sprint_symbol_no_offset(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int sprint_backtrace(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int sprint_backtrace_build_id(char *buffer, unsigned long addr) { *buffer = '\0'; return 0; } static inline int lookup_symbol_name(unsigned long addr, char *symname) { return -ERANGE; } static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *, unsigned long), void *data) { return -EOPNOTSUPP; } static inline int kallsyms_on_each_match_symbol(int (*fn)(void *, unsigned long), const char *name, void *data) { return -EOPNOTSUPP; } #endif /*CONFIG_KALLSYMS*/ static inline void print_ip_sym(const char *loglvl, unsigned long ip) { printk("%s[<%px>] %pS\n", loglvl, (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/
145 83 13 2 11 6 3 4 13 5 8 2 1 511 271 495 2 5 504 4 4 14 515 5 497 26 342 498 496 451 182 355 224 342 342 92 92 92 3 197 197 197 3 195 1 25 1 1 496 496 1 498 498 498 498 15 498 41 351 195 233 350 355 236 197 166 166 441 2 159 4 64 23 64 3 61 85 522 522 9 499 12 86 449 3 452 167 146 92 92 92 92 59 1 148 148 32 1 121 40 40 4 9 1 27 11 9 25 25 5 4 23 2 1 1 81 34 52 10 9 8 8 8 7 6 7 7 7 10 5 10 10 7 5 75 14 24 55 53 7 16 34 25 25 25 25 20 5 25 25 9 18 3 1 2 4 1 1 2 5 4 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 // SPDX-License-Identifier: GPL-2.0-only /* * 9P Client * * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2007 by Latchesar Ionkov <lucho@ionkov.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/poll.h> #include <linux/idr.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/sched/signal.h> #include <linux/uaccess.h> #include <linux/uio.h> #include <linux/netfs.h> #include <net/9p/9p.h> #include <linux/parser.h> #include <linux/seq_file.h> #include <net/9p/client.h> #include <net/9p/transport.h> #include "protocol.h" #define CREATE_TRACE_POINTS #include <trace/events/9p.h> /* DEFAULT MSIZE = 32 pages worth of payload + P9_HDRSZ + * room for write (16 extra) or read (11 extra) operands. */ #define DEFAULT_MSIZE ((128 * 1024) + P9_IOHDRSZ) /* Client Option Parsing (code inspired by NFS code) * - a little lazy - parse all client options */ enum { Opt_msize, Opt_trans, Opt_legacy, Opt_version, Opt_err, }; static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, {Opt_version, "version=%s"}, {Opt_err, NULL}, }; inline int p9_is_proto_dotl(struct p9_client *clnt) { return clnt->proto_version == p9_proto_2000L; } EXPORT_SYMBOL(p9_is_proto_dotl); inline int p9_is_proto_dotu(struct p9_client *clnt) { return clnt->proto_version == p9_proto_2000u; } EXPORT_SYMBOL(p9_is_proto_dotu); int p9_show_client_options(struct seq_file *m, struct p9_client *clnt) { if (clnt->msize != DEFAULT_MSIZE) seq_printf(m, ",msize=%u", clnt->msize); seq_printf(m, ",trans=%s", clnt->trans_mod->name); switch (clnt->proto_version) { case p9_proto_legacy: seq_puts(m, ",noextend"); break; case p9_proto_2000u: seq_puts(m, ",version=9p2000.u"); break; case p9_proto_2000L: /* Default */ break; } if (clnt->trans_mod->show_options) return clnt->trans_mod->show_options(m, clnt); return 0; } EXPORT_SYMBOL(p9_show_client_options); /* Some error codes are taken directly from the server replies, * make sure they are valid. */ static int safe_errno(int err) { if (err > 0 || err < -MAX_ERRNO) { p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err); return -EPROTO; } return err; } /* Interpret mount option for protocol version */ static int get_protocol_version(char *s) { int version = -EINVAL; if (!strcmp(s, "9p2000")) { version = p9_proto_legacy; p9_debug(P9_DEBUG_9P, "Protocol version: Legacy\n"); } else if (!strcmp(s, "9p2000.u")) { version = p9_proto_2000u; p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); } else if (!strcmp(s, "9p2000.L")) { version = p9_proto_2000L; p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); } else { pr_info("Unknown protocol version %s\n", s); } return version; } /** * parse_opts - parse mount options into client structure * @opts: options string passed from mount * @clnt: existing v9fs client information * * Return 0 upon success, -ERRNO upon failure */ static int parse_opts(char *opts, struct p9_client *clnt) { char *options, *tmp_options; char *p; substring_t args[MAX_OPT_ARGS]; int option; char *s; int ret = 0; clnt->proto_version = p9_proto_2000L; clnt->msize = DEFAULT_MSIZE; if (!opts) return 0; tmp_options = kstrdup(opts, GFP_KERNEL); if (!tmp_options) return -ENOMEM; options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token, r; if (!*p) continue; token = match_token(p, tokens, args); switch (token) { case Opt_msize: r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); ret = r; continue; } if (option < 4096) { p9_debug(P9_DEBUG_ERROR, "msize should be at least 4k\n"); ret = -EINVAL; continue; } clnt->msize = option; break; case Opt_trans: s = match_strdup(&args[0]); if (!s) { ret = -ENOMEM; p9_debug(P9_DEBUG_ERROR, "problem allocating copy of trans arg\n"); goto free_and_return; } v9fs_put_trans(clnt->trans_mod); clnt->trans_mod = v9fs_get_trans_by_name(s); if (!clnt->trans_mod) { pr_info("Could not find request transport: %s\n", s); ret = -EINVAL; } kfree(s); break; case Opt_legacy: clnt->proto_version = p9_proto_legacy; break; case Opt_version: s = match_strdup(&args[0]); if (!s) { ret = -ENOMEM; p9_debug(P9_DEBUG_ERROR, "problem allocating copy of version arg\n"); goto free_and_return; } r = get_protocol_version(s); if (r < 0) ret = r; else clnt->proto_version = r; kfree(s); break; default: continue; } } free_and_return: if (ret) v9fs_put_trans(clnt->trans_mod); kfree(tmp_options); return ret; } static int p9_fcall_init(struct p9_client *c, struct p9_fcall *fc, int alloc_msize) { if (likely(c->fcall_cache) && alloc_msize == c->msize) { fc->sdata = kmem_cache_alloc(c->fcall_cache, GFP_NOFS); fc->cache = c->fcall_cache; } else { fc->sdata = kmalloc(alloc_msize, GFP_NOFS); fc->cache = NULL; } if (!fc->sdata) return -ENOMEM; fc->capacity = alloc_msize; fc->id = 0; fc->tag = P9_NOTAG; return 0; } void p9_fcall_fini(struct p9_fcall *fc) { /* sdata can be NULL for interrupted requests in trans_rdma, * and kmem_cache_free does not do NULL-check for us */ if (unlikely(!fc->sdata)) return; if (fc->cache) kmem_cache_free(fc->cache, fc->sdata); else kfree(fc->sdata); } EXPORT_SYMBOL(p9_fcall_fini); static struct kmem_cache *p9_req_cache; /** * p9_tag_alloc - Allocate a new request. * @c: Client session. * @type: Transaction type. * @t_size: Buffer size for holding this request * (automatic calculation by format template if 0). * @r_size: Buffer size for holding server's reply on this request * (automatic calculation by format template if 0). * @fmt: Format template for assembling 9p request message * (see p9pdu_vwritef). * @ap: Variable arguments to be fed to passed format template * (see p9pdu_vwritef). * * Context: Process context. * Return: Pointer to new request. */ static struct p9_req_t * p9_tag_alloc(struct p9_client *c, int8_t type, uint t_size, uint r_size, const char *fmt, va_list ap) { struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS); int alloc_tsize; int alloc_rsize; int tag; va_list apc; va_copy(apc, ap); alloc_tsize = min_t(size_t, c->msize, t_size ?: p9_msg_buf_size(c, type, fmt, apc)); va_end(apc); alloc_rsize = min_t(size_t, c->msize, r_size ?: p9_msg_buf_size(c, type + 1, fmt, ap)); if (!req) return ERR_PTR(-ENOMEM); if (p9_fcall_init(c, &req->tc, alloc_tsize)) goto free_req; if (p9_fcall_init(c, &req->rc, alloc_rsize)) goto free; p9pdu_reset(&req->tc); p9pdu_reset(&req->rc); req->t_err = 0; req->status = REQ_STATUS_ALLOC; /* refcount needs to be set to 0 before inserting into the idr * so p9_tag_lookup does not accept a request that is not fully * initialized. refcount_set to 2 below will mark request ready. */ refcount_set(&req->refcount, 0); init_waitqueue_head(&req->wq); INIT_LIST_HEAD(&req->req_list); idr_preload(GFP_NOFS); spin_lock_irq(&c->lock); if (type == P9_TVERSION) tag = idr_alloc(&c->reqs, req, P9_NOTAG, P9_NOTAG + 1, GFP_NOWAIT); else tag = idr_alloc(&c->reqs, req, 0, P9_NOTAG, GFP_NOWAIT); req->tc.tag = tag; spin_unlock_irq(&c->lock); idr_preload_end(); if (tag < 0) goto free; /* Init ref to two because in the general case there is one ref * that is put asynchronously by a writer thread, one ref * temporarily given by p9_tag_lookup and put by p9_client_cb * in the recv thread, and one ref put by p9_req_put in the * main thread. The only exception is virtio that does not use * p9_tag_lookup but does not have a writer thread either * (the write happens synchronously in the request/zc_request * callback), so p9_client_cb eats the second ref there * as the pointer is duplicated directly by virtqueue_add_sgs() */ refcount_set(&req->refcount, 2); return req; free: p9_fcall_fini(&req->tc); p9_fcall_fini(&req->rc); free_req: kmem_cache_free(p9_req_cache, req); return ERR_PTR(-ENOMEM); } /** * p9_tag_lookup - Look up a request by tag. * @c: Client session. * @tag: Transaction ID. * * Context: Any context. * Return: A request, or %NULL if there is no request with that tag. */ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) { struct p9_req_t *req; rcu_read_lock(); again: req = idr_find(&c->reqs, tag); if (req) { /* We have to be careful with the req found under rcu_read_lock * Thanks to SLAB_TYPESAFE_BY_RCU we can safely try to get the * ref again without corrupting other data, then check again * that the tag matches once we have the ref */ if (!p9_req_try_get(req)) goto again; if (req->tc.tag != tag) { p9_req_put(c, req); goto again; } } rcu_read_unlock(); return req; } EXPORT_SYMBOL(p9_tag_lookup); /** * p9_tag_remove - Remove a tag. * @c: Client session. * @r: Request of reference. * * Context: Any context. */ static void p9_tag_remove(struct p9_client *c, struct p9_req_t *r) { unsigned long flags; u16 tag = r->tc.tag; p9_debug(P9_DEBUG_MUX, "freeing clnt %p req %p tag: %d\n", c, r, tag); spin_lock_irqsave(&c->lock, flags); idr_remove(&c->reqs, tag); spin_unlock_irqrestore(&c->lock, flags); } int p9_req_put(struct p9_client *c, struct p9_req_t *r) { if (refcount_dec_and_test(&r->refcount)) { p9_tag_remove(c, r); p9_fcall_fini(&r->tc); p9_fcall_fini(&r->rc); kmem_cache_free(p9_req_cache, r); return 1; } return 0; } EXPORT_SYMBOL(p9_req_put); /** * p9_tag_cleanup - cleans up tags structure and reclaims resources * @c: v9fs client struct * * This frees resources associated with the tags structure * */ static void p9_tag_cleanup(struct p9_client *c) { struct p9_req_t *req; int id; rcu_read_lock(); idr_for_each_entry(&c->reqs, req, id) { pr_info("Tag %d still in use\n", id); if (p9_req_put(c, req) == 0) pr_warn("Packet with tag %d has still references", req->tc.tag); } rcu_read_unlock(); } /** * p9_client_cb - call back from transport to client * @c: client state * @req: request received * @status: request status, one of REQ_STATUS_* * */ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) { p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc.tag); /* This barrier is needed to make sure any change made to req before * the status change is visible to another thread */ smp_wmb(); WRITE_ONCE(req->status, status); wake_up(&req->wq); p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag); p9_req_put(c, req); } EXPORT_SYMBOL(p9_client_cb); /** * p9_parse_header - parse header arguments out of a packet * @pdu: packet to parse * @size: size of packet * @type: type of request * @tag: tag of packet * @rewind: set if we need to rewind offset afterwards */ int p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, int rewind) { s8 r_type; s16 r_tag; s32 r_size; int offset = pdu->offset; int err; pdu->offset = 0; err = p9pdu_readf(pdu, 0, "dbw", &r_size, &r_type, &r_tag); if (err) goto rewind_and_exit; if (type) *type = r_type; if (tag) *tag = r_tag; if (size) *size = r_size; if (pdu->size != r_size || r_size < 7) { err = -EINVAL; goto rewind_and_exit; } pdu->id = r_type; pdu->tag = r_tag; p9_debug(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size, pdu->id, pdu->tag); rewind_and_exit: if (rewind) pdu->offset = offset; return err; } EXPORT_SYMBOL(p9_parse_header); /** * p9_check_errors - check 9p packet for error return and process it * @c: current client instance * @req: request to parse and check for error conditions * * returns error code if one is discovered, otherwise returns 0 * * this will have to be more complicated if we have multiple * error packet types */ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) { s8 type; int err; int ecode; err = p9_parse_header(&req->rc, NULL, &type, NULL, 0); if (req->rc.size > req->rc.capacity && !req->rc.zc) { pr_err("requested packet size too big: %d does not fit %zu (type=%d)\n", req->rc.size, req->rc.capacity, req->rc.id); return -EIO; } /* dump the response from server * This should be after check errors which poplulate pdu_fcall. */ trace_9p_protocol_dump(c, &req->rc); if (err) { p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); return err; } if (type != P9_RERROR && type != P9_RLERROR) return 0; if (!p9_is_proto_dotl(c)) { char *ename = NULL; err = p9pdu_readf(&req->rc, c->proto_version, "s?d", &ename, &ecode); if (err) { kfree(ename); goto out_err; } if (p9_is_proto_dotu(c) && ecode < 512) err = -ecode; if (!err) { err = p9_errstr2errno(ename, strlen(ename)); p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); } kfree(ename); } else { err = p9pdu_readf(&req->rc, c->proto_version, "d", &ecode); if (err) goto out_err; err = -ecode; p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); } return err; out_err: p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; } static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); /** * p9_client_flush - flush (cancel) a request * @c: client state * @oldreq: request to cancel * * This sents a flush for a particular request and links * the flush request to the original request. The current * code only supports a single flush request although the protocol * allows for multiple flush requests to be sent for a single request. * */ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) { struct p9_req_t *req; s16 oldtag; int err; err = p9_parse_header(&oldreq->tc, NULL, NULL, &oldtag, 1); if (err) return err; p9_debug(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag); if (IS_ERR(req)) return PTR_ERR(req); /* if we haven't received a response for oldreq, * remove it from the list */ if (READ_ONCE(oldreq->status) == REQ_STATUS_SENT) { if (c->trans_mod->cancelled) c->trans_mod->cancelled(c, oldreq); } p9_req_put(c, req); return 0; } static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, int8_t type, uint t_size, uint r_size, const char *fmt, va_list ap) { int err; struct p9_req_t *req; va_list apc; p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type); /* we allow for any status other than disconnected */ if (c->status == Disconnected) return ERR_PTR(-EIO); /* if status is begin_disconnected we allow only clunk request */ if (c->status == BeginDisconnect && type != P9_TCLUNK) return ERR_PTR(-EIO); va_copy(apc, ap); req = p9_tag_alloc(c, type, t_size, r_size, fmt, apc); va_end(apc); if (IS_ERR(req)) return req; /* marshall the data */ p9pdu_prepare(&req->tc, req->tc.tag, type); err = p9pdu_vwritef(&req->tc, c->proto_version, fmt, ap); if (err) goto reterr; p9pdu_finalize(c, &req->tc); trace_9p_client_req(c, type, req->tc.tag); return req; reterr: p9_req_put(c, req); /* We have to put also the 2nd reference as it won't be used */ p9_req_put(c, req); return ERR_PTR(err); } /** * p9_client_rpc - issue a request and wait for a response * @c: client session * @type: type of request * @fmt: protocol format string (see protocol.c) * * Returns request structure (which client must free using p9_req_put) */ static struct p9_req_t * p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) { va_list ap; int sigpending, err; unsigned long flags; struct p9_req_t *req; /* Passing zero for tsize/rsize to p9_client_prepare_req() tells it to * auto determine an appropriate (small) request/response size * according to actual message data being sent. Currently RDMA * transport is excluded from this response message size optimization, * as it would not cope with it, due to its pooled response buffers * (using an optimized request size for RDMA as well though). */ const uint tsize = 0; const uint rsize = c->trans_mod->pooled_rbuffers ? c->msize : 0; va_start(ap, fmt); req = p9_client_prepare_req(c, type, tsize, rsize, fmt, ap); va_end(ap); if (IS_ERR(req)) return req; req->tc.zc = false; req->rc.zc = false; if (signal_pending(current)) { sigpending = 1; clear_thread_flag(TIF_SIGPENDING); } else { sigpending = 0; } err = c->trans_mod->request(c, req); if (err < 0) { /* write won't happen */ p9_req_put(c, req); if (err != -ERESTARTSYS && err != -EFAULT) c->status = Disconnected; goto recalc_sigpending; } again: /* Wait for the response */ err = wait_event_killable(req->wq, READ_ONCE(req->status) >= REQ_STATUS_RCVD); /* Make sure our req is coherent with regard to updates in other * threads - echoes to wmb() in the callback */ smp_rmb(); if (err == -ERESTARTSYS && c->status == Connected && type == P9_TFLUSH) { sigpending = 1; clear_thread_flag(TIF_SIGPENDING); goto again; } if (READ_ONCE(req->status) == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } if (err == -ERESTARTSYS && c->status == Connected) { p9_debug(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; clear_thread_flag(TIF_SIGPENDING); if (c->trans_mod->cancel(c, req)) p9_client_flush(c, req); /* if we received the response anyway, don't signal error */ if (READ_ONCE(req->status) == REQ_STATUS_RCVD) err = 0; } recalc_sigpending: if (sigpending) { spin_lock_irqsave(&current->sighand->siglock, flags); recalc_sigpending(); spin_unlock_irqrestore(&current->sighand->siglock, flags); } if (err < 0) goto reterr; err = p9_check_errors(c, req); trace_9p_client_res(c, type, req->rc.tag, err); if (!err) return req; reterr: p9_req_put(c, req); return ERR_PTR(safe_errno(err)); } /** * p9_client_zc_rpc - issue a request and wait for a response * @c: client session * @type: type of request * @uidata: destination for zero copy read * @uodata: source for zero copy write * @inlen: read buffer size * @olen: write buffer size * @in_hdrlen: reader header size, This is the size of response protocol data * @fmt: protocol format string (see protocol.c) * * Returns request structure (which client must free using p9_req_put) */ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, struct iov_iter *uidata, struct iov_iter *uodata, int inlen, int olen, int in_hdrlen, const char *fmt, ...) { va_list ap; int sigpending, err; unsigned long flags; struct p9_req_t *req; va_start(ap, fmt); /* We allocate a inline protocol data of only 4k bytes. * The actual content is passed in zero-copy fashion. */ req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, P9_ZC_HDR_SZ, fmt, ap); va_end(ap); if (IS_ERR(req)) return req; req->tc.zc = true; req->rc.zc = true; if (signal_pending(current)) { sigpending = 1; clear_thread_flag(TIF_SIGPENDING); } else { sigpending = 0; } err = c->trans_mod->zc_request(c, req, uidata, uodata, inlen, olen, in_hdrlen); if (err < 0) { if (err == -EIO) c->status = Disconnected; if (err != -ERESTARTSYS) goto recalc_sigpending; } if (READ_ONCE(req->status) == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; } if (err == -ERESTARTSYS && c->status == Connected) { p9_debug(P9_DEBUG_MUX, "flushing\n"); sigpending = 1; clear_thread_flag(TIF_SIGPENDING); if (c->trans_mod->cancel(c, req)) p9_client_flush(c, req); /* if we received the response anyway, don't signal error */ if (READ_ONCE(req->status) == REQ_STATUS_RCVD) err = 0; } recalc_sigpending: if (sigpending) { spin_lock_irqsave(&current->sighand->siglock, flags); recalc_sigpending(); spin_unlock_irqrestore(&current->sighand->siglock, flags); } if (err < 0) goto reterr; err = p9_check_errors(c, req); trace_9p_client_res(c, type, req->rc.tag, err); if (!err) return req; reterr: p9_req_put(c, req); return ERR_PTR(safe_errno(err)); } static struct p9_fid *p9_fid_create(struct p9_client *clnt) { int ret; struct p9_fid *fid; p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt); fid = kzalloc(sizeof(*fid), GFP_KERNEL); if (!fid) return NULL; fid->mode = -1; fid->uid = current_fsuid(); fid->clnt = clnt; refcount_set(&fid->count, 1); idr_preload(GFP_KERNEL); spin_lock_irq(&clnt->lock); ret = idr_alloc_u32(&clnt->fids, fid, &fid->fid, P9_NOFID - 1, GFP_NOWAIT); spin_unlock_irq(&clnt->lock); idr_preload_end(); if (!ret) { trace_9p_fid_ref(fid, P9_FID_REF_CREATE); return fid; } kfree(fid); return NULL; } static void p9_fid_destroy(struct p9_fid *fid) { struct p9_client *clnt; unsigned long flags; p9_debug(P9_DEBUG_FID, "fid %d\n", fid->fid); trace_9p_fid_ref(fid, P9_FID_REF_DESTROY); clnt = fid->clnt; spin_lock_irqsave(&clnt->lock, flags); idr_remove(&clnt->fids, fid->fid); spin_unlock_irqrestore(&clnt->lock, flags); kfree(fid->rdir); kfree(fid); } /* We also need to export tracepoint symbols for tracepoint_enabled() */ EXPORT_TRACEPOINT_SYMBOL(9p_fid_ref); void do_trace_9p_fid_get(struct p9_fid *fid) { trace_9p_fid_ref(fid, P9_FID_REF_GET); } EXPORT_SYMBOL(do_trace_9p_fid_get); void do_trace_9p_fid_put(struct p9_fid *fid) { trace_9p_fid_ref(fid, P9_FID_REF_PUT); } EXPORT_SYMBOL(do_trace_9p_fid_put); static int p9_client_version(struct p9_client *c) { int err; struct p9_req_t *req; char *version = NULL; int msize; p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", c->msize, c->proto_version); switch (c->proto_version) { case p9_proto_2000L: req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, "9P2000.L"); break; case p9_proto_2000u: req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, "9P2000.u"); break; case p9_proto_legacy: req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, "9P2000"); break; default: return -EINVAL; } if (IS_ERR(req)) return PTR_ERR(req); err = p9pdu_readf(&req->rc, c->proto_version, "ds", &msize, &version); if (err) { p9_debug(P9_DEBUG_9P, "version error %d\n", err); trace_9p_protocol_dump(c, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); if (!strncmp(version, "9P2000.L", 8)) { c->proto_version = p9_proto_2000L; } else if (!strncmp(version, "9P2000.u", 8)) { c->proto_version = p9_proto_2000u; } else if (!strncmp(version, "9P2000", 6)) { c->proto_version = p9_proto_legacy; } else { p9_debug(P9_DEBUG_ERROR, "server returned an unknown version: %s\n", version); err = -EREMOTEIO; goto error; } if (msize < 4096) { p9_debug(P9_DEBUG_ERROR, "server returned a msize < 4096: %d\n", msize); err = -EREMOTEIO; goto error; } if (msize < c->msize) c->msize = msize; error: kfree(version); p9_req_put(c, req); return err; } struct p9_client *p9_client_create(const char *dev_name, char *options) { int err; struct p9_client *clnt; char *client_id; clnt = kmalloc(sizeof(*clnt), GFP_KERNEL); if (!clnt) return ERR_PTR(-ENOMEM); clnt->trans_mod = NULL; clnt->trans = NULL; clnt->fcall_cache = NULL; client_id = utsname()->nodename; memcpy(clnt->name, client_id, strlen(client_id) + 1); spin_lock_init(&clnt->lock); idr_init(&clnt->fids); idr_init(&clnt->reqs); err = parse_opts(options, clnt); if (err < 0) goto free_client; if (!clnt->trans_mod) clnt->trans_mod = v9fs_get_default_trans(); if (!clnt->trans_mod) { err = -EPROTONOSUPPORT; p9_debug(P9_DEBUG_ERROR, "No transport defined or default transport\n"); goto free_client; } p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); err = clnt->trans_mod->create(clnt, dev_name, options); if (err) goto put_trans; if (clnt->msize > clnt->trans_mod->maxsize) { clnt->msize = clnt->trans_mod->maxsize; pr_info("Limiting 'msize' to %d as this is the maximum " "supported by transport %s\n", clnt->msize, clnt->trans_mod->name ); } if (clnt->msize < 4096) { p9_debug(P9_DEBUG_ERROR, "Please specify a msize of at least 4k\n"); err = -EINVAL; goto close_trans; } err = p9_client_version(clnt); if (err) goto close_trans; /* P9_HDRSZ + 4 is the smallest packet header we can have that is * followed by data accessed from userspace by read */ clnt->fcall_cache = kmem_cache_create_usercopy("9p-fcall-cache", clnt->msize, 0, 0, P9_HDRSZ + 4, clnt->msize - (P9_HDRSZ + 4), NULL); return clnt; close_trans: clnt->trans_mod->close(clnt); put_trans: v9fs_put_trans(clnt->trans_mod); free_client: kfree(clnt); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_create); void p9_client_destroy(struct p9_client *clnt) { struct p9_fid *fid; int id; p9_debug(P9_DEBUG_MUX, "clnt %p\n", clnt); if (clnt->trans_mod) clnt->trans_mod->close(clnt); v9fs_put_trans(clnt->trans_mod); idr_for_each_entry(&clnt->fids, fid, id) { pr_info("Found fid %d not clunked\n", fid->fid); p9_fid_destroy(fid); } p9_tag_cleanup(clnt); kmem_cache_destroy(clnt->fcall_cache); kfree(clnt); } EXPORT_SYMBOL(p9_client_destroy); void p9_client_disconnect(struct p9_client *clnt) { p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt); clnt->status = Disconnected; } EXPORT_SYMBOL(p9_client_disconnect); void p9_client_begin_disconnect(struct p9_client *clnt) { p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt); clnt->status = BeginDisconnect; } EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, const char *uname, kuid_t n_uname, const char *aname) { int err; struct p9_req_t *req; struct p9_fid *fid; struct p9_qid qid; p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", afid ? afid->fid : -1, uname, aname); fid = p9_fid_create(clnt); if (!fid) { err = -ENOMEM; goto error; } fid->uid = n_uname; req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid, afid ? afid->fid : P9_NOFID, uname, aname, n_uname); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", &qid); if (err) { trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", qid.type, qid.path, qid.version); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); p9_req_put(clnt, req); return fid; error: if (fid) p9_fid_destroy(fid); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_attach); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, const unsigned char * const *wnames, int clone) { int err; struct p9_client *clnt; struct p9_fid *fid; struct p9_qid *wqids; struct p9_req_t *req; u16 nwqids, count; wqids = NULL; clnt = oldfid->clnt; if (clone) { fid = p9_fid_create(clnt); if (!fid) { err = -ENOMEM; goto error; } fid->uid = oldfid->uid; } else { fid = oldfid; } p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, nwname, wnames); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "R", &nwqids, &wqids); if (err) { trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); goto clunk_fid; } p9_req_put(clnt, req); p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); if (nwqids != nwname) { err = -ENOENT; goto clunk_fid; } for (count = 0; count < nwqids; count++) p9_debug(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", count, wqids[count].type, wqids[count].path, wqids[count].version); if (nwname) memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); else memmove(&fid->qid, &oldfid->qid, sizeof(struct p9_qid)); kfree(wqids); return fid; clunk_fid: kfree(wqids); p9_fid_put(fid); fid = NULL; error: if (fid && fid != oldfid) p9_fid_destroy(fid); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_walk); int p9_client_open(struct p9_fid *fid, int mode) { int err; struct p9_client *clnt; struct p9_req_t *req; struct p9_qid qid; int iounit; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode); if (fid->mode != -1) return -EINVAL; if (p9_is_proto_dotl(clnt)) req = p9_client_rpc(clnt, P9_TLOPEN, "dd", fid->fid, mode & P9L_MODE_MASK); else req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode & P9L_MODE_MASK); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { trace_9p_protocol_dump(clnt, &req->rc); goto free_and_error; } p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type, qid.path, qid.version, iounit); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); fid->mode = mode; fid->iounit = iounit; free_and_error: p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_open); int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode, kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; struct p9_req_t *req; int iounit; p9_debug(P9_DEBUG_9P, ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", ofid->fid, name, flags, mode, from_kgid(&init_user_ns, gid)); clnt = ofid->clnt; if (ofid->mode != -1) return -EINVAL; req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags, mode & P9L_MODE_MASK, gid); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", qid, &iounit); if (err) { trace_9p_protocol_dump(clnt, &req->rc); goto free_and_error; } p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", qid->type, qid->path, qid->version, iounit); memmove(&ofid->qid, qid, sizeof(struct p9_qid)); ofid->mode = flags; ofid->iounit = iounit; free_and_error: p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_create_dotl); int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode, char *extension) { int err; struct p9_client *clnt; struct p9_req_t *req; struct p9_qid qid; int iounit; p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", fid->fid, name, perm, mode); clnt = fid->clnt; if (fid->mode != -1) return -EINVAL; req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm, mode & P9L_MODE_MASK, extension); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "Qd", &qid, &iounit); if (err) { trace_9p_protocol_dump(clnt, &req->rc); goto free_and_error; } p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", qid.type, qid.path, qid.version, iounit); memmove(&fid->qid, &qid, sizeof(struct p9_qid)); fid->mode = mode; fid->iounit = iounit; free_and_error: p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_fcreate); int p9_client_symlink(struct p9_fid *dfid, const char *name, const char *symtgt, kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s symtgt %s\n", dfid->fid, name, symtgt); clnt = dfid->clnt; req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt, gid); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid); if (err) { trace_9p_protocol_dump(clnt, &req->rc); goto free_and_error; } p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", qid->type, qid->path, qid->version); free_and_error: p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_symlink); int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newname) { struct p9_client *clnt; struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", dfid->fid, oldfid->fid, newname); clnt = dfid->clnt; req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid, newname); if (IS_ERR(req)) return PTR_ERR(req); p9_debug(P9_DEBUG_9P, "<<< RLINK\n"); p9_req_put(clnt, req); return 0; } EXPORT_SYMBOL(p9_client_link); int p9_client_fsync(struct p9_fid *fid, int datasync) { int err = 0; struct p9_client *clnt; struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", fid->fid, datasync); clnt = fid->clnt; req = p9_client_rpc(clnt, P9_TFSYNC, "dd", fid->fid, datasync); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_fsync); int p9_client_clunk(struct p9_fid *fid) { int err = 0; struct p9_client *clnt; struct p9_req_t *req; int retries = 0; again: p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid, retries); clnt = fid->clnt; req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); p9_req_put(clnt, req); error: /* Fid is not valid even after a failed clunk * If interrupted, retry once then give up and * leak fid until umount. */ if (err == -ERESTARTSYS) { if (retries++ == 0) goto again; } else { p9_fid_destroy(fid); } return err; } EXPORT_SYMBOL(p9_client_clunk); int p9_client_remove(struct p9_fid *fid) { int err = 0; struct p9_client *clnt; struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); clnt = fid->clnt; req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); p9_req_put(clnt, req); error: if (err == -ERESTARTSYS) p9_fid_put(fid); else p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_remove); int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) { int err = 0; struct p9_req_t *req; struct p9_client *clnt; p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", dfid->fid, name, flags); clnt = dfid->clnt; req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_unlinkat); int p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) { int total = 0; *err = 0; while (iov_iter_count(to)) { int count; count = p9_client_read_once(fid, offset, to, err); if (!count || *err) break; offset += count; total += count; } return total; } EXPORT_SYMBOL(p9_client_read); int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err) { struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int count = iov_iter_count(to); int rsize, received, non_zc = 0; char *dataptr; *err = 0; p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %zu\n", fid->fid, offset, iov_iter_count(to)); rsize = fid->iounit; if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; if (count < rsize) rsize = count; /* Don't bother zerocopy for small IO (< 1024) */ if (clnt->trans_mod->zc_request && rsize > 1024) { /* response header len is 11 * PDU Header(7) + IO Size (4) */ req = p9_client_zc_rpc(clnt, P9_TREAD, to, NULL, rsize, 0, 11, "dqd", fid->fid, offset, rsize); } else { non_zc = 1; req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); } if (IS_ERR(req)) { *err = PTR_ERR(req); if (!non_zc) iov_iter_revert(to, count - iov_iter_count(to)); return 0; } *err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &received, &dataptr); if (*err) { if (!non_zc) iov_iter_revert(to, count - iov_iter_count(to)); trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); return 0; } if (rsize < received) { pr_err("bogus RREAD count (%d > %d)\n", received, rsize); received = rsize; } p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", received); if (non_zc) { int n = copy_to_iter(dataptr, received, to); if (n != received) { *err = -EFAULT; p9_req_put(clnt, req); return n; } } else { iov_iter_revert(to, count - received - iov_iter_count(to)); } p9_req_put(clnt, req); return received; } EXPORT_SYMBOL(p9_client_read_once); int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err) { struct p9_client *clnt = fid->clnt; struct p9_req_t *req; int total = 0; *err = 0; while (iov_iter_count(from)) { int count = iov_iter_count(from); int rsize = fid->iounit; int written; if (!rsize || rsize > clnt->msize - P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; if (count < rsize) rsize = count; p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d (/%d)\n", fid->fid, offset, rsize, count); /* Don't bother zerocopy for small IO (< 1024) */ if (clnt->trans_mod->zc_request && rsize > 1024) { req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, from, 0, rsize, P9_ZC_HDR_SZ, "dqd", fid->fid, offset, rsize); } else { req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid, offset, rsize, from); } if (IS_ERR(req)) { iov_iter_revert(from, count - iov_iter_count(from)); *err = PTR_ERR(req); break; } *err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written); if (*err) { iov_iter_revert(from, count - iov_iter_count(from)); trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); break; } if (rsize < written) { pr_err("bogus RWRITE count (%d > %d)\n", written, rsize); written = rsize; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written); p9_req_put(clnt, req); iov_iter_revert(from, count - written - iov_iter_count(from)); total += written; offset += written; } return total; } EXPORT_SYMBOL(p9_client_write); void p9_client_write_subreq(struct netfs_io_subrequest *subreq) { struct netfs_io_request *wreq = subreq->rreq; struct p9_fid *fid = wreq->netfs_priv; struct p9_client *clnt = fid->clnt; struct p9_req_t *req; unsigned long long start = subreq->start + subreq->transferred; int written, len = subreq->len - subreq->transferred; int err; p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu len %d\n", fid->fid, start, len); /* Don't bother zerocopy for small IO (< 1024) */ if (clnt->trans_mod->zc_request && len > 1024) { req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, &subreq->io_iter, 0, wreq->len, P9_ZC_HDR_SZ, "dqd", fid->fid, start, len); } else { req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid, start, len, &subreq->io_iter); } if (IS_ERR(req)) { netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false); return; } err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written); if (err) { trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); netfs_write_subrequest_terminated(subreq, err, false); return; } if (written > len) { pr_err("bogus RWRITE count (%d > %u)\n", written, len); written = len; } p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", len); p9_req_put(clnt, req); netfs_write_subrequest_terminated(subreq, written, false); } EXPORT_SYMBOL(p9_client_write_subreq); struct p9_wstat *p9_client_stat(struct p9_fid *fid) { int err; struct p9_client *clnt; struct p9_wstat *ret; struct p9_req_t *req; u16 ignored; p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); ret = kmalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return ERR_PTR(-ENOMEM); clnt = fid->clnt; req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "wS", &ignored, ret); if (err) { trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n" "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" "<<< uid=%d gid=%d n_muid=%d\n", ret->size, ret->type, ret->dev, ret->qid.type, ret->qid.path, ret->qid.version, ret->mode, ret->atime, ret->mtime, ret->length, ret->name, ret->uid, ret->gid, ret->muid, ret->extension, from_kuid(&init_user_ns, ret->n_uid), from_kgid(&init_user_ns, ret->n_gid), from_kuid(&init_user_ns, ret->n_muid)); p9_req_put(clnt, req); return ret; error: kfree(ret); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_stat); struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, u64 request_mask) { int err; struct p9_client *clnt; struct p9_stat_dotl *ret; struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", fid->fid, request_mask); ret = kmalloc(sizeof(*ret), GFP_KERNEL); if (!ret) return ERR_PTR(-ENOMEM); clnt = fid->clnt; req = p9_client_rpc(clnt, P9_TGETATTR, "dq", fid->fid, request_mask); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "A", ret); if (err) { trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RGETATTR st_result_mask=%lld\n" "<<< qid=%x.%llx.%x\n" "<<< st_mode=%8.8x st_nlink=%llu\n" "<<< st_uid=%d st_gid=%d\n" "<<< st_rdev=%llx st_size=%llx st_blksize=%llu st_blocks=%llu\n" "<<< st_atime_sec=%lld st_atime_nsec=%lld\n" "<<< st_mtime_sec=%lld st_mtime_nsec=%lld\n" "<<< st_ctime_sec=%lld st_ctime_nsec=%lld\n" "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" "<<< st_gen=%lld st_data_version=%lld\n", ret->st_result_mask, ret->qid.type, ret->qid.path, ret->qid.version, ret->st_mode, ret->st_nlink, from_kuid(&init_user_ns, ret->st_uid), from_kgid(&init_user_ns, ret->st_gid), ret->st_rdev, ret->st_size, ret->st_blksize, ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec, ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec, ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, ret->st_gen, ret->st_data_version); p9_req_put(clnt, req); return ret; error: kfree(ret); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_getattr_dotl); static int p9_client_statsize(struct p9_wstat *wst, int proto_version) { int ret; /* NOTE: size shouldn't include its own length */ /* size[2] type[2] dev[4] qid[13] */ /* mode[4] atime[4] mtime[4] length[8]*/ /* name[s] uid[s] gid[s] muid[s] */ ret = 2 + 4 + 13 + 4 + 4 + 4 + 8 + 2 + 2 + 2 + 2; if (wst->name) ret += strlen(wst->name); if (wst->uid) ret += strlen(wst->uid); if (wst->gid) ret += strlen(wst->gid); if (wst->muid) ret += strlen(wst->muid); if (proto_version == p9_proto_2000u || proto_version == p9_proto_2000L) { /* extension[s] n_uid[4] n_gid[4] n_muid[4] */ ret += 2 + 4 + 4 + 4; if (wst->extension) ret += strlen(wst->extension); } return ret; } int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) { int err = 0; struct p9_req_t *req; struct p9_client *clnt; clnt = fid->clnt; wst->size = p9_client_statsize(wst, clnt->proto_version); p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); p9_debug(P9_DEBUG_9P, " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" " uid=%d gid=%d n_muid=%d\n", wst->size, wst->type, wst->dev, wst->qid.type, wst->qid.path, wst->qid.version, wst->mode, wst->atime, wst->mtime, wst->length, wst->name, wst->uid, wst->gid, wst->muid, wst->extension, from_kuid(&init_user_ns, wst->n_uid), from_kgid(&init_user_ns, wst->n_gid), from_kuid(&init_user_ns, wst->n_muid)); req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size + 2, wst); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_wstat); int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) { int err = 0; struct p9_req_t *req; struct p9_client *clnt; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); p9_debug(P9_DEBUG_9P, " valid=%x mode=%x uid=%d gid=%d size=%lld\n", p9attr->valid, p9attr->mode, from_kuid(&init_user_ns, p9attr->uid), from_kgid(&init_user_ns, p9attr->gid), p9attr->size); p9_debug(P9_DEBUG_9P, " atime_sec=%lld atime_nsec=%lld\n", p9attr->atime_sec, p9attr->atime_nsec); p9_debug(P9_DEBUG_9P, " mtime_sec=%lld mtime_nsec=%lld\n", p9attr->mtime_sec, p9attr->mtime_nsec); req = p9_client_rpc(clnt, P9_TSETATTR, "dI", fid->fid, p9attr); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_setattr); int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb) { int err; struct p9_req_t *req; struct p9_client *clnt; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "ddqqqqqqd", &sb->type, &sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail, &sb->files, &sb->ffree, &sb->fsid, &sb->namelen); if (err) { trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%x bsize %u blocks %llu bfree %llu bavail %llu files %llu ffree %llu fsid %llu namelen %u\n", fid->fid, sb->type, sb->bsize, sb->blocks, sb->bfree, sb->bavail, sb->files, sb->ffree, sb->fsid, sb->namelen); p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_statfs); int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, const char *name) { int err = 0; struct p9_req_t *req; struct p9_client *clnt; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", fid->fid, newdirfid->fid, name); req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid, newdirfid->fid, name); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_rename); int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, struct p9_fid *newdirfid, const char *new_name) { int err = 0; struct p9_req_t *req; struct p9_client *clnt; clnt = olddirfid->clnt; p9_debug(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s newdirfid %d new name %s\n", olddirfid->fid, old_name, newdirfid->fid, new_name); req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid, old_name, newdirfid->fid, new_name); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", newdirfid->fid, new_name); p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_renameat); /* An xattrwalk without @attr_name gives the fid for the lisxattr namespace */ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid, const char *attr_name, u64 *attr_size) { int err; struct p9_req_t *req; struct p9_client *clnt; struct p9_fid *attr_fid; clnt = file_fid->clnt; attr_fid = p9_fid_create(clnt); if (!attr_fid) { err = -ENOMEM; goto error; } p9_debug(P9_DEBUG_9P, ">>> TXATTRWALK file_fid %d, attr_fid %d name '%s'\n", file_fid->fid, attr_fid->fid, attr_name); req = p9_client_rpc(clnt, P9_TXATTRWALK, "dds", file_fid->fid, attr_fid->fid, attr_name); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "q", attr_size); if (err) { trace_9p_protocol_dump(clnt, &req->rc); p9_req_put(clnt, req); goto clunk_fid; } p9_req_put(clnt, req); p9_debug(P9_DEBUG_9P, "<<< RXATTRWALK fid %d size %llu\n", attr_fid->fid, *attr_size); return attr_fid; clunk_fid: p9_fid_put(attr_fid); attr_fid = NULL; error: if (attr_fid && attr_fid != file_fid) p9_fid_destroy(attr_fid); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(p9_client_xattrwalk); int p9_client_xattrcreate(struct p9_fid *fid, const char *name, u64 attr_size, int flags) { int err = 0; struct p9_req_t *req; struct p9_client *clnt; p9_debug(P9_DEBUG_9P, ">>> TXATTRCREATE fid %d name %s size %llu flag %d\n", fid->fid, name, attr_size, flags); clnt = fid->clnt; req = p9_client_rpc(clnt, P9_TXATTRCREATE, "dsqd", fid->fid, name, attr_size, flags); if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL_GPL(p9_client_xattrcreate); int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) { int err, rsize, non_zc = 0; struct p9_client *clnt; struct p9_req_t *req; char *dataptr; struct kvec kv = {.iov_base = data, .iov_len = count}; struct iov_iter to; iov_iter_kvec(&to, ITER_DEST, &kv, 1, count); p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", fid->fid, offset, count); clnt = fid->clnt; rsize = fid->iounit; if (!rsize || rsize > clnt->msize - P9_READDIRHDRSZ) rsize = clnt->msize - P9_READDIRHDRSZ; if (count < rsize) rsize = count; /* Don't bother zerocopy for small IO (< 1024) */ if (clnt->trans_mod->zc_request && rsize > 1024) { /* response header len is 11 * PDU Header(7) + IO Size (4) */ req = p9_client_zc_rpc(clnt, P9_TREADDIR, &to, NULL, rsize, 0, 11, "dqd", fid->fid, offset, rsize); } else { non_zc = 1; req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); } if (IS_ERR(req)) { err = PTR_ERR(req); goto error; } err = p9pdu_readf(&req->rc, clnt->proto_version, "D", &count, &dataptr); if (err) { trace_9p_protocol_dump(clnt, &req->rc); goto free_and_error; } if (rsize < count) { pr_err("bogus RREADDIR count (%d > %d)\n", count, rsize); count = rsize; } p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); if (non_zc) memmove(data, dataptr, count); p9_req_put(clnt, req); return count; free_and_error: p9_req_put(clnt, req); error: return err; } EXPORT_SYMBOL(p9_client_readdir); int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode, dev_t rdev, kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; struct p9_req_t *req; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev), gid); if (IS_ERR(req)) return PTR_ERR(req); err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid); if (err) { trace_9p_protocol_dump(clnt, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, qid->path, qid->version); error: p9_req_put(clnt, req); return err; } EXPORT_SYMBOL(p9_client_mknod_dotl); int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode, kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; struct p9_req_t *req; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", fid->fid, name, mode, from_kgid(&init_user_ns, gid)); req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode, gid); if (IS_ERR(req)) return PTR_ERR(req); err = p9pdu_readf(&req->rc, clnt->proto_version, "Q", qid); if (err) { trace_9p_protocol_dump(clnt, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, qid->path, qid->version); error: p9_req_put(clnt, req); return err; } EXPORT_SYMBOL(p9_client_mkdir_dotl); int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status) { int err; struct p9_client *clnt; struct p9_req_t *req; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d start %lld length %lld proc_id %d client_id %s\n", fid->fid, flock->type, flock->flags, flock->start, flock->length, flock->proc_id, flock->client_id); req = p9_client_rpc(clnt, P9_TLOCK, "dbdqqds", fid->fid, flock->type, flock->flags, flock->start, flock->length, flock->proc_id, flock->client_id); if (IS_ERR(req)) return PTR_ERR(req); err = p9pdu_readf(&req->rc, clnt->proto_version, "b", status); if (err) { trace_9p_protocol_dump(clnt, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); error: p9_req_put(clnt, req); return err; } EXPORT_SYMBOL(p9_client_lock_dotl); int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock) { int err; struct p9_client *clnt; struct p9_req_t *req; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld length %lld proc_id %d client_id %s\n", fid->fid, glock->type, glock->start, glock->length, glock->proc_id, glock->client_id); req = p9_client_rpc(clnt, P9_TGETLOCK, "dbqqds", fid->fid, glock->type, glock->start, glock->length, glock->proc_id, glock->client_id); if (IS_ERR(req)) return PTR_ERR(req); err = p9pdu_readf(&req->rc, clnt->proto_version, "bqqds", &glock->type, &glock->start, &glock->length, &glock->proc_id, &glock->client_id); if (err) { trace_9p_protocol_dump(clnt, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld proc_id %d client_id %s\n", glock->type, glock->start, glock->length, glock->proc_id, glock->client_id); error: p9_req_put(clnt, req); return err; } EXPORT_SYMBOL(p9_client_getlock_dotl); int p9_client_readlink(struct p9_fid *fid, char **target) { int err; struct p9_client *clnt; struct p9_req_t *req; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid); if (IS_ERR(req)) return PTR_ERR(req); err = p9pdu_readf(&req->rc, clnt->proto_version, "s", target); if (err) { trace_9p_protocol_dump(clnt, &req->rc); goto error; } p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); error: p9_req_put(clnt, req); return err; } EXPORT_SYMBOL(p9_client_readlink); int __init p9_client_init(void) { p9_req_cache = KMEM_CACHE(p9_req_t, SLAB_TYPESAFE_BY_RCU); return p9_req_cache ? 0 : -ENOMEM; } void __exit p9_client_exit(void) { kmem_cache_destroy(p9_req_cache); }
14 228 228 228 227 226 233 233 21 228 227 226 227 227 232 232 21 227 208 207 202 208 229 229 229 30 218 229 229 229 229 229 11 11 43 43 43 43 43 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 // SPDX-License-Identifier: GPL-2.0-only /* * linux/mm/mmu_notifier.c * * Copyright (C) 2008 Qumranet, Inc. * Copyright (C) 2008 SGI * Christoph Lameter <cl@linux.com> */ #include <linux/rculist.h> #include <linux/mmu_notifier.h> #include <linux/export.h> #include <linux/mm.h> #include <linux/err.h> #include <linux/interval_tree.h> #include <linux/srcu.h> #include <linux/rcupdate.h> #include <linux/sched.h> #include <linux/sched/mm.h> #include <linux/slab.h> /* global SRCU for all MMs */ DEFINE_STATIC_SRCU(srcu); #ifdef CONFIG_LOCKDEP struct lockdep_map __mmu_notifier_invalidate_range_start_map = { .name = "mmu_notifier_invalidate_range_start" }; #endif /* * The mmu_notifier_subscriptions structure is allocated and installed in * mm->notifier_subscriptions inside the mm_take_all_locks() protected * critical section and it's released only when mm_count reaches zero * in mmdrop(). */ struct mmu_notifier_subscriptions { /* all mmu notifiers registered in this mm are queued in this list */ struct hlist_head list; bool has_itree; /* to serialize the list modifications and hlist_unhashed */ spinlock_t lock; unsigned long invalidate_seq; unsigned long active_invalidate_ranges; struct rb_root_cached itree; wait_queue_head_t wq; struct hlist_head deferred_list; }; /* * This is a collision-retry read-side/write-side 'lock', a lot like a * seqcount, however this allows multiple write-sides to hold it at * once. Conceptually the write side is protecting the values of the PTEs in * this mm, such that PTES cannot be read into SPTEs (shadow PTEs) while any * writer exists. * * Note that the core mm creates nested invalidate_range_start()/end() regions * within the same thread, and runs invalidate_range_start()/end() in parallel * on multiple CPUs. This is designed to not reduce concurrency or block * progress on the mm side. * * As a secondary function, holding the full write side also serves to prevent * writers for the itree, this is an optimization to avoid extra locking * during invalidate_range_start/end notifiers. * * The write side has two states, fully excluded: * - mm->active_invalidate_ranges != 0 * - subscriptions->invalidate_seq & 1 == True (odd) * - some range on the mm_struct is being invalidated * - the itree is not allowed to change * * And partially excluded: * - mm->active_invalidate_ranges != 0 * - subscriptions->invalidate_seq & 1 == False (even) * - some range on the mm_struct is being invalidated * - the itree is allowed to change * * Operations on notifier_subscriptions->invalidate_seq (under spinlock): * seq |= 1 # Begin writing * seq++ # Release the writing state * seq & 1 # True if a writer exists * * The later state avoids some expensive work on inv_end in the common case of * no mmu_interval_notifier monitoring the VA. */ static bool mn_itree_is_invalidating(struct mmu_notifier_subscriptions *subscriptions) { lockdep_assert_held(&subscriptions->lock); return subscriptions->invalidate_seq & 1; } static struct mmu_interval_notifier * mn_itree_inv_start_range(struct mmu_notifier_subscriptions *subscriptions, const struct mmu_notifier_range *range, unsigned long *seq) { struct interval_tree_node *node; struct mmu_interval_notifier *res = NULL; spin_lock(&subscriptions->lock); subscriptions->active_invalidate_ranges++; node = interval_tree_iter_first(&subscriptions->itree, range->start, range->end - 1); if (node) { subscriptions->invalidate_seq |= 1; res = container_of(node, struct mmu_interval_notifier, interval_tree); } *seq = subscriptions->invalidate_seq; spin_unlock(&subscriptions->lock); return res; } static struct mmu_interval_notifier * mn_itree_inv_next(struct mmu_interval_notifier *interval_sub, const struct mmu_notifier_range *range) { struct interval_tree_node *node; node = interval_tree_iter_next(&interval_sub->interval_tree, range->start, range->end - 1); if (!node) return NULL; return container_of(node, struct mmu_interval_notifier, interval_tree); } static void mn_itree_inv_end(struct mmu_notifier_subscriptions *subscriptions) { struct mmu_interval_notifier *interval_sub; struct hlist_node *next; spin_lock(&subscriptions->lock); if (--subscriptions->active_invalidate_ranges || !mn_itree_is_invalidating(subscriptions)) { spin_unlock(&subscriptions->lock); return; } /* Make invalidate_seq even */ subscriptions->invalidate_seq++; /* * The inv_end incorporates a deferred mechanism like rtnl_unlock(). * Adds and removes are queued until the final inv_end happens then * they are progressed. This arrangement for tree updates is used to * avoid using a blocking lock during invalidate_range_start. */ hlist_for_each_entry_safe(interval_sub, next, &subscriptions->deferred_list, deferred_item) { if (RB_EMPTY_NODE(&interval_sub->interval_tree.rb)) interval_tree_insert(&interval_sub->interval_tree, &subscriptions->itree); else interval_tree_remove(&interval_sub->interval_tree, &subscriptions->itree); hlist_del(&interval_sub->deferred_item); } spin_unlock(&subscriptions->lock); wake_up_all(&subscriptions->wq); } /** * mmu_interval_read_begin - Begin a read side critical section against a VA * range * @interval_sub: The interval subscription * * mmu_iterval_read_begin()/mmu_iterval_read_retry() implement a * collision-retry scheme similar to seqcount for the VA range under * subscription. If the mm invokes invalidation during the critical section * then mmu_interval_read_retry() will return true. * * This is useful to obtain shadow PTEs where teardown or setup of the SPTEs * require a blocking context. The critical region formed by this can sleep, * and the required 'user_lock' can also be a sleeping lock. * * The caller is required to provide a 'user_lock' to serialize both teardown * and setup. * * The return value should be passed to mmu_interval_read_retry(). */ unsigned long mmu_interval_read_begin(struct mmu_interval_notifier *interval_sub) { struct mmu_notifier_subscriptions *subscriptions = interval_sub->mm->notifier_subscriptions; unsigned long seq; bool is_invalidating; /* * If the subscription has a different seq value under the user_lock * than we started with then it has collided. * * If the subscription currently has the same seq value as the * subscriptions seq, then it is currently between * invalidate_start/end and is colliding. * * The locking looks broadly like this: * mn_itree_inv_start(): mmu_interval_read_begin(): * spin_lock * seq = READ_ONCE(interval_sub->invalidate_seq); * seq == subs->invalidate_seq * spin_unlock * spin_lock * seq = ++subscriptions->invalidate_seq * spin_unlock * op->invalidate(): * user_lock * mmu_interval_set_seq() * interval_sub->invalidate_seq = seq * user_unlock * * [Required: mmu_interval_read_retry() == true] * * mn_itree_inv_end(): * spin_lock * seq = ++subscriptions->invalidate_seq * spin_unlock * * user_lock * mmu_interval_read_retry(): * interval_sub->invalidate_seq != seq * user_unlock * * Barriers are not needed here as any races here are closed by an * eventual mmu_interval_read_retry(), which provides a barrier via the * user_lock. */ spin_lock(&subscriptions->lock); /* Pairs with the WRITE_ONCE in mmu_interval_set_seq() */ seq = READ_ONCE(interval_sub->invalidate_seq); is_invalidating = seq == subscriptions->invalidate_seq; spin_unlock(&subscriptions->lock); /* * interval_sub->invalidate_seq must always be set to an odd value via * mmu_interval_set_seq() using the provided cur_seq from * mn_itree_inv_start_range(). This ensures that if seq does wrap we * will always clear the below sleep in some reasonable time as * subscriptions->invalidate_seq is even in the idle state. */ lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); lock_map_release(&__mmu_notifier_invalidate_range_start_map); if (is_invalidating) wait_event(subscriptions->wq, READ_ONCE(subscriptions->invalidate_seq) != seq); /* * Notice that mmu_interval_read_retry() can already be true at this * point, avoiding loops here allows the caller to provide a global * time bound. */ return seq; } EXPORT_SYMBOL_GPL(mmu_interval_read_begin); static void mn_itree_release(struct mmu_notifier_subscriptions *subscriptions, struct mm_struct *mm) { struct mmu_notifier_range range = { .flags = MMU_NOTIFIER_RANGE_BLOCKABLE, .event = MMU_NOTIFY_RELEASE, .mm = mm, .start = 0, .end = ULONG_MAX, }; struct mmu_interval_notifier *interval_sub; unsigned long cur_seq; bool ret; for (interval_sub = mn_itree_inv_start_range(subscriptions, &range, &cur_seq); interval_sub; interval_sub = mn_itree_inv_next(interval_sub, &range)) { ret = interval_sub->ops->invalidate(interval_sub, &range, cur_seq); WARN_ON(!ret); } mn_itree_inv_end(subscriptions); } /* * This function can't run concurrently against mmu_notifier_register * because mm->mm_users > 0 during mmu_notifier_register and exit_mmap * runs with mm_users == 0. Other tasks may still invoke mmu notifiers * in parallel despite there being no task using this mm any more, * through the vmas outside of the exit_mmap context, such as with * vmtruncate. This serializes against mmu_notifier_unregister with * the notifier_subscriptions->lock in addition to SRCU and it serializes * against the other mmu notifiers with SRCU. struct mmu_notifier_subscriptions * can't go away from under us as exit_mmap holds an mm_count pin * itself. */ static void mn_hlist_release(struct mmu_notifier_subscriptions *subscriptions, struct mm_struct *mm) { struct mmu_notifier *subscription; int id; /* * SRCU here will block mmu_notifier_unregister until * ->release returns. */ id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, srcu_read_lock_held(&srcu)) /* * If ->release runs before mmu_notifier_unregister it must be * handled, as it's the only way for the driver to flush all * existing sptes and stop the driver from establishing any more * sptes before all the pages in the mm are freed. */ if (subscription->ops->release) subscription->ops->release(subscription, mm); spin_lock(&subscriptions->lock); while (unlikely(!hlist_empty(&subscriptions->list))) { subscription = hlist_entry(subscriptions->list.first, struct mmu_notifier, hlist); /* * We arrived before mmu_notifier_unregister so * mmu_notifier_unregister will do nothing other than to wait * for ->release to finish and for mmu_notifier_unregister to * return. */ hlist_del_init_rcu(&subscription->hlist); } spin_unlock(&subscriptions->lock); srcu_read_unlock(&srcu, id); /* * synchronize_srcu here prevents mmu_notifier_release from returning to * exit_mmap (which would proceed with freeing all pages in the mm) * until the ->release method returns, if it was invoked by * mmu_notifier_unregister. * * The notifier_subscriptions can't go away from under us because * one mm_count is held by exit_mmap. */ synchronize_srcu(&srcu); } void __mmu_notifier_release(struct mm_struct *mm) { struct mmu_notifier_subscriptions *subscriptions = mm->notifier_subscriptions; if (subscriptions->has_itree) mn_itree_release(subscriptions, mm); if (!hlist_empty(&subscriptions->list)) mn_hlist_release(subscriptions, mm); } /* * If no young bitflag is supported by the hardware, ->clear_flush_young can * unmap the address and return 1 or 0 depending if the mapping previously * existed or not. */ int __mmu_notifier_clear_flush_young(struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_notifier *subscription; int young = 0, id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &mm->notifier_subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (subscription->ops->clear_flush_young) young |= subscription->ops->clear_flush_young( subscription, mm, start, end); } srcu_read_unlock(&srcu, id); return young; } int __mmu_notifier_clear_young(struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_notifier *subscription; int young = 0, id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &mm->notifier_subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (subscription->ops->clear_young) young |= subscription->ops->clear_young(subscription, mm, start, end); } srcu_read_unlock(&srcu, id); return young; } int __mmu_notifier_test_young(struct mm_struct *mm, unsigned long address) { struct mmu_notifier *subscription; int young = 0, id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &mm->notifier_subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (subscription->ops->test_young) { young = subscription->ops->test_young(subscription, mm, address); if (young) break; } } srcu_read_unlock(&srcu, id); return young; } static int mn_itree_invalidate(struct mmu_notifier_subscriptions *subscriptions, const struct mmu_notifier_range *range) { struct mmu_interval_notifier *interval_sub; unsigned long cur_seq; for (interval_sub = mn_itree_inv_start_range(subscriptions, range, &cur_seq); interval_sub; interval_sub = mn_itree_inv_next(interval_sub, range)) { bool ret; ret = interval_sub->ops->invalidate(interval_sub, range, cur_seq); if (!ret) { if (WARN_ON(mmu_notifier_range_blockable(range))) continue; goto out_would_block; } } return 0; out_would_block: /* * On -EAGAIN the non-blocking caller is not allowed to call * invalidate_range_end() */ mn_itree_inv_end(subscriptions); return -EAGAIN; } static int mn_hlist_invalidate_range_start( struct mmu_notifier_subscriptions *subscriptions, struct mmu_notifier_range *range) { struct mmu_notifier *subscription; int ret = 0; int id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { const struct mmu_notifier_ops *ops = subscription->ops; if (ops->invalidate_range_start) { int _ret; if (!mmu_notifier_range_blockable(range)) non_block_start(); _ret = ops->invalidate_range_start(subscription, range); if (!mmu_notifier_range_blockable(range)) non_block_end(); if (_ret) { pr_info("%pS callback failed with %d in %sblockable context.\n", ops->invalidate_range_start, _ret, !mmu_notifier_range_blockable(range) ? "non-" : ""); WARN_ON(mmu_notifier_range_blockable(range) || _ret != -EAGAIN); /* * We call all the notifiers on any EAGAIN, * there is no way for a notifier to know if * its start method failed, thus a start that * does EAGAIN can't also do end. */ WARN_ON(ops->invalidate_range_end); ret = _ret; } } } if (ret) { /* * Must be non-blocking to get here. If there are multiple * notifiers and one or more failed start, any that succeeded * start are expecting their end to be called. Do so now. */ hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (!subscription->ops->invalidate_range_end) continue; subscription->ops->invalidate_range_end(subscription, range); } } srcu_read_unlock(&srcu, id); return ret; } int __mmu_notifier_invalidate_range_start(struct mmu_notifier_range *range) { struct mmu_notifier_subscriptions *subscriptions = range->mm->notifier_subscriptions; int ret; if (subscriptions->has_itree) { ret = mn_itree_invalidate(subscriptions, range); if (ret) return ret; } if (!hlist_empty(&subscriptions->list)) return mn_hlist_invalidate_range_start(subscriptions, range); return 0; } static void mn_hlist_invalidate_end(struct mmu_notifier_subscriptions *subscriptions, struct mmu_notifier_range *range) { struct mmu_notifier *subscription; int id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (subscription->ops->invalidate_range_end) { if (!mmu_notifier_range_blockable(range)) non_block_start(); subscription->ops->invalidate_range_end(subscription, range); if (!mmu_notifier_range_blockable(range)) non_block_end(); } } srcu_read_unlock(&srcu, id); } void __mmu_notifier_invalidate_range_end(struct mmu_notifier_range *range) { struct mmu_notifier_subscriptions *subscriptions = range->mm->notifier_subscriptions; lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); if (subscriptions->has_itree) mn_itree_inv_end(subscriptions); if (!hlist_empty(&subscriptions->list)) mn_hlist_invalidate_end(subscriptions, range); lock_map_release(&__mmu_notifier_invalidate_range_start_map); } void __mmu_notifier_arch_invalidate_secondary_tlbs(struct mm_struct *mm, unsigned long start, unsigned long end) { struct mmu_notifier *subscription; int id; id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(subscription, &mm->notifier_subscriptions->list, hlist, srcu_read_lock_held(&srcu)) { if (subscription->ops->arch_invalidate_secondary_tlbs) subscription->ops->arch_invalidate_secondary_tlbs( subscription, mm, start, end); } srcu_read_unlock(&srcu, id); } /* * Same as mmu_notifier_register but here the caller must hold the mmap_lock in * write mode. A NULL mn signals the notifier is being registered for itree * mode. */ int __mmu_notifier_register(struct mmu_notifier *subscription, struct mm_struct *mm) { struct mmu_notifier_subscriptions *subscriptions = NULL; int ret; mmap_assert_write_locked(mm); BUG_ON(atomic_read(&mm->mm_users) <= 0); /* * Subsystems should only register for invalidate_secondary_tlbs() or * invalidate_range_start()/end() callbacks, not both. */ if (WARN_ON_ONCE(subscription && (subscription->ops->arch_invalidate_secondary_tlbs && (subscription->ops->invalidate_range_start || subscription->ops->invalidate_range_end)))) return -EINVAL; if (!mm->notifier_subscriptions) { /* * kmalloc cannot be called under mm_take_all_locks(), but we * know that mm->notifier_subscriptions can't change while we * hold the write side of the mmap_lock. */ subscriptions = kzalloc( sizeof(struct mmu_notifier_subscriptions), GFP_KERNEL); if (!subscriptions) return -ENOMEM; INIT_HLIST_HEAD(&subscriptions->list); spin_lock_init(&subscriptions->lock); subscriptions->invalidate_seq = 2; subscriptions->itree = RB_ROOT_CACHED; init_waitqueue_head(&subscriptions->wq); INIT_HLIST_HEAD(&subscriptions->deferred_list); } ret = mm_take_all_locks(mm); if (unlikely(ret)) goto out_clean; /* * Serialize the update against mmu_notifier_unregister. A * side note: mmu_notifier_release can't run concurrently with * us because we hold the mm_users pin (either implicitly as * current->mm or explicitly with get_task_mm() or similar). * We can't race against any other mmu notifier method either * thanks to mm_take_all_locks(). * * release semantics on the initialization of the * mmu_notifier_subscriptions's contents are provided for unlocked * readers. acquire can only be used while holding the mmgrab or * mmget, and is safe because once created the * mmu_notifier_subscriptions is not freed until the mm is destroyed. * As above, users holding the mmap_lock or one of the * mm_take_all_locks() do not need to use acquire semantics. */ if (subscriptions) smp_store_release(&mm->notifier_subscriptions, subscriptions); if (subscription) { /* Pairs with the mmdrop in mmu_notifier_unregister_* */ mmgrab(mm); subscription->mm = mm; subscription->users = 1; spin_lock(&mm->notifier_subscriptions->lock); hlist_add_head_rcu(&subscription->hlist, &mm->notifier_subscriptions->list); spin_unlock(&mm->notifier_subscriptions->lock); } else mm->notifier_subscriptions->has_itree = true; mm_drop_all_locks(mm); BUG_ON(atomic_read(&mm->mm_users) <= 0); return 0; out_clean: kfree(subscriptions); return ret; } EXPORT_SYMBOL_GPL(__mmu_notifier_register); /** * mmu_notifier_register - Register a notifier on a mm * @subscription: The notifier to attach * @mm: The mm to attach the notifier to * * Must not hold mmap_lock nor any other VM related lock when calling * this registration function. Must also ensure mm_users can't go down * to zero while this runs to avoid races with mmu_notifier_release, * so mm has to be current->mm or the mm should be pinned safely such * as with get_task_mm(). If the mm is not current->mm, the mm_users * pin should be released by calling mmput after mmu_notifier_register * returns. * * mmu_notifier_unregister() or mmu_notifier_put() must be always called to * unregister the notifier. * * While the caller has a mmu_notifier get the subscription->mm pointer will remain * valid, and can be converted to an active mm pointer via mmget_not_zero(). */ int mmu_notifier_register(struct mmu_notifier *subscription, struct mm_struct *mm) { int ret; mmap_write_lock(mm); ret = __mmu_notifier_register(subscription, mm); mmap_write_unlock(mm); return ret; } EXPORT_SYMBOL_GPL(mmu_notifier_register); static struct mmu_notifier * find_get_mmu_notifier(struct mm_struct *mm, const struct mmu_notifier_ops *ops) { struct mmu_notifier *subscription; spin_lock(&mm->notifier_subscriptions->lock); hlist_for_each_entry_rcu(subscription, &mm->notifier_subscriptions->list, hlist, lockdep_is_held(&mm->notifier_subscriptions->lock)) { if (subscription->ops != ops) continue; if (likely(subscription->users != UINT_MAX)) subscription->users++; else subscription = ERR_PTR(-EOVERFLOW); spin_unlock(&mm->notifier_subscriptions->lock); return subscription; } spin_unlock(&mm->notifier_subscriptions->lock); return NULL; } /** * mmu_notifier_get_locked - Return the single struct mmu_notifier for * the mm & ops * @ops: The operations struct being subscribe with * @mm : The mm to attach notifiers too * * This function either allocates a new mmu_notifier via * ops->alloc_notifier(), or returns an already existing notifier on the * list. The value of the ops pointer is used to determine when two notifiers * are the same. * * Each call to mmu_notifier_get() must be paired with a call to * mmu_notifier_put(). The caller must hold the write side of mm->mmap_lock. * * While the caller has a mmu_notifier get the mm pointer will remain valid, * and can be converted to an active mm pointer via mmget_not_zero(). */ struct mmu_notifier *mmu_notifier_get_locked(const struct mmu_notifier_ops *ops, struct mm_struct *mm) { struct mmu_notifier *subscription; int ret; mmap_assert_write_locked(mm); if (mm->notifier_subscriptions) { subscription = find_get_mmu_notifier(mm, ops); if (subscription) return subscription; } subscription = ops->alloc_notifier(mm); if (IS_ERR(subscription)) return subscription; subscription->ops = ops; ret = __mmu_notifier_register(subscription, mm); if (ret) goto out_free; return subscription; out_free: subscription->ops->free_notifier(subscription); return ERR_PTR(ret); } EXPORT_SYMBOL_GPL(mmu_notifier_get_locked); /* this is called after the last mmu_notifier_unregister() returned */ void __mmu_notifier_subscriptions_destroy(struct mm_struct *mm) { BUG_ON(!hlist_empty(&mm->notifier_subscriptions->list)); kfree(mm->notifier_subscriptions); mm->notifier_subscriptions = LIST_POISON1; /* debug */ } /* * This releases the mm_count pin automatically and frees the mm * structure if it was the last user of it. It serializes against * running mmu notifiers with SRCU and against mmu_notifier_unregister * with the unregister lock + SRCU. All sptes must be dropped before * calling mmu_notifier_unregister. ->release or any other notifier * method may be invoked concurrently with mmu_notifier_unregister, * and only after mmu_notifier_unregister returned we're guaranteed * that ->release or any other method can't run anymore. */ void mmu_notifier_unregister(struct mmu_notifier *subscription, struct mm_struct *mm) { BUG_ON(atomic_read(&mm->mm_count) <= 0); if (!hlist_unhashed(&subscription->hlist)) { /* * SRCU here will force exit_mmap to wait for ->release to * finish before freeing the pages. */ int id; id = srcu_read_lock(&srcu); /* * exit_mmap will block in mmu_notifier_release to guarantee * that ->release is called before freeing the pages. */ if (subscription->ops->release) subscription->ops->release(subscription, mm); srcu_read_unlock(&srcu, id); spin_lock(&mm->notifier_subscriptions->lock); /* * Can not use list_del_rcu() since __mmu_notifier_release * can delete it before we hold the lock. */ hlist_del_init_rcu(&subscription->hlist); spin_unlock(&mm->notifier_subscriptions->lock); } /* * Wait for any running method to finish, of course including * ->release if it was run by mmu_notifier_release instead of us. */ synchronize_srcu(&srcu); BUG_ON(atomic_read(&mm->mm_count) <= 0); mmdrop(mm); } EXPORT_SYMBOL_GPL(mmu_notifier_unregister); static void mmu_notifier_free_rcu(struct rcu_head *rcu) { struct mmu_notifier *subscription = container_of(rcu, struct mmu_notifier, rcu); struct mm_struct *mm = subscription->mm; subscription->ops->free_notifier(subscription); /* Pairs with the get in __mmu_notifier_register() */ mmdrop(mm); } /** * mmu_notifier_put - Release the reference on the notifier * @subscription: The notifier to act on * * This function must be paired with each mmu_notifier_get(), it releases the * reference obtained by the get. If this is the last reference then process * to free the notifier will be run asynchronously. * * Unlike mmu_notifier_unregister() the get/put flow only calls ops->release * when the mm_struct is destroyed. Instead free_notifier is always called to * release any resources held by the user. * * As ops->release is not guaranteed to be called, the user must ensure that * all sptes are dropped, and no new sptes can be established before * mmu_notifier_put() is called. * * This function can be called from the ops->release callback, however the * caller must still ensure it is called pairwise with mmu_notifier_get(). * * Modules calling this function must call mmu_notifier_synchronize() in * their __exit functions to ensure the async work is completed. */ void mmu_notifier_put(struct mmu_notifier *subscription) { struct mm_struct *mm = subscription->mm; spin_lock(&mm->notifier_subscriptions->lock); if (WARN_ON(!subscription->users) || --subscription->users) goto out_unlock; hlist_del_init_rcu(&subscription->hlist); spin_unlock(&mm->notifier_subscriptions->lock); call_srcu(&srcu, &subscription->rcu, mmu_notifier_free_rcu); return; out_unlock: spin_unlock(&mm->notifier_subscriptions->lock); } EXPORT_SYMBOL_GPL(mmu_notifier_put); static int __mmu_interval_notifier_insert( struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, struct mmu_notifier_subscriptions *subscriptions, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops) { interval_sub->mm = mm; interval_sub->ops = ops; RB_CLEAR_NODE(&interval_sub->interval_tree.rb); interval_sub->interval_tree.start = start; /* * Note that the representation of the intervals in the interval tree * considers the ending point as contained in the interval. */ if (length == 0 || check_add_overflow(start, length - 1, &interval_sub->interval_tree.last)) return -EOVERFLOW; /* Must call with a mmget() held */ if (WARN_ON(atomic_read(&mm->mm_users) <= 0)) return -EINVAL; /* pairs with mmdrop in mmu_interval_notifier_remove() */ mmgrab(mm); /* * If some invalidate_range_start/end region is going on in parallel * we don't know what VA ranges are affected, so we must assume this * new range is included. * * If the itree is invalidating then we are not allowed to change * it. Retrying until invalidation is done is tricky due to the * possibility for live lock, instead defer the add to * mn_itree_inv_end() so this algorithm is deterministic. * * In all cases the value for the interval_sub->invalidate_seq should be * odd, see mmu_interval_read_begin() */ spin_lock(&subscriptions->lock); if (subscriptions->active_invalidate_ranges) { if (mn_itree_is_invalidating(subscriptions)) hlist_add_head(&interval_sub->deferred_item, &subscriptions->deferred_list); else { subscriptions->invalidate_seq |= 1; interval_tree_insert(&interval_sub->interval_tree, &subscriptions->itree); } interval_sub->invalidate_seq = subscriptions->invalidate_seq; } else { WARN_ON(mn_itree_is_invalidating(subscriptions)); /* * The starting seq for a subscription not under invalidation * should be odd, not equal to the current invalidate_seq and * invalidate_seq should not 'wrap' to the new seq any time * soon. */ interval_sub->invalidate_seq = subscriptions->invalidate_seq - 1; interval_tree_insert(&interval_sub->interval_tree, &subscriptions->itree); } spin_unlock(&subscriptions->lock); return 0; } /** * mmu_interval_notifier_insert - Insert an interval notifier * @interval_sub: Interval subscription to register * @start: Starting virtual address to monitor * @length: Length of the range to monitor * @mm: mm_struct to attach to * @ops: Interval notifier operations to be called on matching events * * This function subscribes the interval notifier for notifications from the * mm. Upon return the ops related to mmu_interval_notifier will be called * whenever an event that intersects with the given range occurs. * * Upon return the range_notifier may not be present in the interval tree yet. * The caller must use the normal interval notifier read flow via * mmu_interval_read_begin() to establish SPTEs for this range. */ int mmu_interval_notifier_insert(struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops) { struct mmu_notifier_subscriptions *subscriptions; int ret; might_lock(&mm->mmap_lock); subscriptions = smp_load_acquire(&mm->notifier_subscriptions); if (!subscriptions || !subscriptions->has_itree) { ret = mmu_notifier_register(NULL, mm); if (ret) return ret; subscriptions = mm->notifier_subscriptions; } return __mmu_interval_notifier_insert(interval_sub, mm, subscriptions, start, length, ops); } EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert); int mmu_interval_notifier_insert_locked( struct mmu_interval_notifier *interval_sub, struct mm_struct *mm, unsigned long start, unsigned long length, const struct mmu_interval_notifier_ops *ops) { struct mmu_notifier_subscriptions *subscriptions = mm->notifier_subscriptions; int ret; mmap_assert_write_locked(mm); if (!subscriptions || !subscriptions->has_itree) { ret = __mmu_notifier_register(NULL, mm); if (ret) return ret; subscriptions = mm->notifier_subscriptions; } return __mmu_interval_notifier_insert(interval_sub, mm, subscriptions, start, length, ops); } EXPORT_SYMBOL_GPL(mmu_interval_notifier_insert_locked); static bool mmu_interval_seq_released(struct mmu_notifier_subscriptions *subscriptions, unsigned long seq) { bool ret; spin_lock(&subscriptions->lock); ret = subscriptions->invalidate_seq != seq; spin_unlock(&subscriptions->lock); return ret; } /** * mmu_interval_notifier_remove - Remove a interval notifier * @interval_sub: Interval subscription to unregister * * This function must be paired with mmu_interval_notifier_insert(). It cannot * be called from any ops callback. * * Once this returns ops callbacks are no longer running on other CPUs and * will not be called in future. */ void mmu_interval_notifier_remove(struct mmu_interval_notifier *interval_sub) { struct mm_struct *mm = interval_sub->mm; struct mmu_notifier_subscriptions *subscriptions = mm->notifier_subscriptions; unsigned long seq = 0; might_sleep(); spin_lock(&subscriptions->lock); if (mn_itree_is_invalidating(subscriptions)) { /* * remove is being called after insert put this on the * deferred list, but before the deferred list was processed. */ if (RB_EMPTY_NODE(&interval_sub->interval_tree.rb)) { hlist_del(&interval_sub->deferred_item); } else { hlist_add_head(&interval_sub->deferred_item, &subscriptions->deferred_list); seq = subscriptions->invalidate_seq; } } else { WARN_ON(RB_EMPTY_NODE(&interval_sub->interval_tree.rb)); interval_tree_remove(&interval_sub->interval_tree, &subscriptions->itree); } spin_unlock(&subscriptions->lock); /* * The possible sleep on progress in the invalidation requires the * caller not hold any locks held by invalidation callbacks. */ lock_map_acquire(&__mmu_notifier_invalidate_range_start_map); lock_map_release(&__mmu_notifier_invalidate_range_start_map); if (seq) wait_event(subscriptions->wq, mmu_interval_seq_released(subscriptions, seq)); /* pairs with mmgrab in mmu_interval_notifier_insert() */ mmdrop(mm); } EXPORT_SYMBOL_GPL(mmu_interval_notifier_remove); /** * mmu_notifier_synchronize - Ensure all mmu_notifiers are freed * * This function ensures that all outstanding async SRU work from * mmu_notifier_put() is completed. After it returns any mmu_notifier_ops * associated with an unused mmu_notifier will no longer be called. * * Before using the caller must ensure that all of its mmu_notifiers have been * fully released via mmu_notifier_put(). * * Modules using the mmu_notifier_put() API should call this in their __exit * function to avoid module unloading races. */ void mmu_notifier_synchronize(void) { synchronize_srcu(&srcu); } EXPORT_SYMBOL_GPL(mmu_notifier_synchronize);
7 1 1 5 1 1 1 1 4 4 12 1 1 1 6 3 7 2 1 7 7 7 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 // SPDX-License-Identifier: GPL-2.0-or-later /* Daemon interface * * Copyright (C) 2007, 2021 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/module.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/completion.h> #include <linux/slab.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/namei.h> #include <linux/poll.h> #include <linux/mount.h> #include <linux/statfs.h> #include <linux/ctype.h> #include <linux/string.h> #include <linux/fs_struct.h> #include "internal.h" static int cachefiles_daemon_open(struct inode *, struct file *); static int cachefiles_daemon_release(struct inode *, struct file *); static ssize_t cachefiles_daemon_read(struct file *, char __user *, size_t, loff_t *); static ssize_t cachefiles_daemon_write(struct file *, const char __user *, size_t, loff_t *); static __poll_t cachefiles_daemon_poll(struct file *, struct poll_table_struct *); static int cachefiles_daemon_frun(struct cachefiles_cache *, char *); static int cachefiles_daemon_fcull(struct cachefiles_cache *, char *); static int cachefiles_daemon_fstop(struct cachefiles_cache *, char *); static int cachefiles_daemon_brun(struct cachefiles_cache *, char *); static int cachefiles_daemon_bcull(struct cachefiles_cache *, char *); static int cachefiles_daemon_bstop(struct cachefiles_cache *, char *); static int cachefiles_daemon_cull(struct cachefiles_cache *, char *); static int cachefiles_daemon_debug(struct cachefiles_cache *, char *); static int cachefiles_daemon_dir(struct cachefiles_cache *, char *); static int cachefiles_daemon_inuse(struct cachefiles_cache *, char *); static int cachefiles_daemon_secctx(struct cachefiles_cache *, char *); static int cachefiles_daemon_tag(struct cachefiles_cache *, char *); static int cachefiles_daemon_bind(struct cachefiles_cache *, char *); static void cachefiles_daemon_unbind(struct cachefiles_cache *); static unsigned long cachefiles_open; const struct file_operations cachefiles_daemon_fops = { .owner = THIS_MODULE, .open = cachefiles_daemon_open, .release = cachefiles_daemon_release, .read = cachefiles_daemon_read, .write = cachefiles_daemon_write, .poll = cachefiles_daemon_poll, .llseek = noop_llseek, }; struct cachefiles_daemon_cmd { char name[8]; int (*handler)(struct cachefiles_cache *cache, char *args); }; static const struct cachefiles_daemon_cmd cachefiles_daemon_cmds[] = { { "bind", cachefiles_daemon_bind }, { "brun", cachefiles_daemon_brun }, { "bcull", cachefiles_daemon_bcull }, { "bstop", cachefiles_daemon_bstop }, { "cull", cachefiles_daemon_cull }, { "debug", cachefiles_daemon_debug }, { "dir", cachefiles_daemon_dir }, { "frun", cachefiles_daemon_frun }, { "fcull", cachefiles_daemon_fcull }, { "fstop", cachefiles_daemon_fstop }, { "inuse", cachefiles_daemon_inuse }, { "secctx", cachefiles_daemon_secctx }, { "tag", cachefiles_daemon_tag }, #ifdef CONFIG_CACHEFILES_ONDEMAND { "copen", cachefiles_ondemand_copen }, { "restore", cachefiles_ondemand_restore }, #endif { "", NULL } }; /* * Prepare a cache for caching. */ static int cachefiles_daemon_open(struct inode *inode, struct file *file) { struct cachefiles_cache *cache; _enter(""); /* only the superuser may do this */ if (!capable(CAP_SYS_ADMIN)) return -EPERM; /* the cachefiles device may only be open once at a time */ if (xchg(&cachefiles_open, 1) == 1) return -EBUSY; /* allocate a cache record */ cache = kzalloc(sizeof(struct cachefiles_cache), GFP_KERNEL); if (!cache) { cachefiles_open = 0; return -ENOMEM; } mutex_init(&cache->daemon_mutex); init_waitqueue_head(&cache->daemon_pollwq); INIT_LIST_HEAD(&cache->volumes); INIT_LIST_HEAD(&cache->object_list); spin_lock_init(&cache->object_list_lock); refcount_set(&cache->unbind_pincount, 1); xa_init_flags(&cache->reqs, XA_FLAGS_ALLOC); xa_init_flags(&cache->ondemand_ids, XA_FLAGS_ALLOC1); /* set default caching limits * - limit at 1% free space and/or free files * - cull below 5% free space and/or free files * - cease culling above 7% free space and/or free files */ cache->frun_percent = 7; cache->fcull_percent = 5; cache->fstop_percent = 1; cache->brun_percent = 7; cache->bcull_percent = 5; cache->bstop_percent = 1; file->private_data = cache; cache->cachefilesd = file; return 0; } void cachefiles_flush_reqs(struct cachefiles_cache *cache) { struct xarray *xa = &cache->reqs; struct cachefiles_req *req; unsigned long index; /* * Make sure the following two operations won't be reordered. * 1) set CACHEFILES_DEAD bit * 2) flush requests in the xarray * Otherwise the request may be enqueued after xarray has been * flushed, leaving the orphan request never being completed. * * CPU 1 CPU 2 * ===== ===== * flush requests in the xarray * test CACHEFILES_DEAD bit * enqueue the request * set CACHEFILES_DEAD bit */ smp_mb(); xa_lock(xa); xa_for_each(xa, index, req) { req->error = -EIO; complete(&req->done); __xa_erase(xa, index); } xa_unlock(xa); xa_destroy(&cache->reqs); xa_destroy(&cache->ondemand_ids); } void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache) { if (refcount_dec_and_test(&cache->unbind_pincount)) { cachefiles_daemon_unbind(cache); cachefiles_open = 0; kfree(cache); } } void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache) { refcount_inc(&cache->unbind_pincount); } /* * Release a cache. */ static int cachefiles_daemon_release(struct inode *inode, struct file *file) { struct cachefiles_cache *cache = file->private_data; _enter(""); ASSERT(cache); set_bit(CACHEFILES_DEAD, &cache->flags); if (cachefiles_in_ondemand_mode(cache)) cachefiles_flush_reqs(cache); /* clean up the control file interface */ cache->cachefilesd = NULL; file->private_data = NULL; cachefiles_put_unbind_pincount(cache); _leave(""); return 0; } static ssize_t cachefiles_do_daemon_read(struct cachefiles_cache *cache, char __user *_buffer, size_t buflen) { unsigned long long b_released; unsigned f_released; char buffer[256]; int n; /* check how much space the cache has */ cachefiles_has_space(cache, 0, 0, cachefiles_has_space_check); /* summarise */ f_released = atomic_xchg(&cache->f_released, 0); b_released = atomic_long_xchg(&cache->b_released, 0); clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags); n = snprintf(buffer, sizeof(buffer), "cull=%c" " frun=%llx" " fcull=%llx" " fstop=%llx" " brun=%llx" " bcull=%llx" " bstop=%llx" " freleased=%x" " breleased=%llx", test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0', (unsigned long long) cache->frun, (unsigned long long) cache->fcull, (unsigned long long) cache->fstop, (unsigned long long) cache->brun, (unsigned long long) cache->bcull, (unsigned long long) cache->bstop, f_released, b_released); if (n > buflen) return -EMSGSIZE; if (copy_to_user(_buffer, buffer, n) != 0) return -EFAULT; return n; } /* * Read the cache state. */ static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer, size_t buflen, loff_t *pos) { struct cachefiles_cache *cache = file->private_data; //_enter(",,%zu,", buflen); if (!test_bit(CACHEFILES_READY, &cache->flags)) return 0; if (cachefiles_in_ondemand_mode(cache)) return cachefiles_ondemand_daemon_read(cache, _buffer, buflen); else return cachefiles_do_daemon_read(cache, _buffer, buflen); } /* * Take a command from cachefilesd, parse it and act on it. */ static ssize_t cachefiles_daemon_write(struct file *file, const char __user *_data, size_t datalen, loff_t *pos) { const struct cachefiles_daemon_cmd *cmd; struct cachefiles_cache *cache = file->private_data; ssize_t ret; char *data, *args, *cp; //_enter(",,%zu,", datalen); ASSERT(cache); if (test_bit(CACHEFILES_DEAD, &cache->flags)) return -EIO; if (datalen > PAGE_SIZE - 1) return -EOPNOTSUPP; /* drag the command string into the kernel so we can parse it */ data = memdup_user_nul(_data, datalen); if (IS_ERR(data)) return PTR_ERR(data); ret = -EINVAL; if (memchr(data, '\0', datalen)) goto error; /* strip any newline */ cp = memchr(data, '\n', datalen); if (cp) { if (cp == data) goto error; *cp = '\0'; } /* parse the command */ ret = -EOPNOTSUPP; for (args = data; *args; args++) if (isspace(*args)) break; if (*args) { if (args == data) goto error; *args = '\0'; args = skip_spaces(++args); } /* run the appropriate command handler */ for (cmd = cachefiles_daemon_cmds; cmd->name[0]; cmd++) if (strcmp(cmd->name, data) == 0) goto found_command; error: kfree(data); //_leave(" = %zd", ret); return ret; found_command: mutex_lock(&cache->daemon_mutex); ret = -EIO; if (!test_bit(CACHEFILES_DEAD, &cache->flags)) ret = cmd->handler(cache, args); mutex_unlock(&cache->daemon_mutex); if (ret == 0) ret = datalen; goto error; } /* * Poll for culling state * - use EPOLLOUT to indicate culling state */ static __poll_t cachefiles_daemon_poll(struct file *file, struct poll_table_struct *poll) { struct cachefiles_cache *cache = file->private_data; XA_STATE(xas, &cache->reqs, 0); struct cachefiles_req *req; __poll_t mask; poll_wait(file, &cache->daemon_pollwq, poll); mask = 0; if (cachefiles_in_ondemand_mode(cache)) { if (!xa_empty(&cache->reqs)) { xas_lock(&xas); xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { if (!cachefiles_ondemand_is_reopening_read(req)) { mask |= EPOLLIN; break; } } xas_unlock(&xas); } } else { if (test_bit(CACHEFILES_STATE_CHANGED, &cache->flags)) mask |= EPOLLIN; } if (test_bit(CACHEFILES_CULLING, &cache->flags)) mask |= EPOLLOUT; return mask; } /* * Give a range error for cache space constraints * - can be tail-called */ static int cachefiles_daemon_range_error(struct cachefiles_cache *cache, char *args) { pr_err("Free space limits must be in range 0%%<=stop<cull<run<100%%\n"); return -EINVAL; } /* * Set the percentage of files at which to stop culling * - command: "frun <N>%" */ static int cachefiles_daemon_frun(struct cachefiles_cache *cache, char *args) { unsigned long frun; _enter(",%s", args); if (!*args) return -EINVAL; frun = simple_strtoul(args, &args, 10); if (args[0] != '%' || args[1] != '\0') return -EINVAL; if (frun <= cache->fcull_percent || frun >= 100) return cachefiles_daemon_range_error(cache, args); cache->frun_percent = frun; return 0; } /* * Set the percentage of files at which to start culling * - command: "fcull <N>%" */ static int cachefiles_daemon_fcull(struct cachefiles_cache *cache, char *args) { unsigned long fcull; _enter(",%s", args); if (!*args) return -EINVAL; fcull = simple_strtoul(args, &args, 10); if (args[0] != '%' || args[1] != '\0') return -EINVAL; if (fcull <= cache->fstop_percent || fcull >= cache->frun_percent) return cachefiles_daemon_range_error(cache, args); cache->fcull_percent = fcull; return 0; } /* * Set the percentage of files at which to stop allocating * - command: "fstop <N>%" */ static int cachefiles_daemon_fstop(struct cachefiles_cache *cache, char *args) { unsigned long fstop; _enter(",%s", args); if (!*args) return -EINVAL; fstop = simple_strtoul(args, &args, 10); if (args[0] != '%' || args[1] != '\0') return -EINVAL; if (fstop >= cache->fcull_percent) return cachefiles_daemon_range_error(cache, args); cache->fstop_percent = fstop; return 0; } /* * Set the percentage of blocks at which to stop culling * - command: "brun <N>%" */ static int cachefiles_daemon_brun(struct cachefiles_cache *cache, char *args) { unsigned long brun; _enter(",%s", args); if (!*args) return -EINVAL; brun = simple_strtoul(args, &args, 10); if (args[0] != '%' || args[1] != '\0') return -EINVAL; if (brun <= cache->bcull_percent || brun >= 100) return cachefiles_daemon_range_error(cache, args); cache->brun_percent = brun; return 0; } /* * Set the percentage of blocks at which to start culling * - command: "bcull <N>%" */ static int cachefiles_daemon_bcull(struct cachefiles_cache *cache, char *args) { unsigned long bcull; _enter(",%s", args); if (!*args) return -EINVAL; bcull = simple_strtoul(args, &args, 10); if (args[0] != '%' || args[1] != '\0') return -EINVAL; if (bcull <= cache->bstop_percent || bcull >= cache->brun_percent) return cachefiles_daemon_range_error(cache, args); cache->bcull_percent = bcull; return 0; } /* * Set the percentage of blocks at which to stop allocating * - command: "bstop <N>%" */ static int cachefiles_daemon_bstop(struct cachefiles_cache *cache, char *args) { unsigned long bstop; _enter(",%s", args); if (!*args) return -EINVAL; bstop = simple_strtoul(args, &args, 10); if (args[0] != '%' || args[1] != '\0') return -EINVAL; if (bstop >= cache->bcull_percent) return cachefiles_daemon_range_error(cache, args); cache->bstop_percent = bstop; return 0; } /* * Set the cache directory * - command: "dir <name>" */ static int cachefiles_daemon_dir(struct cachefiles_cache *cache, char *args) { char *dir; _enter(",%s", args); if (!*args) { pr_err("Empty directory specified\n"); return -EINVAL; } if (cache->rootdirname) { pr_err("Second cache directory specified\n"); return -EEXIST; } dir = kstrdup(args, GFP_KERNEL); if (!dir) return -ENOMEM; cache->rootdirname = dir; return 0; } /* * Set the cache security context * - command: "secctx <ctx>" */ static int cachefiles_daemon_secctx(struct cachefiles_cache *cache, char *args) { char *secctx; _enter(",%s", args); if (!*args) { pr_err("Empty security context specified\n"); return -EINVAL; } if (cache->secctx) { pr_err("Second security context specified\n"); return -EINVAL; } secctx = kstrdup(args, GFP_KERNEL); if (!secctx) return -ENOMEM; cache->secctx = secctx; return 0; } /* * Set the cache tag * - command: "tag <name>" */ static int cachefiles_daemon_tag(struct cachefiles_cache *cache, char *args) { char *tag; _enter(",%s", args); if (!*args) { pr_err("Empty tag specified\n"); return -EINVAL; } if (cache->tag) return -EEXIST; tag = kstrdup(args, GFP_KERNEL); if (!tag) return -ENOMEM; cache->tag = tag; return 0; } /* * Request a node in the cache be culled from the current working directory * - command: "cull <name>" */ static int cachefiles_daemon_cull(struct cachefiles_cache *cache, char *args) { struct path path; const struct cred *saved_cred; int ret; _enter(",%s", args); if (strchr(args, '/')) goto inval; if (!test_bit(CACHEFILES_READY, &cache->flags)) { pr_err("cull applied to unready cache\n"); return -EIO; } if (test_bit(CACHEFILES_DEAD, &cache->flags)) { pr_err("cull applied to dead cache\n"); return -EIO; } get_fs_pwd(current->fs, &path); if (!d_can_lookup(path.dentry)) goto notdir; cachefiles_begin_secure(cache, &saved_cred); ret = cachefiles_cull(cache, path.dentry, args); cachefiles_end_secure(cache, saved_cred); path_put(&path); _leave(" = %d", ret); return ret; notdir: path_put(&path); pr_err("cull command requires dirfd to be a directory\n"); return -ENOTDIR; inval: pr_err("cull command requires dirfd and filename\n"); return -EINVAL; } /* * Set debugging mode * - command: "debug <mask>" */ static int cachefiles_daemon_debug(struct cachefiles_cache *cache, char *args) { unsigned long mask; _enter(",%s", args); mask = simple_strtoul(args, &args, 0); if (args[0] != '\0') goto inval; cachefiles_debug = mask; _leave(" = 0"); return 0; inval: pr_err("debug command requires mask\n"); return -EINVAL; } /* * Find out whether an object in the current working directory is in use or not * - command: "inuse <name>" */ static int cachefiles_daemon_inuse(struct cachefiles_cache *cache, char *args) { struct path path; const struct cred *saved_cred; int ret; //_enter(",%s", args); if (strchr(args, '/')) goto inval; if (!test_bit(CACHEFILES_READY, &cache->flags)) { pr_err("inuse applied to unready cache\n"); return -EIO; } if (test_bit(CACHEFILES_DEAD, &cache->flags)) { pr_err("inuse applied to dead cache\n"); return -EIO; } get_fs_pwd(current->fs, &path); if (!d_can_lookup(path.dentry)) goto notdir; cachefiles_begin_secure(cache, &saved_cred); ret = cachefiles_check_in_use(cache, path.dentry, args); cachefiles_end_secure(cache, saved_cred); path_put(&path); //_leave(" = %d", ret); return ret; notdir: path_put(&path); pr_err("inuse command requires dirfd to be a directory\n"); return -ENOTDIR; inval: pr_err("inuse command requires dirfd and filename\n"); return -EINVAL; } /* * Bind a directory as a cache */ static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args) { _enter("{%u,%u,%u,%u,%u,%u},%s", cache->frun_percent, cache->fcull_percent, cache->fstop_percent, cache->brun_percent, cache->bcull_percent, cache->bstop_percent, args); if (cache->fstop_percent >= cache->fcull_percent || cache->fcull_percent >= cache->frun_percent || cache->frun_percent >= 100) return -ERANGE; if (cache->bstop_percent >= cache->bcull_percent || cache->bcull_percent >= cache->brun_percent || cache->brun_percent >= 100) return -ERANGE; if (!cache->rootdirname) { pr_err("No cache directory specified\n"); return -EINVAL; } /* Don't permit already bound caches to be re-bound */ if (test_bit(CACHEFILES_READY, &cache->flags)) { pr_err("Cache already bound\n"); return -EBUSY; } if (IS_ENABLED(CONFIG_CACHEFILES_ONDEMAND)) { if (!strcmp(args, "ondemand")) { set_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags); } else if (*args) { pr_err("Invalid argument to the 'bind' command\n"); return -EINVAL; } } else if (*args) { pr_err("'bind' command doesn't take an argument\n"); return -EINVAL; } /* Make sure we have copies of the tag string */ if (!cache->tag) { /* * The tag string is released by the fops->release() * function, so we don't release it on error here */ cache->tag = kstrdup("CacheFiles", GFP_KERNEL); if (!cache->tag) return -ENOMEM; } return cachefiles_add_cache(cache); } /* * Unbind a cache. */ static void cachefiles_daemon_unbind(struct cachefiles_cache *cache) { _enter(""); if (test_bit(CACHEFILES_READY, &cache->flags)) cachefiles_withdraw_cache(cache); cachefiles_put_directory(cache->graveyard); cachefiles_put_directory(cache->store); mntput(cache->mnt); put_cred(cache->cache_cred); kfree(cache->rootdirname); kfree(cache->secctx); kfree(cache->tag); _leave(""); }
2 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 // SPDX-License-Identifier: GPL-2.0 /* * ESSIV skcipher and aead template for block encryption * * This template encapsulates the ESSIV IV generation algorithm used by * dm-crypt and fscrypt, which converts the initial vector for the skcipher * used for block encryption, by encrypting it using the hash of the * skcipher key as encryption key. Usually, the input IV is a 64-bit sector * number in LE representation zero-padded to the size of the IV, but this * is not assumed by this driver. * * The typical use of this template is to instantiate the skcipher * 'essiv(cbc(aes),sha256)', which is the only instantiation used by * fscrypt, and the most relevant one for dm-crypt. However, dm-crypt * also permits ESSIV to be used in combination with the authenc template, * e.g., 'essiv(authenc(hmac(sha256),cbc(aes)),sha256)', in which case * we need to instantiate an aead that accepts the same special key format * as the authenc template, and deals with the way the encrypted IV is * embedded into the AAD area of the aead request. This means the AEAD * flavor produced by this template is tightly coupled to the way dm-crypt * happens to use it. * * Copyright (c) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org> * * Heavily based on: * adiantum length-preserving encryption mode * * Copyright 2018 Google LLC */ #include <crypto/authenc.h> #include <crypto/internal/aead.h> #include <crypto/internal/cipher.h> #include <crypto/internal/hash.h> #include <crypto/internal/skcipher.h> #include <crypto/scatterwalk.h> #include <linux/module.h> #include "internal.h" struct essiv_instance_ctx { union { struct crypto_skcipher_spawn skcipher_spawn; struct crypto_aead_spawn aead_spawn; } u; char essiv_cipher_name[CRYPTO_MAX_ALG_NAME]; char shash_driver_name[CRYPTO_MAX_ALG_NAME]; }; struct essiv_tfm_ctx { union { struct crypto_skcipher *skcipher; struct crypto_aead *aead; } u; struct crypto_cipher *essiv_cipher; struct crypto_shash *hash; int ivoffset; }; struct essiv_aead_request_ctx { struct scatterlist sg[4]; u8 *assoc; struct aead_request aead_req; }; static int essiv_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int keylen) { struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); u8 salt[HASH_MAX_DIGESTSIZE]; int err; crypto_skcipher_clear_flags(tctx->u.skcipher, CRYPTO_TFM_REQ_MASK); crypto_skcipher_set_flags(tctx->u.skcipher, crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); err = crypto_skcipher_setkey(tctx->u.skcipher, key, keylen); if (err) return err; err = crypto_shash_tfm_digest(tctx->hash, key, keylen, salt); if (err) return err; crypto_cipher_clear_flags(tctx->essiv_cipher, CRYPTO_TFM_REQ_MASK); crypto_cipher_set_flags(tctx->essiv_cipher, crypto_skcipher_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); return crypto_cipher_setkey(tctx->essiv_cipher, salt, crypto_shash_digestsize(tctx->hash)); } static int essiv_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen) { struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm); SHASH_DESC_ON_STACK(desc, tctx->hash); struct crypto_authenc_keys keys; u8 salt[HASH_MAX_DIGESTSIZE]; int err; crypto_aead_clear_flags(tctx->u.aead, CRYPTO_TFM_REQ_MASK); crypto_aead_set_flags(tctx->u.aead, crypto_aead_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); err = crypto_aead_setkey(tctx->u.aead, key, keylen); if (err) return err; if (crypto_authenc_extractkeys(&keys, key, keylen) != 0) return -EINVAL; desc->tfm = tctx->hash; err = crypto_shash_init(desc) ?: crypto_shash_update(desc, keys.enckey, keys.enckeylen) ?: crypto_shash_finup(desc, keys.authkey, keys.authkeylen, salt); if (err) return err; crypto_cipher_clear_flags(tctx->essiv_cipher, CRYPTO_TFM_REQ_MASK); crypto_cipher_set_flags(tctx->essiv_cipher, crypto_aead_get_flags(tfm) & CRYPTO_TFM_REQ_MASK); return crypto_cipher_setkey(tctx->essiv_cipher, salt, crypto_shash_digestsize(tctx->hash)); } static int essiv_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize) { struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm); return crypto_aead_setauthsize(tctx->u.aead, authsize); } static void essiv_skcipher_done(void *data, int err) { struct skcipher_request *req = data; skcipher_request_complete(req, err); } static int essiv_skcipher_crypt(struct skcipher_request *req, bool enc) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); const struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); struct skcipher_request *subreq = skcipher_request_ctx(req); crypto_cipher_encrypt_one(tctx->essiv_cipher, req->iv, req->iv); skcipher_request_set_tfm(subreq, tctx->u.skcipher); skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, req->iv); skcipher_request_set_callback(subreq, skcipher_request_flags(req), essiv_skcipher_done, req); return enc ? crypto_skcipher_encrypt(subreq) : crypto_skcipher_decrypt(subreq); } static int essiv_skcipher_encrypt(struct skcipher_request *req) { return essiv_skcipher_crypt(req, true); } static int essiv_skcipher_decrypt(struct skcipher_request *req) { return essiv_skcipher_crypt(req, false); } static void essiv_aead_done(void *data, int err) { struct aead_request *req = data; struct essiv_aead_request_ctx *rctx = aead_request_ctx(req); if (err == -EINPROGRESS) goto out; kfree(rctx->assoc); out: aead_request_complete(req, err); } static int essiv_aead_crypt(struct aead_request *req, bool enc) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); const struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm); struct essiv_aead_request_ctx *rctx = aead_request_ctx(req); struct aead_request *subreq = &rctx->aead_req; struct scatterlist *src = req->src; int err; crypto_cipher_encrypt_one(tctx->essiv_cipher, req->iv, req->iv); /* * dm-crypt embeds the sector number and the IV in the AAD region, so * we have to copy the converted IV into the right scatterlist before * we pass it on. */ rctx->assoc = NULL; if (req->src == req->dst || !enc) { scatterwalk_map_and_copy(req->iv, req->dst, req->assoclen - crypto_aead_ivsize(tfm), crypto_aead_ivsize(tfm), 1); } else { u8 *iv = (u8 *)aead_request_ctx(req) + tctx->ivoffset; int ivsize = crypto_aead_ivsize(tfm); int ssize = req->assoclen - ivsize; struct scatterlist *sg; int nents; if (ssize < 0) return -EINVAL; nents = sg_nents_for_len(req->src, ssize); if (nents < 0) return -EINVAL; memcpy(iv, req->iv, ivsize); sg_init_table(rctx->sg, 4); if (unlikely(nents > 1)) { /* * This is a case that rarely occurs in practice, but * for correctness, we have to deal with it nonetheless. */ rctx->assoc = kmalloc(ssize, GFP_ATOMIC); if (!rctx->assoc) return -ENOMEM; scatterwalk_map_and_copy(rctx->assoc, req->src, 0, ssize, 0); sg_set_buf(rctx->sg, rctx->assoc, ssize); } else { sg_set_page(rctx->sg, sg_page(req->src), ssize, req->src->offset); } sg_set_buf(rctx->sg + 1, iv, ivsize); sg = scatterwalk_ffwd(rctx->sg + 2, req->src, req->assoclen); if (sg != rctx->sg + 2) sg_chain(rctx->sg, 3, sg); src = rctx->sg; } aead_request_set_tfm(subreq, tctx->u.aead); aead_request_set_ad(subreq, req->assoclen); aead_request_set_callback(subreq, aead_request_flags(req), essiv_aead_done, req); aead_request_set_crypt(subreq, src, req->dst, req->cryptlen, req->iv); err = enc ? crypto_aead_encrypt(subreq) : crypto_aead_decrypt(subreq); if (rctx->assoc && err != -EINPROGRESS && err != -EBUSY) kfree(rctx->assoc); return err; } static int essiv_aead_encrypt(struct aead_request *req) { return essiv_aead_crypt(req, true); } static int essiv_aead_decrypt(struct aead_request *req) { return essiv_aead_crypt(req, false); } static int essiv_init_tfm(struct essiv_instance_ctx *ictx, struct essiv_tfm_ctx *tctx) { struct crypto_cipher *essiv_cipher; struct crypto_shash *hash; int err; essiv_cipher = crypto_alloc_cipher(ictx->essiv_cipher_name, 0, 0); if (IS_ERR(essiv_cipher)) return PTR_ERR(essiv_cipher); hash = crypto_alloc_shash(ictx->shash_driver_name, 0, 0); if (IS_ERR(hash)) { err = PTR_ERR(hash); goto err_free_essiv_cipher; } tctx->essiv_cipher = essiv_cipher; tctx->hash = hash; return 0; err_free_essiv_cipher: crypto_free_cipher(essiv_cipher); return err; } static int essiv_skcipher_init_tfm(struct crypto_skcipher *tfm) { struct skcipher_instance *inst = skcipher_alg_instance(tfm); struct essiv_instance_ctx *ictx = skcipher_instance_ctx(inst); struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); struct crypto_skcipher *skcipher; int err; skcipher = crypto_spawn_skcipher(&ictx->u.skcipher_spawn); if (IS_ERR(skcipher)) return PTR_ERR(skcipher); crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) + crypto_skcipher_reqsize(skcipher)); err = essiv_init_tfm(ictx, tctx); if (err) { crypto_free_skcipher(skcipher); return err; } tctx->u.skcipher = skcipher; return 0; } static int essiv_aead_init_tfm(struct crypto_aead *tfm) { struct aead_instance *inst = aead_alg_instance(tfm); struct essiv_instance_ctx *ictx = aead_instance_ctx(inst); struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm); struct crypto_aead *aead; unsigned int subreq_size; int err; BUILD_BUG_ON(offsetofend(struct essiv_aead_request_ctx, aead_req) != sizeof(struct essiv_aead_request_ctx)); aead = crypto_spawn_aead(&ictx->u.aead_spawn); if (IS_ERR(aead)) return PTR_ERR(aead); subreq_size = sizeof_field(struct essiv_aead_request_ctx, aead_req) + crypto_aead_reqsize(aead); tctx->ivoffset = offsetof(struct essiv_aead_request_ctx, aead_req) + subreq_size; crypto_aead_set_reqsize(tfm, tctx->ivoffset + crypto_aead_ivsize(aead)); err = essiv_init_tfm(ictx, tctx); if (err) { crypto_free_aead(aead); return err; } tctx->u.aead = aead; return 0; } static void essiv_skcipher_exit_tfm(struct crypto_skcipher *tfm) { struct essiv_tfm_ctx *tctx = crypto_skcipher_ctx(tfm); crypto_free_skcipher(tctx->u.skcipher); crypto_free_cipher(tctx->essiv_cipher); crypto_free_shash(tctx->hash); } static void essiv_aead_exit_tfm(struct crypto_aead *tfm) { struct essiv_tfm_ctx *tctx = crypto_aead_ctx(tfm); crypto_free_aead(tctx->u.aead); crypto_free_cipher(tctx->essiv_cipher); crypto_free_shash(tctx->hash); } static void essiv_skcipher_free_instance(struct skcipher_instance *inst) { struct essiv_instance_ctx *ictx = skcipher_instance_ctx(inst); crypto_drop_skcipher(&ictx->u.skcipher_spawn); kfree(inst); } static void essiv_aead_free_instance(struct aead_instance *inst) { struct essiv_instance_ctx *ictx = aead_instance_ctx(inst); crypto_drop_aead(&ictx->u.aead_spawn); kfree(inst); } static bool parse_cipher_name(char *essiv_cipher_name, const char *cra_name) { const char *p, *q; int len; /* find the last opening parens */ p = strrchr(cra_name, '('); if (!p++) return false; /* find the first closing parens in the tail of the string */ q = strchr(p, ')'); if (!q) return false; len = q - p; if (len >= CRYPTO_MAX_ALG_NAME) return false; memcpy(essiv_cipher_name, p, len); essiv_cipher_name[len] = '\0'; return true; } static bool essiv_supported_algorithms(const char *essiv_cipher_name, struct shash_alg *hash_alg, int ivsize) { struct crypto_alg *alg; bool ret = false; alg = crypto_alg_mod_lookup(essiv_cipher_name, CRYPTO_ALG_TYPE_CIPHER, CRYPTO_ALG_TYPE_MASK); if (IS_ERR(alg)) return false; if (hash_alg->digestsize < alg->cra_cipher.cia_min_keysize || hash_alg->digestsize > alg->cra_cipher.cia_max_keysize) goto out; if (ivsize != alg->cra_blocksize) goto out; if (crypto_shash_alg_needs_key(hash_alg)) goto out; ret = true; out: crypto_mod_put(alg); return ret; } static int essiv_create(struct crypto_template *tmpl, struct rtattr **tb) { struct skcipher_alg_common *skcipher_alg = NULL; struct crypto_attr_type *algt; const char *inner_cipher_name; const char *shash_name; struct skcipher_instance *skcipher_inst = NULL; struct aead_instance *aead_inst = NULL; struct crypto_instance *inst; struct crypto_alg *base, *block_base; struct essiv_instance_ctx *ictx; struct aead_alg *aead_alg = NULL; struct crypto_alg *_hash_alg; struct shash_alg *hash_alg; int ivsize; u32 type; u32 mask; int err; algt = crypto_get_attr_type(tb); if (IS_ERR(algt)) return PTR_ERR(algt); inner_cipher_name = crypto_attr_alg_name(tb[1]); if (IS_ERR(inner_cipher_name)) return PTR_ERR(inner_cipher_name); shash_name = crypto_attr_alg_name(tb[2]); if (IS_ERR(shash_name)) return PTR_ERR(shash_name); type = algt->type & algt->mask; mask = crypto_algt_inherited_mask(algt); switch (type) { case CRYPTO_ALG_TYPE_LSKCIPHER: skcipher_inst = kzalloc(sizeof(*skcipher_inst) + sizeof(*ictx), GFP_KERNEL); if (!skcipher_inst) return -ENOMEM; inst = skcipher_crypto_instance(skcipher_inst); base = &skcipher_inst->alg.base; ictx = crypto_instance_ctx(inst); /* Symmetric cipher, e.g., "cbc(aes)" */ err = crypto_grab_skcipher(&ictx->u.skcipher_spawn, inst, inner_cipher_name, 0, mask); if (err) goto out_free_inst; skcipher_alg = crypto_spawn_skcipher_alg_common( &ictx->u.skcipher_spawn); block_base = &skcipher_alg->base; ivsize = skcipher_alg->ivsize; break; case CRYPTO_ALG_TYPE_AEAD: aead_inst = kzalloc(sizeof(*aead_inst) + sizeof(*ictx), GFP_KERNEL); if (!aead_inst) return -ENOMEM; inst = aead_crypto_instance(aead_inst); base = &aead_inst->alg.base; ictx = crypto_instance_ctx(inst); /* AEAD cipher, e.g., "authenc(hmac(sha256),cbc(aes))" */ err = crypto_grab_aead(&ictx->u.aead_spawn, inst, inner_cipher_name, 0, mask); if (err) goto out_free_inst; aead_alg = crypto_spawn_aead_alg(&ictx->u.aead_spawn); block_base = &aead_alg->base; if (!strstarts(block_base->cra_name, "authenc(")) { pr_warn("Only authenc() type AEADs are supported by ESSIV\n"); err = -EINVAL; goto out_drop_skcipher; } ivsize = aead_alg->ivsize; break; default: return -EINVAL; } if (!parse_cipher_name(ictx->essiv_cipher_name, block_base->cra_name)) { pr_warn("Failed to parse ESSIV cipher name from skcipher cra_name\n"); err = -EINVAL; goto out_drop_skcipher; } /* Synchronous hash, e.g., "sha256" */ _hash_alg = crypto_alg_mod_lookup(shash_name, CRYPTO_ALG_TYPE_SHASH, CRYPTO_ALG_TYPE_MASK | mask); if (IS_ERR(_hash_alg)) { err = PTR_ERR(_hash_alg); goto out_drop_skcipher; } hash_alg = __crypto_shash_alg(_hash_alg); /* Check the set of algorithms */ if (!essiv_supported_algorithms(ictx->essiv_cipher_name, hash_alg, ivsize)) { pr_warn("Unsupported essiv instantiation: essiv(%s,%s)\n", block_base->cra_name, hash_alg->base.cra_name); err = -EINVAL; goto out_free_hash; } /* record the driver name so we can instantiate this exact algo later */ strscpy(ictx->shash_driver_name, hash_alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME); /* Instance fields */ err = -ENAMETOOLONG; if (snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "essiv(%s,%s)", block_base->cra_name, hash_alg->base.cra_name) >= CRYPTO_MAX_ALG_NAME) goto out_free_hash; if (snprintf(base->cra_driver_name, CRYPTO_MAX_ALG_NAME, "essiv(%s,%s)", block_base->cra_driver_name, hash_alg->base.cra_driver_name) >= CRYPTO_MAX_ALG_NAME) goto out_free_hash; /* * hash_alg wasn't gotten via crypto_grab*(), so we need to inherit its * flags manually. */ base->cra_flags |= (hash_alg->base.cra_flags & CRYPTO_ALG_INHERITED_FLAGS); base->cra_blocksize = block_base->cra_blocksize; base->cra_ctxsize = sizeof(struct essiv_tfm_ctx); base->cra_alignmask = block_base->cra_alignmask; base->cra_priority = block_base->cra_priority; if (type == CRYPTO_ALG_TYPE_LSKCIPHER) { skcipher_inst->alg.setkey = essiv_skcipher_setkey; skcipher_inst->alg.encrypt = essiv_skcipher_encrypt; skcipher_inst->alg.decrypt = essiv_skcipher_decrypt; skcipher_inst->alg.init = essiv_skcipher_init_tfm; skcipher_inst->alg.exit = essiv_skcipher_exit_tfm; skcipher_inst->alg.min_keysize = skcipher_alg->min_keysize; skcipher_inst->alg.max_keysize = skcipher_alg->max_keysize; skcipher_inst->alg.ivsize = ivsize; skcipher_inst->alg.chunksize = skcipher_alg->chunksize; skcipher_inst->free = essiv_skcipher_free_instance; err = skcipher_register_instance(tmpl, skcipher_inst); } else { aead_inst->alg.setkey = essiv_aead_setkey; aead_inst->alg.setauthsize = essiv_aead_setauthsize; aead_inst->alg.encrypt = essiv_aead_encrypt; aead_inst->alg.decrypt = essiv_aead_decrypt; aead_inst->alg.init = essiv_aead_init_tfm; aead_inst->alg.exit = essiv_aead_exit_tfm; aead_inst->alg.ivsize = ivsize; aead_inst->alg.maxauthsize = crypto_aead_alg_maxauthsize(aead_alg); aead_inst->alg.chunksize = crypto_aead_alg_chunksize(aead_alg); aead_inst->free = essiv_aead_free_instance; err = aead_register_instance(tmpl, aead_inst); } if (err) goto out_free_hash; crypto_mod_put(_hash_alg); return 0; out_free_hash: crypto_mod_put(_hash_alg); out_drop_skcipher: if (type == CRYPTO_ALG_TYPE_LSKCIPHER) crypto_drop_skcipher(&ictx->u.skcipher_spawn); else crypto_drop_aead(&ictx->u.aead_spawn); out_free_inst: kfree(skcipher_inst); kfree(aead_inst); return err; } /* essiv(cipher_name, shash_name) */ static struct crypto_template essiv_tmpl = { .name = "essiv", .create = essiv_create, .module = THIS_MODULE, }; static int __init essiv_module_init(void) { return crypto_register_template(&essiv_tmpl); } static void __exit essiv_module_exit(void) { crypto_unregister_template(&essiv_tmpl); } subsys_initcall(essiv_module_init); module_exit(essiv_module_exit); MODULE_DESCRIPTION("ESSIV skcipher/aead wrapper for block encryption"); MODULE_LICENSE("GPL v2"); MODULE_ALIAS_CRYPTO("essiv"); MODULE_IMPORT_NS(CRYPTO_INTERNAL);
2 2 104 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 // SPDX-License-Identifier: GPL-2.0-only /* * Generic show_mem() implementation * * Copyright (C) 2008 Johannes Weiner <hannes@saeurebad.de> */ #include <linux/blkdev.h> #include <linux/cma.h> #include <linux/cpuset.h> #include <linux/highmem.h> #include <linux/hugetlb.h> #include <linux/mm.h> #include <linux/mmzone.h> #include <linux/swap.h> #include <linux/vmstat.h> #include "internal.h" #include "swap.h" atomic_long_t _totalram_pages __read_mostly; EXPORT_SYMBOL(_totalram_pages); unsigned long totalreserve_pages __read_mostly; unsigned long totalcma_pages __read_mostly; static inline void show_node(struct zone *zone) { if (IS_ENABLED(CONFIG_NUMA)) printk("Node %d ", zone_to_nid(zone)); } long si_mem_available(void) { long available; unsigned long pagecache; unsigned long wmark_low = 0; unsigned long reclaimable; struct zone *zone; for_each_zone(zone) wmark_low += low_wmark_pages(zone); /* * Estimate the amount of memory available for userspace allocations, * without causing swapping or OOM. */ available = global_zone_page_state(NR_FREE_PAGES) - totalreserve_pages; /* * Not all the page cache can be freed, otherwise the system will * start swapping or thrashing. Assume at least half of the page * cache, or the low watermark worth of cache, needs to stay. */ pagecache = global_node_page_state(NR_ACTIVE_FILE) + global_node_page_state(NR_INACTIVE_FILE); pagecache -= min(pagecache / 2, wmark_low); available += pagecache; /* * Part of the reclaimable slab and other kernel memory consists of * items that are in use, and cannot be freed. Cap this estimate at the * low watermark. */ reclaimable = global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B) + global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE); reclaimable -= min(reclaimable / 2, wmark_low); available += reclaimable; if (available < 0) available = 0; return available; } EXPORT_SYMBOL_GPL(si_mem_available); void si_meminfo(struct sysinfo *val) { val->totalram = totalram_pages(); val->sharedram = global_node_page_state(NR_SHMEM); val->freeram = global_zone_page_state(NR_FREE_PAGES); val->bufferram = nr_blockdev_pages(); val->totalhigh = totalhigh_pages(); val->freehigh = nr_free_highpages(); val->mem_unit = PAGE_SIZE; } EXPORT_SYMBOL(si_meminfo); #ifdef CONFIG_NUMA void si_meminfo_node(struct sysinfo *val, int nid) { int zone_type; /* needs to be signed */ unsigned long managed_pages = 0; unsigned long managed_highpages = 0; unsigned long free_highpages = 0; pg_data_t *pgdat = NODE_DATA(nid); for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) managed_pages += zone_managed_pages(&pgdat->node_zones[zone_type]); val->totalram = managed_pages; val->sharedram = node_page_state(pgdat, NR_SHMEM); val->freeram = sum_zone_node_page_state(nid, NR_FREE_PAGES); #ifdef CONFIG_HIGHMEM for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++) { struct zone *zone = &pgdat->node_zones[zone_type]; if (is_highmem(zone)) { managed_highpages += zone_managed_pages(zone); free_highpages += zone_page_state(zone, NR_FREE_PAGES); } } val->totalhigh = managed_highpages; val->freehigh = free_highpages; #else val->totalhigh = managed_highpages; val->freehigh = free_highpages; #endif val->mem_unit = PAGE_SIZE; } #endif /* * Determine whether the node should be displayed or not, depending on whether * SHOW_MEM_FILTER_NODES was passed to show_free_areas(). */ static bool show_mem_node_skip(unsigned int flags, int nid, nodemask_t *nodemask) { if (!(flags & SHOW_MEM_FILTER_NODES)) return false; /* * no node mask - aka implicit memory numa policy. Do not bother with * the synchronization - read_mems_allowed_begin - because we do not * have to be precise here. */ if (!nodemask) nodemask = &cpuset_current_mems_allowed; return !node_isset(nid, *nodemask); } static void show_migration_types(unsigned char type) { static const char types[MIGRATE_TYPES] = { [MIGRATE_UNMOVABLE] = 'U', [MIGRATE_MOVABLE] = 'M', [MIGRATE_RECLAIMABLE] = 'E', [MIGRATE_HIGHATOMIC] = 'H', #ifdef CONFIG_CMA [MIGRATE_CMA] = 'C', #endif #ifdef CONFIG_MEMORY_ISOLATION [MIGRATE_ISOLATE] = 'I', #endif }; char tmp[MIGRATE_TYPES + 1]; char *p = tmp; int i; for (i = 0; i < MIGRATE_TYPES; i++) { if (type & (1 << i)) *p++ = types[i]; } *p = '\0'; printk(KERN_CONT "(%s) ", tmp); } static bool node_has_managed_zones(pg_data_t *pgdat, int max_zone_idx) { int zone_idx; for (zone_idx = 0; zone_idx <= max_zone_idx; zone_idx++) if (zone_managed_pages(pgdat->node_zones + zone_idx)) return true; return false; } /* * Show free area list (used inside shift_scroll-lock stuff) * We also calculate the percentage fragmentation. We do this by counting the * memory on each free list with the exception of the first item on the list. * * Bits in @filter: * SHOW_MEM_FILTER_NODES: suppress nodes that are not allowed by current's * cpuset. */ static void show_free_areas(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { unsigned long free_pcp = 0; int cpu, nid; struct zone *zone; pg_data_t *pgdat; for_each_populated_zone(zone) { if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; for_each_online_cpu(cpu) free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count; } printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n" " active_file:%lu inactive_file:%lu isolated_file:%lu\n" " unevictable:%lu dirty:%lu writeback:%lu\n" " slab_reclaimable:%lu slab_unreclaimable:%lu\n" " mapped:%lu shmem:%lu pagetables:%lu\n" " sec_pagetables:%lu bounce:%lu\n" " kernel_misc_reclaimable:%lu\n" " free:%lu free_pcp:%lu free_cma:%lu\n", global_node_page_state(NR_ACTIVE_ANON), global_node_page_state(NR_INACTIVE_ANON), global_node_page_state(NR_ISOLATED_ANON), global_node_page_state(NR_ACTIVE_FILE), global_node_page_state(NR_INACTIVE_FILE), global_node_page_state(NR_ISOLATED_FILE), global_node_page_state(NR_UNEVICTABLE), global_node_page_state(NR_FILE_DIRTY), global_node_page_state(NR_WRITEBACK), global_node_page_state_pages(NR_SLAB_RECLAIMABLE_B), global_node_page_state_pages(NR_SLAB_UNRECLAIMABLE_B), global_node_page_state(NR_FILE_MAPPED), global_node_page_state(NR_SHMEM), global_node_page_state(NR_PAGETABLE), global_node_page_state(NR_SECONDARY_PAGETABLE), global_zone_page_state(NR_BOUNCE), global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE), global_zone_page_state(NR_FREE_PAGES), free_pcp, global_zone_page_state(NR_FREE_CMA_PAGES)); for_each_online_pgdat(pgdat) { if (show_mem_node_skip(filter, pgdat->node_id, nodemask)) continue; if (!node_has_managed_zones(pgdat, max_zone_idx)) continue; printk("Node %d" " active_anon:%lukB" " inactive_anon:%lukB" " active_file:%lukB" " inactive_file:%lukB" " unevictable:%lukB" " isolated(anon):%lukB" " isolated(file):%lukB" " mapped:%lukB" " dirty:%lukB" " writeback:%lukB" " shmem:%lukB" #ifdef CONFIG_TRANSPARENT_HUGEPAGE " shmem_thp:%lukB" " shmem_pmdmapped:%lukB" " anon_thp:%lukB" #endif " writeback_tmp:%lukB" " kernel_stack:%lukB" #ifdef CONFIG_SHADOW_CALL_STACK " shadow_call_stack:%lukB" #endif " pagetables:%lukB" " sec_pagetables:%lukB" " all_unreclaimable? %s" "\n", pgdat->node_id, K(node_page_state(pgdat, NR_ACTIVE_ANON)), K(node_page_state(pgdat, NR_INACTIVE_ANON)), K(node_page_state(pgdat, NR_ACTIVE_FILE)), K(node_page_state(pgdat, NR_INACTIVE_FILE)), K(node_page_state(pgdat, NR_UNEVICTABLE)), K(node_page_state(pgdat, NR_ISOLATED_ANON)), K(node_page_state(pgdat, NR_ISOLATED_FILE)), K(node_page_state(pgdat, NR_FILE_MAPPED)), K(node_page_state(pgdat, NR_FILE_DIRTY)), K(node_page_state(pgdat, NR_WRITEBACK)), K(node_page_state(pgdat, NR_SHMEM)), #ifdef CONFIG_TRANSPARENT_HUGEPAGE K(node_page_state(pgdat, NR_SHMEM_THPS)), K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)), K(node_page_state(pgdat, NR_ANON_THPS)), #endif K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), node_page_state(pgdat, NR_KERNEL_STACK_KB), #ifdef CONFIG_SHADOW_CALL_STACK node_page_state(pgdat, NR_KERNEL_SCS_KB), #endif K(node_page_state(pgdat, NR_PAGETABLE)), K(node_page_state(pgdat, NR_SECONDARY_PAGETABLE)), pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? "yes" : "no"); } for_each_populated_zone(zone) { int i; if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; free_pcp = 0; for_each_online_cpu(cpu) free_pcp += per_cpu_ptr(zone->per_cpu_pageset, cpu)->count; show_node(zone); printk(KERN_CONT "%s" " free:%lukB" " boost:%lukB" " min:%lukB" " low:%lukB" " high:%lukB" " reserved_highatomic:%luKB" " active_anon:%lukB" " inactive_anon:%lukB" " active_file:%lukB" " inactive_file:%lukB" " unevictable:%lukB" " writepending:%lukB" " present:%lukB" " managed:%lukB" " mlocked:%lukB" " bounce:%lukB" " free_pcp:%lukB" " local_pcp:%ukB" " free_cma:%lukB" "\n", zone->name, K(zone_page_state(zone, NR_FREE_PAGES)), K(zone->watermark_boost), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), K(zone->nr_reserved_highatomic), K(zone_page_state(zone, NR_ZONE_ACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_INACTIVE_ANON)), K(zone_page_state(zone, NR_ZONE_ACTIVE_FILE)), K(zone_page_state(zone, NR_ZONE_INACTIVE_FILE)), K(zone_page_state(zone, NR_ZONE_UNEVICTABLE)), K(zone_page_state(zone, NR_ZONE_WRITE_PENDING)), K(zone->present_pages), K(zone_managed_pages(zone)), K(zone_page_state(zone, NR_MLOCK)), K(zone_page_state(zone, NR_BOUNCE)), K(free_pcp), K(this_cpu_read(zone->per_cpu_pageset->count)), K(zone_page_state(zone, NR_FREE_CMA_PAGES))); printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) printk(KERN_CONT " %ld", zone->lowmem_reserve[i]); printk(KERN_CONT "\n"); } for_each_populated_zone(zone) { unsigned int order; unsigned long nr[NR_PAGE_ORDERS], flags, total = 0; unsigned char types[NR_PAGE_ORDERS]; if (zone_idx(zone) > max_zone_idx) continue; if (show_mem_node_skip(filter, zone_to_nid(zone), nodemask)) continue; show_node(zone); printk(KERN_CONT "%s: ", zone->name); spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &zone->free_area[order]; int type; nr[order] = area->nr_free; total += nr[order] << order; types[order] = 0; for (type = 0; type < MIGRATE_TYPES; type++) { if (!free_area_empty(area, type)) types[order] |= 1 << type; } } spin_unlock_irqrestore(&zone->lock, flags); for (order = 0; order < NR_PAGE_ORDERS; order++) { printk(KERN_CONT "%lu*%lukB ", nr[order], K(1UL) << order); if (nr[order]) show_migration_types(types[order]); } printk(KERN_CONT "= %lukB\n", K(total)); } for_each_online_node(nid) { if (show_mem_node_skip(filter, nid, nodemask)) continue; hugetlb_show_meminfo_node(nid); } printk("%ld total pagecache pages\n", global_node_page_state(NR_FILE_PAGES)); show_swap_cache_info(); } void __show_mem(unsigned int filter, nodemask_t *nodemask, int max_zone_idx) { unsigned long total = 0, reserved = 0, highmem = 0; struct zone *zone; printk("Mem-Info:\n"); show_free_areas(filter, nodemask, max_zone_idx); for_each_populated_zone(zone) { total += zone->present_pages; reserved += zone->present_pages - zone_managed_pages(zone); if (is_highmem(zone)) highmem += zone->present_pages; } printk("%lu pages RAM\n", total); printk("%lu pages HighMem/MovableOnly\n", highmem); printk("%lu pages reserved\n", reserved); #ifdef CONFIG_CMA printk("%lu pages cma reserved\n", totalcma_pages); #endif #ifdef CONFIG_MEMORY_FAILURE printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); #endif #ifdef CONFIG_MEM_ALLOC_PROFILING { struct codetag_bytes tags[10]; size_t i, nr; nr = alloc_tag_top_users(tags, ARRAY_SIZE(tags), false); if (nr) { pr_notice("Memory allocations:\n"); for (i = 0; i < nr; i++) { struct codetag *ct = tags[i].ct; struct alloc_tag *tag = ct_to_alloc_tag(ct); struct alloc_tag_counters counter = alloc_tag_read(tag); /* Same as alloc_tag_to_text() but w/o intermediate buffer */ if (ct->modname) pr_notice("%12lli %8llu %s:%u [%s] func:%s\n", counter.bytes, counter.calls, ct->filename, ct->lineno, ct->modname, ct->function); else pr_notice("%12lli %8llu %s:%u func:%s\n", counter.bytes, counter.calls, ct->filename, ct->lineno, ct->function); } } } #endif }
578 578 46 46 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 // SPDX-License-Identifier: GPL-2.0 #include <linux/cache.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/pid_namespace.h> #include "internal.h" /* * /proc/self: */ static const char *proc_self_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) { struct pid_namespace *ns = proc_pid_ns(inode->i_sb); pid_t tgid = task_tgid_nr_ns(current, ns); char *name; if (!tgid) return ERR_PTR(-ENOENT); /* max length of unsigned int in decimal + NULL term */ name = kmalloc(10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); if (unlikely(!name)) return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD); sprintf(name, "%u", tgid); set_delayed_call(done, kfree_link, name); return name; } static const struct inode_operations proc_self_inode_operations = { .get_link = proc_self_get_link, }; static unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { struct inode *root_inode = d_inode(s->s_root); struct proc_fs_info *fs_info = proc_sb_info(s); struct dentry *self; int ret = -ENOMEM; inode_lock(root_inode); self = d_alloc_name(s->s_root, "self"); if (self) { struct inode *inode = new_inode(s); if (inode) { inode->i_ino = self_inum; simple_inode_init_ts(inode); inode->i_mode = S_IFLNK | S_IRWXUGO; inode->i_uid = GLOBAL_ROOT_UID; inode->i_gid = GLOBAL_ROOT_GID; inode->i_op = &proc_self_inode_operations; d_add(self, inode); ret = 0; } else { dput(self); } } inode_unlock(root_inode); if (ret) pr_err("proc_fill_super: can't allocate /proc/self\n"); else fs_info->proc_self = self; return ret; } void __init proc_self_init(void) { proc_alloc_inum(&self_inum); }
2 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 // SPDX-License-Identifier: GPL-2.0+ /* * TI 3410/5052 USB Serial Driver * * Copyright (C) 2004 Texas Instruments * * This driver is based on the Linux io_ti driver, which is * Copyright (C) 2000-2002 Inside Out Networks * Copyright (C) 2001-2002 Greg Kroah-Hartman * * For questions or problems with this driver, contact Texas Instruments * technical support, or Al Borchers <alborchers@steinerpoint.com>, or * Peter Berger <pberger@brimson.com>. */ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/firmware.h> #include <linux/slab.h> #include <linux/tty.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/ioctl.h> #include <linux/serial.h> #include <linux/kfifo.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/serial.h> /* Configuration ids */ #define TI_BOOT_CONFIG 1 #define TI_ACTIVE_CONFIG 2 /* Vendor and product ids */ #define TI_VENDOR_ID 0x0451 #define IBM_VENDOR_ID 0x04b3 #define STARTECH_VENDOR_ID 0x14b0 #define TI_3410_PRODUCT_ID 0x3410 #define IBM_4543_PRODUCT_ID 0x4543 #define IBM_454B_PRODUCT_ID 0x454b #define IBM_454C_PRODUCT_ID 0x454c #define TI_3410_EZ430_ID 0xF430 /* TI ez430 development tool */ #define TI_5052_BOOT_PRODUCT_ID 0x5052 /* no EEPROM, no firmware */ #define TI_5152_BOOT_PRODUCT_ID 0x5152 /* no EEPROM, no firmware */ #define TI_5052_EEPROM_PRODUCT_ID 0x505A /* EEPROM, no firmware */ #define TI_5052_FIRMWARE_PRODUCT_ID 0x505F /* firmware is running */ #define FRI2_PRODUCT_ID 0x5053 /* Fish River Island II */ /* Multi-Tech vendor and product ids */ #define MTS_VENDOR_ID 0x06E0 #define MTS_GSM_NO_FW_PRODUCT_ID 0xF108 #define MTS_CDMA_NO_FW_PRODUCT_ID 0xF109 #define MTS_CDMA_PRODUCT_ID 0xF110 #define MTS_GSM_PRODUCT_ID 0xF111 #define MTS_EDGE_PRODUCT_ID 0xF112 #define MTS_MT9234MU_PRODUCT_ID 0xF114 #define MTS_MT9234ZBA_PRODUCT_ID 0xF115 #define MTS_MT9234ZBAOLD_PRODUCT_ID 0x0319 /* Abbott Diabetics vendor and product ids */ #define ABBOTT_VENDOR_ID 0x1a61 #define ABBOTT_STEREO_PLUG_ID 0x3410 #define ABBOTT_PRODUCT_ID ABBOTT_STEREO_PLUG_ID #define ABBOTT_STRIP_PORT_ID 0x3420 /* Honeywell vendor and product IDs */ #define HONEYWELL_VENDOR_ID 0x10ac #define HONEYWELL_HGI80_PRODUCT_ID 0x0102 /* Honeywell HGI80 */ /* Moxa UPORT 11x0 vendor and product IDs */ #define MXU1_VENDOR_ID 0x110a #define MXU1_1110_PRODUCT_ID 0x1110 #define MXU1_1130_PRODUCT_ID 0x1130 #define MXU1_1150_PRODUCT_ID 0x1150 #define MXU1_1151_PRODUCT_ID 0x1151 #define MXU1_1131_PRODUCT_ID 0x1131 /* Commands */ #define TI_GET_VERSION 0x01 #define TI_GET_PORT_STATUS 0x02 #define TI_GET_PORT_DEV_INFO 0x03 #define TI_GET_CONFIG 0x04 #define TI_SET_CONFIG 0x05 #define TI_OPEN_PORT 0x06 #define TI_CLOSE_PORT 0x07 #define TI_START_PORT 0x08 #define TI_STOP_PORT 0x09 #define TI_TEST_PORT 0x0A #define TI_PURGE_PORT 0x0B #define TI_RESET_EXT_DEVICE 0x0C #define TI_WRITE_DATA 0x80 #define TI_READ_DATA 0x81 #define TI_REQ_TYPE_CLASS 0x82 /* Module identifiers */ #define TI_I2C_PORT 0x01 #define TI_IEEE1284_PORT 0x02 #define TI_UART1_PORT 0x03 #define TI_UART2_PORT 0x04 #define TI_RAM_PORT 0x05 /* Modem status */ #define TI_MSR_DELTA_CTS 0x01 #define TI_MSR_DELTA_DSR 0x02 #define TI_MSR_DELTA_RI 0x04 #define TI_MSR_DELTA_CD 0x08 #define TI_MSR_CTS 0x10 #define TI_MSR_DSR 0x20 #define TI_MSR_RI 0x40 #define TI_MSR_CD 0x80 #define TI_MSR_DELTA_MASK 0x0F #define TI_MSR_MASK 0xF0 /* Line status */ #define TI_LSR_OVERRUN_ERROR 0x01 #define TI_LSR_PARITY_ERROR 0x02 #define TI_LSR_FRAMING_ERROR 0x04 #define TI_LSR_BREAK 0x08 #define TI_LSR_ERROR 0x0F #define TI_LSR_RX_FULL 0x10 #define TI_LSR_TX_EMPTY 0x20 #define TI_LSR_TX_EMPTY_BOTH 0x40 /* Line control */ #define TI_LCR_BREAK 0x40 /* Modem control */ #define TI_MCR_LOOP 0x04 #define TI_MCR_DTR 0x10 #define TI_MCR_RTS 0x20 /* Mask settings */ #define TI_UART_ENABLE_RTS_IN 0x0001 #define TI_UART_DISABLE_RTS 0x0002 #define TI_UART_ENABLE_PARITY_CHECKING 0x0008 #define TI_UART_ENABLE_DSR_OUT 0x0010 #define TI_UART_ENABLE_CTS_OUT 0x0020 #define TI_UART_ENABLE_X_OUT 0x0040 #define TI_UART_ENABLE_XA_OUT 0x0080 #define TI_UART_ENABLE_X_IN 0x0100 #define TI_UART_ENABLE_DTR_IN 0x0800 #define TI_UART_DISABLE_DTR 0x1000 #define TI_UART_ENABLE_MS_INTS 0x2000 #define TI_UART_ENABLE_AUTO_START_DMA 0x4000 /* Parity */ #define TI_UART_NO_PARITY 0x00 #define TI_UART_ODD_PARITY 0x01 #define TI_UART_EVEN_PARITY 0x02 #define TI_UART_MARK_PARITY 0x03 #define TI_UART_SPACE_PARITY 0x04 /* Stop bits */ #define TI_UART_1_STOP_BITS 0x00 #define TI_UART_1_5_STOP_BITS 0x01 #define TI_UART_2_STOP_BITS 0x02 /* Bits per character */ #define TI_UART_5_DATA_BITS 0x00 #define TI_UART_6_DATA_BITS 0x01 #define TI_UART_7_DATA_BITS 0x02 #define TI_UART_8_DATA_BITS 0x03 /* 232/485 modes */ #define TI_UART_232 0x00 #define TI_UART_485_RECEIVER_DISABLED 0x01 #define TI_UART_485_RECEIVER_ENABLED 0x02 /* Pipe transfer mode and timeout */ #define TI_PIPE_MODE_CONTINUOUS 0x01 #define TI_PIPE_MODE_MASK 0x03 #define TI_PIPE_TIMEOUT_MASK 0x7C #define TI_PIPE_TIMEOUT_ENABLE 0x80 /* Config struct */ struct ti_uart_config { __be16 wBaudRate; __be16 wFlags; u8 bDataBits; u8 bParity; u8 bStopBits; char cXon; char cXoff; u8 bUartMode; }; /* Get port status */ struct ti_port_status { u8 bCmdCode; u8 bModuleId; u8 bErrorCode; u8 bMSR; u8 bLSR; }; /* Purge modes */ #define TI_PURGE_OUTPUT 0x00 #define TI_PURGE_INPUT 0x80 /* Read/Write data */ #define TI_RW_DATA_ADDR_SFR 0x10 #define TI_RW_DATA_ADDR_IDATA 0x20 #define TI_RW_DATA_ADDR_XDATA 0x30 #define TI_RW_DATA_ADDR_CODE 0x40 #define TI_RW_DATA_ADDR_GPIO 0x50 #define TI_RW_DATA_ADDR_I2C 0x60 #define TI_RW_DATA_ADDR_FLASH 0x70 #define TI_RW_DATA_ADDR_DSP 0x80 #define TI_RW_DATA_UNSPECIFIED 0x00 #define TI_RW_DATA_BYTE 0x01 #define TI_RW_DATA_WORD 0x02 #define TI_RW_DATA_DOUBLE_WORD 0x04 struct ti_write_data_bytes { u8 bAddrType; u8 bDataType; u8 bDataCounter; __be16 wBaseAddrHi; __be16 wBaseAddrLo; u8 bData[]; } __packed; struct ti_read_data_request { u8 bAddrType; u8 bDataType; u8 bDataCounter; __be16 wBaseAddrHi; __be16 wBaseAddrLo; } __packed; struct ti_read_data_bytes { u8 bCmdCode; u8 bModuleId; u8 bErrorCode; u8 bData[]; }; /* Interrupt struct */ struct ti_interrupt { u8 bICode; u8 bIInfo; }; /* Interrupt codes */ #define TI_CODE_HARDWARE_ERROR 0xFF #define TI_CODE_DATA_ERROR 0x03 #define TI_CODE_MODEM_STATUS 0x04 /* Download firmware max packet size */ #define TI_DOWNLOAD_MAX_PACKET_SIZE 64 /* Firmware image header */ struct ti_firmware_header { __le16 wLength; u8 bCheckSum; } __packed; /* UART addresses */ #define TI_UART1_BASE_ADDR 0xFFA0 /* UART 1 base address */ #define TI_UART2_BASE_ADDR 0xFFB0 /* UART 2 base address */ #define TI_UART_OFFSET_LCR 0x0002 /* UART MCR register offset */ #define TI_UART_OFFSET_MCR 0x0004 /* UART MCR register offset */ #define TI_DRIVER_AUTHOR "Al Borchers <alborchers@steinerpoint.com>" #define TI_DRIVER_DESC "TI USB 3410/5052 Serial Driver" #define TI_FIRMWARE_BUF_SIZE 16284 #define TI_TRANSFER_TIMEOUT 2 /* read urb states */ #define TI_READ_URB_RUNNING 0 #define TI_READ_URB_STOPPING 1 #define TI_READ_URB_STOPPED 2 #define TI_EXTRA_VID_PID_COUNT 5 struct ti_port { int tp_is_open; u8 tp_msr; u8 tp_shadow_mcr; u8 tp_uart_mode; /* 232 or 485 modes */ unsigned int tp_uart_base_addr; struct ti_device *tp_tdev; struct usb_serial_port *tp_port; spinlock_t tp_lock; int tp_read_urb_state; int tp_write_urb_in_use; }; struct ti_device { struct mutex td_open_close_lock; int td_open_port_count; struct usb_serial *td_serial; int td_is_3410; bool td_rs485_only; }; static int ti_startup(struct usb_serial *serial); static void ti_release(struct usb_serial *serial); static int ti_port_probe(struct usb_serial_port *port); static void ti_port_remove(struct usb_serial_port *port); static int ti_open(struct tty_struct *tty, struct usb_serial_port *port); static void ti_close(struct usb_serial_port *port); static int ti_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *data, int count); static unsigned int ti_write_room(struct tty_struct *tty); static unsigned int ti_chars_in_buffer(struct tty_struct *tty); static bool ti_tx_empty(struct usb_serial_port *port); static void ti_throttle(struct tty_struct *tty); static void ti_unthrottle(struct tty_struct *tty); static void ti_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios); static int ti_tiocmget(struct tty_struct *tty); static int ti_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear); static int ti_break(struct tty_struct *tty, int break_state); static void ti_interrupt_callback(struct urb *urb); static void ti_bulk_in_callback(struct urb *urb); static void ti_bulk_out_callback(struct urb *urb); static void ti_recv(struct usb_serial_port *port, unsigned char *data, int length); static void ti_send(struct ti_port *tport); static int ti_set_mcr(struct ti_port *tport, unsigned int mcr); static int ti_get_lsr(struct ti_port *tport, u8 *lsr); static void ti_get_serial_info(struct tty_struct *tty, struct serial_struct *ss); static void ti_handle_new_msr(struct ti_port *tport, u8 msr); static void ti_stop_read(struct ti_port *tport, struct tty_struct *tty); static int ti_restart_read(struct ti_port *tport, struct tty_struct *tty); static int ti_command_out_sync(struct usb_device *udev, u8 command, u16 moduleid, u16 value, void *data, int size); static int ti_command_in_sync(struct usb_device *udev, u8 command, u16 moduleid, u16 value, void *data, int size); static int ti_port_cmd_out(struct usb_serial_port *port, u8 command, u16 value, void *data, int size); static int ti_port_cmd_in(struct usb_serial_port *port, u8 command, u16 value, void *data, int size); static int ti_write_byte(struct usb_serial_port *port, struct ti_device *tdev, unsigned long addr, u8 mask, u8 byte); static int ti_download_firmware(struct ti_device *tdev); static const struct usb_device_id ti_id_table_3410[] = { { USB_DEVICE(TI_VENDOR_ID, TI_3410_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_3410_EZ430_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_NO_FW_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_CDMA_NO_FW_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_CDMA_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_EDGE_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234MU_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234ZBA_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234ZBAOLD_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_4543_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_454B_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_454C_PRODUCT_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STEREO_PLUG_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1110_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1130_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) }, { USB_DEVICE(STARTECH_VENDOR_ID, TI_3410_PRODUCT_ID) }, { } /* terminator */ }; static const struct usb_device_id ti_id_table_5052[] = { { USB_DEVICE(TI_VENDOR_ID, TI_5052_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5152_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_EEPROM_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_FIRMWARE_PRODUCT_ID) }, { } }; static const struct usb_device_id ti_id_table_combined[] = { { USB_DEVICE(TI_VENDOR_ID, TI_3410_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_3410_EZ430_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_NO_FW_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_CDMA_NO_FW_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_CDMA_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_GSM_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_EDGE_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234MU_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234ZBA_PRODUCT_ID) }, { USB_DEVICE(MTS_VENDOR_ID, MTS_MT9234ZBAOLD_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5152_BOOT_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_EEPROM_PRODUCT_ID) }, { USB_DEVICE(TI_VENDOR_ID, TI_5052_FIRMWARE_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_4543_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_454B_PRODUCT_ID) }, { USB_DEVICE(IBM_VENDOR_ID, IBM_454C_PRODUCT_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_PRODUCT_ID) }, { USB_DEVICE(ABBOTT_VENDOR_ID, ABBOTT_STRIP_PORT_ID) }, { USB_DEVICE(TI_VENDOR_ID, FRI2_PRODUCT_ID) }, { USB_DEVICE(HONEYWELL_VENDOR_ID, HONEYWELL_HGI80_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1110_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1130_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1131_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1150_PRODUCT_ID) }, { USB_DEVICE(MXU1_VENDOR_ID, MXU1_1151_PRODUCT_ID) }, { USB_DEVICE(STARTECH_VENDOR_ID, TI_3410_PRODUCT_ID) }, { } /* terminator */ }; static struct usb_serial_driver ti_1port_device = { .driver = { .owner = THIS_MODULE, .name = "ti_usb_3410_5052_1", }, .description = "TI USB 3410 1 port adapter", .id_table = ti_id_table_3410, .num_ports = 1, .num_bulk_out = 1, .attach = ti_startup, .release = ti_release, .port_probe = ti_port_probe, .port_remove = ti_port_remove, .open = ti_open, .close = ti_close, .write = ti_write, .write_room = ti_write_room, .chars_in_buffer = ti_chars_in_buffer, .tx_empty = ti_tx_empty, .throttle = ti_throttle, .unthrottle = ti_unthrottle, .get_serial = ti_get_serial_info, .set_termios = ti_set_termios, .tiocmget = ti_tiocmget, .tiocmset = ti_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .break_ctl = ti_break, .read_int_callback = ti_interrupt_callback, .read_bulk_callback = ti_bulk_in_callback, .write_bulk_callback = ti_bulk_out_callback, }; static struct usb_serial_driver ti_2port_device = { .driver = { .owner = THIS_MODULE, .name = "ti_usb_3410_5052_2", }, .description = "TI USB 5052 2 port adapter", .id_table = ti_id_table_5052, .num_ports = 2, .num_bulk_out = 1, .attach = ti_startup, .release = ti_release, .port_probe = ti_port_probe, .port_remove = ti_port_remove, .open = ti_open, .close = ti_close, .write = ti_write, .write_room = ti_write_room, .chars_in_buffer = ti_chars_in_buffer, .tx_empty = ti_tx_empty, .throttle = ti_throttle, .unthrottle = ti_unthrottle, .get_serial = ti_get_serial_info, .set_termios = ti_set_termios, .tiocmget = ti_tiocmget, .tiocmset = ti_tiocmset, .tiocmiwait = usb_serial_generic_tiocmiwait, .get_icount = usb_serial_generic_get_icount, .break_ctl = ti_break, .read_int_callback = ti_interrupt_callback, .read_bulk_callback = ti_bulk_in_callback, .write_bulk_callback = ti_bulk_out_callback, }; static struct usb_serial_driver * const serial_drivers[] = { &ti_1port_device, &ti_2port_device, NULL }; MODULE_AUTHOR(TI_DRIVER_AUTHOR); MODULE_DESCRIPTION(TI_DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_FIRMWARE("ti_3410.fw"); MODULE_FIRMWARE("ti_5052.fw"); MODULE_FIRMWARE("mts_cdma.fw"); MODULE_FIRMWARE("mts_gsm.fw"); MODULE_FIRMWARE("mts_edge.fw"); MODULE_FIRMWARE("mts_mt9234mu.fw"); MODULE_FIRMWARE("mts_mt9234zba.fw"); MODULE_FIRMWARE("moxa/moxa-1110.fw"); MODULE_FIRMWARE("moxa/moxa-1130.fw"); MODULE_FIRMWARE("moxa/moxa-1131.fw"); MODULE_FIRMWARE("moxa/moxa-1150.fw"); MODULE_FIRMWARE("moxa/moxa-1151.fw"); MODULE_DEVICE_TABLE(usb, ti_id_table_combined); module_usb_serial_driver(serial_drivers, ti_id_table_combined); static int ti_startup(struct usb_serial *serial) { struct ti_device *tdev; struct usb_device *dev = serial->dev; struct usb_host_interface *cur_altsetting; int num_endpoints; u16 vid, pid; int status; dev_dbg(&dev->dev, "%s - product 0x%4X, num configurations %d, configuration value %d\n", __func__, le16_to_cpu(dev->descriptor.idProduct), dev->descriptor.bNumConfigurations, dev->actconfig->desc.bConfigurationValue); tdev = kzalloc(sizeof(struct ti_device), GFP_KERNEL); if (!tdev) return -ENOMEM; mutex_init(&tdev->td_open_close_lock); tdev->td_serial = serial; usb_set_serial_data(serial, tdev); /* determine device type */ if (serial->type == &ti_1port_device) tdev->td_is_3410 = 1; dev_dbg(&dev->dev, "%s - device type is %s\n", __func__, tdev->td_is_3410 ? "3410" : "5052"); vid = le16_to_cpu(dev->descriptor.idVendor); pid = le16_to_cpu(dev->descriptor.idProduct); if (vid == MXU1_VENDOR_ID) { switch (pid) { case MXU1_1130_PRODUCT_ID: case MXU1_1131_PRODUCT_ID: tdev->td_rs485_only = true; break; } } cur_altsetting = serial->interface->cur_altsetting; num_endpoints = cur_altsetting->desc.bNumEndpoints; /* if we have only 1 configuration and 1 endpoint, download firmware */ if (dev->descriptor.bNumConfigurations == 1 && num_endpoints == 1) { status = ti_download_firmware(tdev); if (status != 0) goto free_tdev; /* 3410 must be reset, 5052 resets itself */ if (tdev->td_is_3410) { msleep_interruptible(100); usb_reset_device(dev); } status = -ENODEV; goto free_tdev; } /* the second configuration must be set */ if (dev->actconfig->desc.bConfigurationValue == TI_BOOT_CONFIG) { status = usb_driver_set_configuration(dev, TI_ACTIVE_CONFIG); status = status ? status : -ENODEV; goto free_tdev; } if (serial->num_bulk_in < serial->num_ports || serial->num_bulk_out < serial->num_ports) { dev_err(&serial->interface->dev, "missing endpoints\n"); status = -ENODEV; goto free_tdev; } return 0; free_tdev: kfree(tdev); usb_set_serial_data(serial, NULL); return status; } static void ti_release(struct usb_serial *serial) { struct ti_device *tdev = usb_get_serial_data(serial); kfree(tdev); } static int ti_port_probe(struct usb_serial_port *port) { struct ti_port *tport; tport = kzalloc(sizeof(*tport), GFP_KERNEL); if (!tport) return -ENOMEM; spin_lock_init(&tport->tp_lock); if (port == port->serial->port[0]) tport->tp_uart_base_addr = TI_UART1_BASE_ADDR; else tport->tp_uart_base_addr = TI_UART2_BASE_ADDR; tport->tp_port = port; tport->tp_tdev = usb_get_serial_data(port->serial); if (tport->tp_tdev->td_rs485_only) tport->tp_uart_mode = TI_UART_485_RECEIVER_DISABLED; else tport->tp_uart_mode = TI_UART_232; usb_set_serial_port_data(port, tport); /* * The TUSB5052 LSR does not tell when the transmitter shift register * has emptied so add a one-character drain delay. */ if (!tport->tp_tdev->td_is_3410) port->port.drain_delay = 1; return 0; } static void ti_port_remove(struct usb_serial_port *port) { struct ti_port *tport; tport = usb_get_serial_port_data(port); kfree(tport); } static int ti_open(struct tty_struct *tty, struct usb_serial_port *port) { struct ti_port *tport = usb_get_serial_port_data(port); struct ti_device *tdev; struct usb_device *dev; struct urb *urb; int status; u16 open_settings; open_settings = (TI_PIPE_MODE_CONTINUOUS | TI_PIPE_TIMEOUT_ENABLE | (TI_TRANSFER_TIMEOUT << 2)); dev = port->serial->dev; tdev = tport->tp_tdev; /* only one open on any port on a device at a time */ if (mutex_lock_interruptible(&tdev->td_open_close_lock)) return -ERESTARTSYS; tport->tp_msr = 0; tport->tp_shadow_mcr |= (TI_MCR_RTS | TI_MCR_DTR); /* start interrupt urb the first time a port is opened on this device */ if (tdev->td_open_port_count == 0) { dev_dbg(&port->dev, "%s - start interrupt in urb\n", __func__); urb = tdev->td_serial->port[0]->interrupt_in_urb; if (!urb) { dev_err(&port->dev, "%s - no interrupt urb\n", __func__); status = -EINVAL; goto release_lock; } urb->context = tdev; status = usb_submit_urb(urb, GFP_KERNEL); if (status) { dev_err(&port->dev, "%s - submit interrupt urb failed, %d\n", __func__, status); goto release_lock; } } if (tty) ti_set_termios(tty, port, &tty->termios); status = ti_port_cmd_out(port, TI_OPEN_PORT, open_settings, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot send open command, %d\n", __func__, status); goto unlink_int_urb; } status = ti_port_cmd_out(port, TI_START_PORT, 0, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot send start command, %d\n", __func__, status); goto unlink_int_urb; } status = ti_port_cmd_out(port, TI_PURGE_PORT, TI_PURGE_INPUT, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot clear input buffers, %d\n", __func__, status); goto unlink_int_urb; } status = ti_port_cmd_out(port, TI_PURGE_PORT, TI_PURGE_OUTPUT, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot clear output buffers, %d\n", __func__, status); goto unlink_int_urb; } /* reset the data toggle on the bulk endpoints to work around bug in * host controllers where things get out of sync some times */ usb_clear_halt(dev, port->write_urb->pipe); usb_clear_halt(dev, port->read_urb->pipe); if (tty) ti_set_termios(tty, port, &tty->termios); status = ti_port_cmd_out(port, TI_OPEN_PORT, open_settings, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot send open command (2), %d\n", __func__, status); goto unlink_int_urb; } status = ti_port_cmd_out(port, TI_START_PORT, 0, NULL, 0); if (status) { dev_err(&port->dev, "%s - cannot send start command (2), %d\n", __func__, status); goto unlink_int_urb; } /* start read urb */ urb = port->read_urb; if (!urb) { dev_err(&port->dev, "%s - no read urb\n", __func__); status = -EINVAL; goto unlink_int_urb; } tport->tp_read_urb_state = TI_READ_URB_RUNNING; urb->context = tport; status = usb_submit_urb(urb, GFP_KERNEL); if (status) { dev_err(&port->dev, "%s - submit read urb failed, %d\n", __func__, status); goto unlink_int_urb; } tport->tp_is_open = 1; ++tdev->td_open_port_count; goto release_lock; unlink_int_urb: if (tdev->td_open_port_count == 0) usb_kill_urb(port->serial->port[0]->interrupt_in_urb); release_lock: mutex_unlock(&tdev->td_open_close_lock); return status; } static void ti_close(struct usb_serial_port *port) { struct ti_device *tdev; struct ti_port *tport; int status; unsigned long flags; tdev = usb_get_serial_data(port->serial); tport = usb_get_serial_port_data(port); tport->tp_is_open = 0; usb_kill_urb(port->read_urb); usb_kill_urb(port->write_urb); tport->tp_write_urb_in_use = 0; spin_lock_irqsave(&tport->tp_lock, flags); kfifo_reset_out(&port->write_fifo); spin_unlock_irqrestore(&tport->tp_lock, flags); status = ti_port_cmd_out(port, TI_CLOSE_PORT, 0, NULL, 0); if (status) dev_err(&port->dev, "%s - cannot send close port command, %d\n" , __func__, status); mutex_lock(&tdev->td_open_close_lock); --tdev->td_open_port_count; if (tdev->td_open_port_count == 0) { /* last port is closed, shut down interrupt urb */ usb_kill_urb(port->serial->port[0]->interrupt_in_urb); } mutex_unlock(&tdev->td_open_close_lock); } static int ti_write(struct tty_struct *tty, struct usb_serial_port *port, const unsigned char *data, int count) { struct ti_port *tport = usb_get_serial_port_data(port); if (count == 0) { return 0; } if (!tport->tp_is_open) return -ENODEV; count = kfifo_in_locked(&port->write_fifo, data, count, &tport->tp_lock); ti_send(tport); return count; } static unsigned int ti_write_room(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); unsigned int room; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); room = kfifo_avail(&port->write_fifo); spin_unlock_irqrestore(&tport->tp_lock, flags); dev_dbg(&port->dev, "%s - returns %u\n", __func__, room); return room; } static unsigned int ti_chars_in_buffer(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); unsigned int chars; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); chars = kfifo_len(&port->write_fifo); spin_unlock_irqrestore(&tport->tp_lock, flags); dev_dbg(&port->dev, "%s - returns %u\n", __func__, chars); return chars; } static bool ti_tx_empty(struct usb_serial_port *port) { struct ti_port *tport = usb_get_serial_port_data(port); u8 lsr, mask; int ret; /* * TUSB5052 does not have the TEMT bit to tell if the shift register * is empty. */ if (tport->tp_tdev->td_is_3410) mask = TI_LSR_TX_EMPTY_BOTH; else mask = TI_LSR_TX_EMPTY; ret = ti_get_lsr(tport, &lsr); if (!ret && !(lsr & mask)) return false; return true; } static void ti_throttle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); if (I_IXOFF(tty) || C_CRTSCTS(tty)) ti_stop_read(tport, tty); } static void ti_unthrottle(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); int status; if (I_IXOFF(tty) || C_CRTSCTS(tty)) { status = ti_restart_read(tport, tty); if (status) dev_err(&port->dev, "%s - cannot restart read, %d\n", __func__, status); } } static void ti_set_termios(struct tty_struct *tty, struct usb_serial_port *port, const struct ktermios *old_termios) { struct ti_port *tport = usb_get_serial_port_data(port); struct ti_uart_config *config; int baud; int status; unsigned int mcr; u16 wbaudrate; u16 wflags = 0; config = kmalloc(sizeof(*config), GFP_KERNEL); if (!config) return; /* these flags must be set */ wflags |= TI_UART_ENABLE_MS_INTS; wflags |= TI_UART_ENABLE_AUTO_START_DMA; config->bUartMode = tport->tp_uart_mode; switch (C_CSIZE(tty)) { case CS5: config->bDataBits = TI_UART_5_DATA_BITS; break; case CS6: config->bDataBits = TI_UART_6_DATA_BITS; break; case CS7: config->bDataBits = TI_UART_7_DATA_BITS; break; default: case CS8: config->bDataBits = TI_UART_8_DATA_BITS; break; } /* CMSPAR isn't supported by this driver */ tty->termios.c_cflag &= ~CMSPAR; if (C_PARENB(tty)) { if (C_PARODD(tty)) { wflags |= TI_UART_ENABLE_PARITY_CHECKING; config->bParity = TI_UART_ODD_PARITY; } else { wflags |= TI_UART_ENABLE_PARITY_CHECKING; config->bParity = TI_UART_EVEN_PARITY; } } else { wflags &= ~TI_UART_ENABLE_PARITY_CHECKING; config->bParity = TI_UART_NO_PARITY; } if (C_CSTOPB(tty)) config->bStopBits = TI_UART_2_STOP_BITS; else config->bStopBits = TI_UART_1_STOP_BITS; if (C_CRTSCTS(tty)) { /* RTS flow control must be off to drop RTS for baud rate B0 */ if ((C_BAUD(tty)) != B0) wflags |= TI_UART_ENABLE_RTS_IN; wflags |= TI_UART_ENABLE_CTS_OUT; } else { ti_restart_read(tport, tty); } if (I_IXOFF(tty) || I_IXON(tty)) { config->cXon = START_CHAR(tty); config->cXoff = STOP_CHAR(tty); if (I_IXOFF(tty)) wflags |= TI_UART_ENABLE_X_IN; else ti_restart_read(tport, tty); if (I_IXON(tty)) wflags |= TI_UART_ENABLE_X_OUT; } baud = tty_get_baud_rate(tty); if (!baud) baud = 9600; if (tport->tp_tdev->td_is_3410) wbaudrate = (923077 + baud/2) / baud; else wbaudrate = (461538 + baud/2) / baud; /* FIXME: Should calculate resulting baud here and report it back */ if ((C_BAUD(tty)) != B0) tty_encode_baud_rate(tty, baud, baud); dev_dbg(&port->dev, "%s - BaudRate=%d, wBaudRate=%d, wFlags=0x%04X, bDataBits=%d, bParity=%d, bStopBits=%d, cXon=%d, cXoff=%d, bUartMode=%d\n", __func__, baud, wbaudrate, wflags, config->bDataBits, config->bParity, config->bStopBits, config->cXon, config->cXoff, config->bUartMode); config->wBaudRate = cpu_to_be16(wbaudrate); config->wFlags = cpu_to_be16(wflags); status = ti_port_cmd_out(port, TI_SET_CONFIG, 0, config, sizeof(*config)); if (status) dev_err(&port->dev, "%s - cannot set config on port %d, %d\n", __func__, port->port_number, status); /* SET_CONFIG asserts RTS and DTR, reset them correctly */ mcr = tport->tp_shadow_mcr; /* if baud rate is B0, clear RTS and DTR */ if (C_BAUD(tty) == B0) mcr &= ~(TI_MCR_DTR | TI_MCR_RTS); status = ti_set_mcr(tport, mcr); if (status) dev_err(&port->dev, "%s - cannot set modem control on port %d, %d\n", __func__, port->port_number, status); kfree(config); } static int ti_tiocmget(struct tty_struct *tty) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); unsigned int result; unsigned int msr; unsigned int mcr; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); msr = tport->tp_msr; mcr = tport->tp_shadow_mcr; spin_unlock_irqrestore(&tport->tp_lock, flags); result = ((mcr & TI_MCR_DTR) ? TIOCM_DTR : 0) | ((mcr & TI_MCR_RTS) ? TIOCM_RTS : 0) | ((mcr & TI_MCR_LOOP) ? TIOCM_LOOP : 0) | ((msr & TI_MSR_CTS) ? TIOCM_CTS : 0) | ((msr & TI_MSR_CD) ? TIOCM_CAR : 0) | ((msr & TI_MSR_RI) ? TIOCM_RI : 0) | ((msr & TI_MSR_DSR) ? TIOCM_DSR : 0); dev_dbg(&port->dev, "%s - 0x%04X\n", __func__, result); return result; } static int ti_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); unsigned int mcr; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); mcr = tport->tp_shadow_mcr; if (set & TIOCM_RTS) mcr |= TI_MCR_RTS; if (set & TIOCM_DTR) mcr |= TI_MCR_DTR; if (set & TIOCM_LOOP) mcr |= TI_MCR_LOOP; if (clear & TIOCM_RTS) mcr &= ~TI_MCR_RTS; if (clear & TIOCM_DTR) mcr &= ~TI_MCR_DTR; if (clear & TIOCM_LOOP) mcr &= ~TI_MCR_LOOP; spin_unlock_irqrestore(&tport->tp_lock, flags); return ti_set_mcr(tport, mcr); } static int ti_break(struct tty_struct *tty, int break_state) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); int status; dev_dbg(&port->dev, "%s - state = %d\n", __func__, break_state); status = ti_write_byte(port, tport->tp_tdev, tport->tp_uart_base_addr + TI_UART_OFFSET_LCR, TI_LCR_BREAK, break_state == -1 ? TI_LCR_BREAK : 0); if (status) { dev_dbg(&port->dev, "%s - error setting break, %d\n", __func__, status); return status; } return 0; } static int ti_get_port_from_code(unsigned char code) { return (code >> 6) & 0x01; } static int ti_get_func_from_code(unsigned char code) { return code & 0x0f; } static void ti_interrupt_callback(struct urb *urb) { struct ti_device *tdev = urb->context; struct usb_serial_port *port; struct usb_serial *serial = tdev->td_serial; struct ti_port *tport; struct device *dev = &urb->dev->dev; unsigned char *data = urb->transfer_buffer; int length = urb->actual_length; int port_number; int function; int status = urb->status; int retval; u8 msr; switch (status) { case 0: break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: dev_dbg(dev, "%s - urb shutting down, %d\n", __func__, status); return; default: dev_err(dev, "%s - nonzero urb status, %d\n", __func__, status); goto exit; } if (length != 2) { dev_dbg(dev, "%s - bad packet size, %d\n", __func__, length); goto exit; } if (data[0] == TI_CODE_HARDWARE_ERROR) { dev_err(dev, "%s - hardware error, %d\n", __func__, data[1]); goto exit; } port_number = ti_get_port_from_code(data[0]); function = ti_get_func_from_code(data[0]); dev_dbg(dev, "%s - port_number %d, function %d, data 0x%02X\n", __func__, port_number, function, data[1]); if (port_number >= serial->num_ports) { dev_err(dev, "%s - bad port number, %d\n", __func__, port_number); goto exit; } port = serial->port[port_number]; tport = usb_get_serial_port_data(port); if (!tport) goto exit; switch (function) { case TI_CODE_DATA_ERROR: dev_err(dev, "%s - DATA ERROR, port %d, data 0x%02X\n", __func__, port_number, data[1]); break; case TI_CODE_MODEM_STATUS: msr = data[1]; dev_dbg(dev, "%s - port %d, msr 0x%02X\n", __func__, port_number, msr); ti_handle_new_msr(tport, msr); break; default: dev_err(dev, "%s - unknown interrupt code, 0x%02X\n", __func__, data[1]); break; } exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval) dev_err(dev, "%s - resubmit interrupt urb failed, %d\n", __func__, retval); } static void ti_bulk_in_callback(struct urb *urb) { struct ti_port *tport = urb->context; struct usb_serial_port *port = tport->tp_port; struct device *dev = &urb->dev->dev; int status = urb->status; unsigned long flags; int retval = 0; switch (status) { case 0: break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: dev_dbg(dev, "%s - urb shutting down, %d\n", __func__, status); return; default: dev_err(dev, "%s - nonzero urb status, %d\n", __func__, status); } if (status == -EPIPE) goto exit; if (status) { dev_err(dev, "%s - stopping read!\n", __func__); return; } if (urb->actual_length) { usb_serial_debug_data(dev, __func__, urb->actual_length, urb->transfer_buffer); if (!tport->tp_is_open) dev_dbg(dev, "%s - port closed, dropping data\n", __func__); else ti_recv(port, urb->transfer_buffer, urb->actual_length); spin_lock_irqsave(&tport->tp_lock, flags); port->icount.rx += urb->actual_length; spin_unlock_irqrestore(&tport->tp_lock, flags); } exit: /* continue to read unless stopping */ spin_lock_irqsave(&tport->tp_lock, flags); if (tport->tp_read_urb_state == TI_READ_URB_RUNNING) retval = usb_submit_urb(urb, GFP_ATOMIC); else if (tport->tp_read_urb_state == TI_READ_URB_STOPPING) tport->tp_read_urb_state = TI_READ_URB_STOPPED; spin_unlock_irqrestore(&tport->tp_lock, flags); if (retval) dev_err(dev, "%s - resubmit read urb failed, %d\n", __func__, retval); } static void ti_bulk_out_callback(struct urb *urb) { struct ti_port *tport = urb->context; struct usb_serial_port *port = tport->tp_port; int status = urb->status; tport->tp_write_urb_in_use = 0; switch (status) { case 0: break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: dev_dbg(&port->dev, "%s - urb shutting down, %d\n", __func__, status); return; default: dev_err_console(port, "%s - nonzero urb status, %d\n", __func__, status); } /* send any buffered data */ ti_send(tport); } static void ti_recv(struct usb_serial_port *port, unsigned char *data, int length) { int cnt; do { cnt = tty_insert_flip_string(&port->port, data, length); if (cnt < length) { dev_err(&port->dev, "%s - dropping data, %d bytes lost\n", __func__, length - cnt); if (cnt == 0) break; } tty_flip_buffer_push(&port->port); data += cnt; length -= cnt; } while (length > 0); } static void ti_send(struct ti_port *tport) { int count, result; struct usb_serial_port *port = tport->tp_port; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); if (tport->tp_write_urb_in_use) goto unlock; count = kfifo_out(&port->write_fifo, port->write_urb->transfer_buffer, port->bulk_out_size); if (count == 0) goto unlock; tport->tp_write_urb_in_use = 1; spin_unlock_irqrestore(&tport->tp_lock, flags); usb_serial_debug_data(&port->dev, __func__, count, port->write_urb->transfer_buffer); usb_fill_bulk_urb(port->write_urb, port->serial->dev, usb_sndbulkpipe(port->serial->dev, port->bulk_out_endpointAddress), port->write_urb->transfer_buffer, count, ti_bulk_out_callback, tport); result = usb_submit_urb(port->write_urb, GFP_ATOMIC); if (result) { dev_err_console(port, "%s - submit write urb failed, %d\n", __func__, result); tport->tp_write_urb_in_use = 0; /* TODO: reschedule ti_send */ } else { spin_lock_irqsave(&tport->tp_lock, flags); port->icount.tx += count; spin_unlock_irqrestore(&tport->tp_lock, flags); } /* more room in the buffer for new writes, wakeup */ tty_port_tty_wakeup(&port->port); return; unlock: spin_unlock_irqrestore(&tport->tp_lock, flags); return; } static int ti_set_mcr(struct ti_port *tport, unsigned int mcr) { unsigned long flags; int status; status = ti_write_byte(tport->tp_port, tport->tp_tdev, tport->tp_uart_base_addr + TI_UART_OFFSET_MCR, TI_MCR_RTS | TI_MCR_DTR | TI_MCR_LOOP, mcr); spin_lock_irqsave(&tport->tp_lock, flags); if (!status) tport->tp_shadow_mcr = mcr; spin_unlock_irqrestore(&tport->tp_lock, flags); return status; } static int ti_get_lsr(struct ti_port *tport, u8 *lsr) { int size, status; struct usb_serial_port *port = tport->tp_port; struct ti_port_status *data; size = sizeof(struct ti_port_status); data = kmalloc(size, GFP_KERNEL); if (!data) return -ENOMEM; status = ti_port_cmd_in(port, TI_GET_PORT_STATUS, 0, data, size); if (status) { dev_err(&port->dev, "%s - get port status command failed, %d\n", __func__, status); goto free_data; } dev_dbg(&port->dev, "%s - lsr 0x%02X\n", __func__, data->bLSR); *lsr = data->bLSR; free_data: kfree(data); return status; } static void ti_get_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct usb_serial_port *port = tty->driver_data; struct ti_port *tport = usb_get_serial_port_data(port); ss->baud_base = tport->tp_tdev->td_is_3410 ? 921600 : 460800; } static void ti_handle_new_msr(struct ti_port *tport, u8 msr) { struct async_icount *icount; struct tty_struct *tty; unsigned long flags; dev_dbg(&tport->tp_port->dev, "%s - msr 0x%02X\n", __func__, msr); if (msr & TI_MSR_DELTA_MASK) { spin_lock_irqsave(&tport->tp_lock, flags); icount = &tport->tp_port->icount; if (msr & TI_MSR_DELTA_CTS) icount->cts++; if (msr & TI_MSR_DELTA_DSR) icount->dsr++; if (msr & TI_MSR_DELTA_CD) icount->dcd++; if (msr & TI_MSR_DELTA_RI) icount->rng++; wake_up_interruptible(&tport->tp_port->port.delta_msr_wait); spin_unlock_irqrestore(&tport->tp_lock, flags); } tport->tp_msr = msr & TI_MSR_MASK; /* handle CTS flow control */ tty = tty_port_tty_get(&tport->tp_port->port); if (tty && C_CRTSCTS(tty)) { if (msr & TI_MSR_CTS) tty_wakeup(tty); } tty_kref_put(tty); } static void ti_stop_read(struct ti_port *tport, struct tty_struct *tty) { unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); if (tport->tp_read_urb_state == TI_READ_URB_RUNNING) tport->tp_read_urb_state = TI_READ_URB_STOPPING; spin_unlock_irqrestore(&tport->tp_lock, flags); } static int ti_restart_read(struct ti_port *tport, struct tty_struct *tty) { struct urb *urb; int status = 0; unsigned long flags; spin_lock_irqsave(&tport->tp_lock, flags); if (tport->tp_read_urb_state == TI_READ_URB_STOPPED) { tport->tp_read_urb_state = TI_READ_URB_RUNNING; urb = tport->tp_port->read_urb; spin_unlock_irqrestore(&tport->tp_lock, flags); urb->context = tport; status = usb_submit_urb(urb, GFP_KERNEL); } else { tport->tp_read_urb_state = TI_READ_URB_RUNNING; spin_unlock_irqrestore(&tport->tp_lock, flags); } return status; } static int ti_command_out_sync(struct usb_device *udev, u8 command, u16 moduleid, u16 value, void *data, int size) { int status; status = usb_control_msg(udev, usb_sndctrlpipe(udev, 0), command, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_OUT, value, moduleid, data, size, 1000); if (status < 0) return status; return 0; } static int ti_command_in_sync(struct usb_device *udev, u8 command, u16 moduleid, u16 value, void *data, int size) { int status; status = usb_control_msg(udev, usb_rcvctrlpipe(udev, 0), command, USB_TYPE_VENDOR | USB_RECIP_DEVICE | USB_DIR_IN, value, moduleid, data, size, 1000); if (status == size) status = 0; else if (status >= 0) status = -ECOMM; return status; } static int ti_port_cmd_out(struct usb_serial_port *port, u8 command, u16 value, void *data, int size) { return ti_command_out_sync(port->serial->dev, command, TI_UART1_PORT + port->port_number, value, data, size); } static int ti_port_cmd_in(struct usb_serial_port *port, u8 command, u16 value, void *data, int size) { return ti_command_in_sync(port->serial->dev, command, TI_UART1_PORT + port->port_number, value, data, size); } static int ti_write_byte(struct usb_serial_port *port, struct ti_device *tdev, unsigned long addr, u8 mask, u8 byte) { int status; unsigned int size; struct ti_write_data_bytes *data; dev_dbg(&port->dev, "%s - addr 0x%08lX, mask 0x%02X, byte 0x%02X\n", __func__, addr, mask, byte); size = sizeof(struct ti_write_data_bytes) + 2; data = kmalloc(size, GFP_KERNEL); if (!data) return -ENOMEM; data->bAddrType = TI_RW_DATA_ADDR_XDATA; data->bDataType = TI_RW_DATA_BYTE; data->bDataCounter = 1; data->wBaseAddrHi = cpu_to_be16(addr>>16); data->wBaseAddrLo = cpu_to_be16(addr); data->bData[0] = mask; data->bData[1] = byte; status = ti_command_out_sync(port->serial->dev, TI_WRITE_DATA, TI_RAM_PORT, 0, data, size); if (status < 0) dev_err(&port->dev, "%s - failed, %d\n", __func__, status); kfree(data); return status; } static int ti_do_download(struct usb_device *dev, int pipe, u8 *buffer, int size) { int pos; u8 cs = 0; int done; struct ti_firmware_header *header; int status = 0; int len; for (pos = sizeof(struct ti_firmware_header); pos < size; pos++) cs = (u8)(cs + buffer[pos]); header = (struct ti_firmware_header *)buffer; header->wLength = cpu_to_le16(size - sizeof(*header)); header->bCheckSum = cs; dev_dbg(&dev->dev, "%s - downloading firmware\n", __func__); for (pos = 0; pos < size; pos += done) { len = min(size - pos, TI_DOWNLOAD_MAX_PACKET_SIZE); status = usb_bulk_msg(dev, pipe, buffer + pos, len, &done, 1000); if (status) break; } return status; } static int ti_download_firmware(struct ti_device *tdev) { int status; int buffer_size; u8 *buffer; struct usb_device *dev = tdev->td_serial->dev; unsigned int pipe = usb_sndbulkpipe(dev, tdev->td_serial->port[0]->bulk_out_endpointAddress); const struct firmware *fw_p; char buf[32]; if (le16_to_cpu(dev->descriptor.idVendor) == MXU1_VENDOR_ID) { snprintf(buf, sizeof(buf), "moxa/moxa-%04x.fw", le16_to_cpu(dev->descriptor.idProduct)); status = request_firmware(&fw_p, buf, &dev->dev); goto check_firmware; } /* try ID specific firmware first, then try generic firmware */ sprintf(buf, "ti_usb-v%04x-p%04x.fw", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); status = request_firmware(&fw_p, buf, &dev->dev); if (status != 0) { buf[0] = '\0'; if (le16_to_cpu(dev->descriptor.idVendor) == MTS_VENDOR_ID) { switch (le16_to_cpu(dev->descriptor.idProduct)) { case MTS_CDMA_PRODUCT_ID: strcpy(buf, "mts_cdma.fw"); break; case MTS_GSM_PRODUCT_ID: strcpy(buf, "mts_gsm.fw"); break; case MTS_EDGE_PRODUCT_ID: strcpy(buf, "mts_edge.fw"); break; case MTS_MT9234MU_PRODUCT_ID: strcpy(buf, "mts_mt9234mu.fw"); break; case MTS_MT9234ZBA_PRODUCT_ID: strcpy(buf, "mts_mt9234zba.fw"); break; case MTS_MT9234ZBAOLD_PRODUCT_ID: strcpy(buf, "mts_mt9234zba.fw"); break; } } if (buf[0] == '\0') { if (tdev->td_is_3410) strcpy(buf, "ti_3410.fw"); else strcpy(buf, "ti_5052.fw"); } status = request_firmware(&fw_p, buf, &dev->dev); } check_firmware: if (status) { dev_err(&dev->dev, "%s - firmware not found\n", __func__); return -ENOENT; } if (fw_p->size > TI_FIRMWARE_BUF_SIZE) { dev_err(&dev->dev, "%s - firmware too large %zu\n", __func__, fw_p->size); release_firmware(fw_p); return -ENOENT; } buffer_size = TI_FIRMWARE_BUF_SIZE + sizeof(struct ti_firmware_header); buffer = kmalloc(buffer_size, GFP_KERNEL); if (buffer) { memcpy(buffer, fw_p->data, fw_p->size); memset(buffer + fw_p->size, 0xff, buffer_size - fw_p->size); status = ti_do_download(dev, pipe, buffer, fw_p->size); kfree(buffer); } else { status = -ENOMEM; } release_firmware(fw_p); if (status) { dev_err(&dev->dev, "%s - error downloading firmware, %d\n", __func__, status); return status; } dev_dbg(&dev->dev, "%s - download successful\n", __func__); return 0; }
2 2 1 1 1 1 1 1 1 1 4 5 32 2 1 25 2 1 1 24 2 2 5 2 17 4 3 1 11 1 5 6 11 1 5 5 1 4 5 4 1 4 4 2 2 2 1 1 2 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 // SPDX-License-Identifier: GPL-2.0+ /* * cdc-acm.c * * Copyright (c) 1999 Armin Fuerst <fuerst@in.tum.de> * Copyright (c) 1999 Pavel Machek <pavel@ucw.cz> * Copyright (c) 1999 Johannes Erdfelt <johannes@erdfelt.com> * Copyright (c) 2000 Vojtech Pavlik <vojtech@suse.cz> * Copyright (c) 2004 Oliver Neukum <oliver@neukum.name> * Copyright (c) 2005 David Kubicek <dave@awk.cz> * Copyright (c) 2011 Johan Hovold <jhovold@gmail.com> * * USB Abstract Control Model driver for USB modems and ISDN adapters * * Sponsored by SuSE */ #undef DEBUG #undef VERBOSE_DEBUG #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/errno.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/log2.h> #include <linux/tty.h> #include <linux/serial.h> #include <linux/tty_driver.h> #include <linux/tty_flip.h> #include <linux/tty_ldisc.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/uaccess.h> #include <linux/usb.h> #include <linux/usb/cdc.h> #include <asm/byteorder.h> #include <asm/unaligned.h> #include <linux/idr.h> #include <linux/list.h> #include "cdc-acm.h" #define DRIVER_AUTHOR "Armin Fuerst, Pavel Machek, Johannes Erdfelt, Vojtech Pavlik, David Kubicek, Johan Hovold" #define DRIVER_DESC "USB Abstract Control Model driver for USB modems and ISDN adapters" static struct usb_driver acm_driver; static struct tty_driver *acm_tty_driver; static DEFINE_IDR(acm_minors); static DEFINE_MUTEX(acm_minors_lock); static void acm_tty_set_termios(struct tty_struct *tty, const struct ktermios *termios_old); /* * acm_minors accessors */ /* * Look up an ACM structure by minor. If found and not disconnected, increment * its refcount and return it with its mutex held. */ static struct acm *acm_get_by_minor(unsigned int minor) { struct acm *acm; mutex_lock(&acm_minors_lock); acm = idr_find(&acm_minors, minor); if (acm) { mutex_lock(&acm->mutex); if (acm->disconnected) { mutex_unlock(&acm->mutex); acm = NULL; } else { tty_port_get(&acm->port); mutex_unlock(&acm->mutex); } } mutex_unlock(&acm_minors_lock); return acm; } /* * Try to find an available minor number and if found, associate it with 'acm'. */ static int acm_alloc_minor(struct acm *acm) { int minor; mutex_lock(&acm_minors_lock); minor = idr_alloc(&acm_minors, acm, 0, ACM_TTY_MINORS, GFP_KERNEL); mutex_unlock(&acm_minors_lock); return minor; } /* Release the minor number associated with 'acm'. */ static void acm_release_minor(struct acm *acm) { mutex_lock(&acm_minors_lock); idr_remove(&acm_minors, acm->minor); mutex_unlock(&acm_minors_lock); } /* * Functions for ACM control messages. */ static int acm_ctrl_msg(struct acm *acm, int request, int value, void *buf, int len) { int retval; retval = usb_autopm_get_interface(acm->control); if (retval) return retval; retval = usb_control_msg(acm->dev, usb_sndctrlpipe(acm->dev, 0), request, USB_RT_ACM, value, acm->control->altsetting[0].desc.bInterfaceNumber, buf, len, USB_CTRL_SET_TIMEOUT); dev_dbg(&acm->control->dev, "%s - rq 0x%02x, val %#x, len %#x, result %d\n", __func__, request, value, len, retval); usb_autopm_put_interface(acm->control); return retval < 0 ? retval : 0; } /* devices aren't required to support these requests. * the cdc acm descriptor tells whether they do... */ static inline int acm_set_control(struct acm *acm, int control) { if (acm->quirks & QUIRK_CONTROL_LINE_STATE) return -EOPNOTSUPP; return acm_ctrl_msg(acm, USB_CDC_REQ_SET_CONTROL_LINE_STATE, control, NULL, 0); } #define acm_set_line(acm, line) \ acm_ctrl_msg(acm, USB_CDC_REQ_SET_LINE_CODING, 0, line, sizeof *(line)) #define acm_send_break(acm, ms) \ acm_ctrl_msg(acm, USB_CDC_REQ_SEND_BREAK, ms, NULL, 0) static void acm_poison_urbs(struct acm *acm) { int i; usb_poison_urb(acm->ctrlurb); for (i = 0; i < ACM_NW; i++) usb_poison_urb(acm->wb[i].urb); for (i = 0; i < acm->rx_buflimit; i++) usb_poison_urb(acm->read_urbs[i]); } static void acm_unpoison_urbs(struct acm *acm) { int i; for (i = 0; i < acm->rx_buflimit; i++) usb_unpoison_urb(acm->read_urbs[i]); for (i = 0; i < ACM_NW; i++) usb_unpoison_urb(acm->wb[i].urb); usb_unpoison_urb(acm->ctrlurb); } /* * Write buffer management. * All of these assume proper locks taken by the caller. */ static int acm_wb_alloc(struct acm *acm) { int i, wbn; struct acm_wb *wb; wbn = 0; i = 0; for (;;) { wb = &acm->wb[wbn]; if (!wb->use) { wb->use = true; wb->len = 0; return wbn; } wbn = (wbn + 1) % ACM_NW; if (++i >= ACM_NW) return -1; } } static int acm_wb_is_avail(struct acm *acm) { int i, n; unsigned long flags; n = ACM_NW; spin_lock_irqsave(&acm->write_lock, flags); for (i = 0; i < ACM_NW; i++) if(acm->wb[i].use) n--; spin_unlock_irqrestore(&acm->write_lock, flags); return n; } /* * Finish write. Caller must hold acm->write_lock */ static void acm_write_done(struct acm *acm, struct acm_wb *wb) { wb->use = false; acm->transmitting--; usb_autopm_put_interface_async(acm->control); } /* * Poke write. * * the caller is responsible for locking */ static int acm_start_wb(struct acm *acm, struct acm_wb *wb) { int rc; acm->transmitting++; wb->urb->transfer_buffer = wb->buf; wb->urb->transfer_dma = wb->dmah; wb->urb->transfer_buffer_length = wb->len; wb->urb->dev = acm->dev; rc = usb_submit_urb(wb->urb, GFP_ATOMIC); if (rc < 0) { if (rc != -EPERM) dev_err(&acm->data->dev, "%s - usb_submit_urb(write bulk) failed: %d\n", __func__, rc); acm_write_done(acm, wb); } return rc; } /* * attributes exported through sysfs */ static ssize_t bmCapabilities_show (struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct acm *acm = usb_get_intfdata(intf); return sprintf(buf, "%d", acm->ctrl_caps); } static DEVICE_ATTR_RO(bmCapabilities); static ssize_t wCountryCodes_show (struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct acm *acm = usb_get_intfdata(intf); memcpy(buf, acm->country_codes, acm->country_code_size); return acm->country_code_size; } static DEVICE_ATTR_RO(wCountryCodes); static ssize_t iCountryCodeRelDate_show (struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct acm *acm = usb_get_intfdata(intf); return sprintf(buf, "%d", acm->country_rel_date); } static DEVICE_ATTR_RO(iCountryCodeRelDate); /* * Interrupt handlers for various ACM device responses */ static void acm_process_notification(struct acm *acm, unsigned char *buf) { int newctrl; int difference; unsigned long flags; struct usb_cdc_notification *dr = (struct usb_cdc_notification *)buf; unsigned char *data = buf + sizeof(struct usb_cdc_notification); switch (dr->bNotificationType) { case USB_CDC_NOTIFY_NETWORK_CONNECTION: dev_dbg(&acm->control->dev, "%s - network connection: %d\n", __func__, dr->wValue); break; case USB_CDC_NOTIFY_SERIAL_STATE: if (le16_to_cpu(dr->wLength) != 2) { dev_dbg(&acm->control->dev, "%s - malformed serial state\n", __func__); break; } newctrl = get_unaligned_le16(data); dev_dbg(&acm->control->dev, "%s - serial state: 0x%x\n", __func__, newctrl); if (!acm->clocal && (acm->ctrlin & ~newctrl & USB_CDC_SERIAL_STATE_DCD)) { dev_dbg(&acm->control->dev, "%s - calling hangup\n", __func__); tty_port_tty_hangup(&acm->port, false); } difference = acm->ctrlin ^ newctrl; if ((difference & USB_CDC_SERIAL_STATE_DCD) && acm->port.tty) { struct tty_ldisc *ld = tty_ldisc_ref(acm->port.tty); if (ld) { if (ld->ops->dcd_change) ld->ops->dcd_change(acm->port.tty, newctrl & USB_CDC_SERIAL_STATE_DCD); tty_ldisc_deref(ld); } } spin_lock_irqsave(&acm->read_lock, flags); acm->ctrlin = newctrl; acm->oldcount = acm->iocount; if (difference & USB_CDC_SERIAL_STATE_DSR) acm->iocount.dsr++; if (difference & USB_CDC_SERIAL_STATE_DCD) acm->iocount.dcd++; if (newctrl & USB_CDC_SERIAL_STATE_BREAK) { acm->iocount.brk++; tty_insert_flip_char(&acm->port, 0, TTY_BREAK); } if (newctrl & USB_CDC_SERIAL_STATE_RING_SIGNAL) acm->iocount.rng++; if (newctrl & USB_CDC_SERIAL_STATE_FRAMING) acm->iocount.frame++; if (newctrl & USB_CDC_SERIAL_STATE_PARITY) acm->iocount.parity++; if (newctrl & USB_CDC_SERIAL_STATE_OVERRUN) acm->iocount.overrun++; spin_unlock_irqrestore(&acm->read_lock, flags); if (newctrl & USB_CDC_SERIAL_STATE_BREAK) tty_flip_buffer_push(&acm->port); if (difference) wake_up_all(&acm->wioctl); break; default: dev_dbg(&acm->control->dev, "%s - unknown notification %d received: index %d len %d\n", __func__, dr->bNotificationType, dr->wIndex, dr->wLength); } } /* control interface reports status changes with "interrupt" transfers */ static void acm_ctrl_irq(struct urb *urb) { struct acm *acm = urb->context; struct usb_cdc_notification *dr = urb->transfer_buffer; unsigned int current_size = urb->actual_length; unsigned int expected_size, copy_size, alloc_size; int retval; int status = urb->status; switch (status) { case 0: /* success */ break; case -ECONNRESET: case -ENOENT: case -ESHUTDOWN: /* this urb is terminated, clean up */ dev_dbg(&acm->control->dev, "%s - urb shutting down with status: %d\n", __func__, status); return; default: dev_dbg(&acm->control->dev, "%s - nonzero urb status received: %d\n", __func__, status); goto exit; } usb_mark_last_busy(acm->dev); if (acm->nb_index) dr = (struct usb_cdc_notification *)acm->notification_buffer; /* size = notification-header + (optional) data */ expected_size = sizeof(struct usb_cdc_notification) + le16_to_cpu(dr->wLength); if (current_size < expected_size) { /* notification is transmitted fragmented, reassemble */ if (acm->nb_size < expected_size) { u8 *new_buffer; alloc_size = roundup_pow_of_two(expected_size); /* Final freeing is done on disconnect. */ new_buffer = krealloc(acm->notification_buffer, alloc_size, GFP_ATOMIC); if (!new_buffer) { acm->nb_index = 0; goto exit; } acm->notification_buffer = new_buffer; acm->nb_size = alloc_size; dr = (struct usb_cdc_notification *)acm->notification_buffer; } copy_size = min(current_size, expected_size - acm->nb_index); memcpy(&acm->notification_buffer[acm->nb_index], urb->transfer_buffer, copy_size); acm->nb_index += copy_size; current_size = acm->nb_index; } if (current_size >= expected_size) { /* notification complete */ acm_process_notification(acm, (unsigned char *)dr); acm->nb_index = 0; } exit: retval = usb_submit_urb(urb, GFP_ATOMIC); if (retval && retval != -EPERM && retval != -ENODEV) dev_err(&acm->control->dev, "%s - usb_submit_urb failed: %d\n", __func__, retval); else dev_vdbg(&acm->control->dev, "control resubmission terminated %d\n", retval); } static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags) { int res; if (!test_and_clear_bit(index, &acm->read_urbs_free)) return 0; res = usb_submit_urb(acm->read_urbs[index], mem_flags); if (res) { if (res != -EPERM && res != -ENODEV) { dev_err(&acm->data->dev, "urb %d failed submission with %d\n", index, res); } else { dev_vdbg(&acm->data->dev, "intended failure %d\n", res); } set_bit(index, &acm->read_urbs_free); return res; } else { dev_vdbg(&acm->data->dev, "submitted urb %d\n", index); } return 0; } static int acm_submit_read_urbs(struct acm *acm, gfp_t mem_flags) { int res; int i; for (i = 0; i < acm->rx_buflimit; ++i) { res = acm_submit_read_urb(acm, i, mem_flags); if (res) return res; } return 0; } static void acm_process_read_urb(struct acm *acm, struct urb *urb) { unsigned long flags; if (!urb->actual_length) return; spin_lock_irqsave(&acm->read_lock, flags); tty_insert_flip_string(&acm->port, urb->transfer_buffer, urb->actual_length); spin_unlock_irqrestore(&acm->read_lock, flags); tty_flip_buffer_push(&acm->port); } static void acm_read_bulk_callback(struct urb *urb) { struct acm_rb *rb = urb->context; struct acm *acm = rb->instance; int status = urb->status; bool stopped = false; bool stalled = false; bool cooldown = false; dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n", rb->index, urb->actual_length, status); switch (status) { case 0: usb_mark_last_busy(acm->dev); acm_process_read_urb(acm, urb); break; case -EPIPE: set_bit(EVENT_RX_STALL, &acm->flags); stalled = true; break; case -ENOENT: case -ECONNRESET: case -ESHUTDOWN: dev_dbg(&acm->data->dev, "%s - urb shutting down with status: %d\n", __func__, status); stopped = true; break; case -EOVERFLOW: case -EPROTO: dev_dbg(&acm->data->dev, "%s - cooling babbling device\n", __func__); usb_mark_last_busy(acm->dev); set_bit(rb->index, &acm->urbs_in_error_delay); set_bit(ACM_ERROR_DELAY, &acm->flags); cooldown = true; break; default: dev_dbg(&acm->data->dev, "%s - nonzero urb status received: %d\n", __func__, status); break; } /* * Make sure URB processing is done before marking as free to avoid * racing with unthrottle() on another CPU. Matches the barriers * implied by the test_and_clear_bit() in acm_submit_read_urb(). */ smp_mb__before_atomic(); set_bit(rb->index, &acm->read_urbs_free); /* * Make sure URB is marked as free before checking the throttled flag * to avoid racing with unthrottle() on another CPU. Matches the * smp_mb() in unthrottle(). */ smp_mb__after_atomic(); if (stopped || stalled || cooldown) { if (stalled) schedule_delayed_work(&acm->dwork, 0); else if (cooldown) schedule_delayed_work(&acm->dwork, HZ / 2); return; } if (test_bit(ACM_THROTTLED, &acm->flags)) return; acm_submit_read_urb(acm, rb->index, GFP_ATOMIC); } /* data interface wrote those outgoing bytes */ static void acm_write_bulk(struct urb *urb) { struct acm_wb *wb = urb->context; struct acm *acm = wb->instance; unsigned long flags; int status = urb->status; if (status || (urb->actual_length != urb->transfer_buffer_length)) dev_vdbg(&acm->data->dev, "wrote len %d/%d, status %d\n", urb->actual_length, urb->transfer_buffer_length, status); spin_lock_irqsave(&acm->write_lock, flags); acm_write_done(acm, wb); spin_unlock_irqrestore(&acm->write_lock, flags); set_bit(EVENT_TTY_WAKEUP, &acm->flags); schedule_delayed_work(&acm->dwork, 0); } static void acm_softint(struct work_struct *work) { int i; struct acm *acm = container_of(work, struct acm, dwork.work); if (test_bit(EVENT_RX_STALL, &acm->flags)) { smp_mb(); /* against acm_suspend() */ if (!acm->susp_count) { for (i = 0; i < acm->rx_buflimit; i++) usb_kill_urb(acm->read_urbs[i]); usb_clear_halt(acm->dev, acm->in); acm_submit_read_urbs(acm, GFP_KERNEL); clear_bit(EVENT_RX_STALL, &acm->flags); } } if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) { for (i = 0; i < acm->rx_buflimit; i++) if (test_and_clear_bit(i, &acm->urbs_in_error_delay)) acm_submit_read_urb(acm, i, GFP_KERNEL); } if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags)) tty_port_tty_wakeup(&acm->port); } /* * TTY handlers */ static int acm_tty_install(struct tty_driver *driver, struct tty_struct *tty) { struct acm *acm; int retval; acm = acm_get_by_minor(tty->index); if (!acm) return -ENODEV; retval = tty_standard_install(driver, tty); if (retval) goto error_init_termios; /* * Suppress initial echoing for some devices which might send data * immediately after acm driver has been installed. */ if (acm->quirks & DISABLE_ECHO) tty->termios.c_lflag &= ~ECHO; tty->driver_data = acm; return 0; error_init_termios: tty_port_put(&acm->port); return retval; } static int acm_tty_open(struct tty_struct *tty, struct file *filp) { struct acm *acm = tty->driver_data; return tty_port_open(&acm->port, tty, filp); } static void acm_port_dtr_rts(struct tty_port *port, bool active) { struct acm *acm = container_of(port, struct acm, port); int val; int res; if (active) val = USB_CDC_CTRL_DTR | USB_CDC_CTRL_RTS; else val = 0; /* FIXME: add missing ctrlout locking throughout driver */ acm->ctrlout = val; res = acm_set_control(acm, val); if (res && (acm->ctrl_caps & USB_CDC_CAP_LINE)) /* This is broken in too many devices to spam the logs */ dev_dbg(&acm->control->dev, "failed to set dtr/rts\n"); } static int acm_port_activate(struct tty_port *port, struct tty_struct *tty) { struct acm *acm = container_of(port, struct acm, port); int retval = -ENODEV; int i; mutex_lock(&acm->mutex); if (acm->disconnected) goto disconnected; retval = usb_autopm_get_interface(acm->control); if (retval) goto error_get_interface; set_bit(TTY_NO_WRITE_SPLIT, &tty->flags); acm->control->needs_remote_wakeup = 1; acm->ctrlurb->dev = acm->dev; retval = usb_submit_urb(acm->ctrlurb, GFP_KERNEL); if (retval) { dev_err(&acm->control->dev, "%s - usb_submit_urb(ctrl irq) failed\n", __func__); goto error_submit_urb; } acm_tty_set_termios(tty, NULL); /* * Unthrottle device in case the TTY was closed while throttled. */ clear_bit(ACM_THROTTLED, &acm->flags); retval = acm_submit_read_urbs(acm, GFP_KERNEL); if (retval) goto error_submit_read_urbs; usb_autopm_put_interface(acm->control); mutex_unlock(&acm->mutex); return 0; error_submit_read_urbs: for (i = 0; i < acm->rx_buflimit; i++) usb_kill_urb(acm->read_urbs[i]); usb_kill_urb(acm->ctrlurb); error_submit_urb: usb_autopm_put_interface(acm->control); error_get_interface: disconnected: mutex_unlock(&acm->mutex); return usb_translate_errors(retval); } static void acm_port_destruct(struct tty_port *port) { struct acm *acm = container_of(port, struct acm, port); if (acm->minor != ACM_MINOR_INVALID) acm_release_minor(acm); usb_put_intf(acm->control); kfree(acm->country_codes); kfree(acm); } static void acm_port_shutdown(struct tty_port *port) { struct acm *acm = container_of(port, struct acm, port); struct urb *urb; struct acm_wb *wb; /* * Need to grab write_lock to prevent race with resume, but no need to * hold it due to the tty-port initialised flag. */ acm_poison_urbs(acm); spin_lock_irq(&acm->write_lock); spin_unlock_irq(&acm->write_lock); usb_autopm_get_interface_no_resume(acm->control); acm->control->needs_remote_wakeup = 0; usb_autopm_put_interface(acm->control); for (;;) { urb = usb_get_from_anchor(&acm->delayed); if (!urb) break; wb = urb->context; wb->use = false; usb_autopm_put_interface_async(acm->control); } acm_unpoison_urbs(acm); } static void acm_tty_cleanup(struct tty_struct *tty) { struct acm *acm = tty->driver_data; tty_port_put(&acm->port); } static void acm_tty_hangup(struct tty_struct *tty) { struct acm *acm = tty->driver_data; tty_port_hangup(&acm->port); } static void acm_tty_close(struct tty_struct *tty, struct file *filp) { struct acm *acm = tty->driver_data; tty_port_close(&acm->port, tty, filp); } static ssize_t acm_tty_write(struct tty_struct *tty, const u8 *buf, size_t count) { struct acm *acm = tty->driver_data; int stat; unsigned long flags; int wbn; struct acm_wb *wb; if (!count) return 0; dev_vdbg(&acm->data->dev, "%zu bytes from tty layer\n", count); spin_lock_irqsave(&acm->write_lock, flags); wbn = acm_wb_alloc(acm); if (wbn < 0) { spin_unlock_irqrestore(&acm->write_lock, flags); return 0; } wb = &acm->wb[wbn]; if (!acm->dev) { wb->use = false; spin_unlock_irqrestore(&acm->write_lock, flags); return -ENODEV; } count = (count > acm->writesize) ? acm->writesize : count; dev_vdbg(&acm->data->dev, "writing %zu bytes\n", count); memcpy(wb->buf, buf, count); wb->len = count; stat = usb_autopm_get_interface_async(acm->control); if (stat) { wb->use = false; spin_unlock_irqrestore(&acm->write_lock, flags); return stat; } if (acm->susp_count) { usb_anchor_urb(wb->urb, &acm->delayed); spin_unlock_irqrestore(&acm->write_lock, flags); return count; } stat = acm_start_wb(acm, wb); spin_unlock_irqrestore(&acm->write_lock, flags); if (stat < 0) return stat; return count; } static unsigned int acm_tty_write_room(struct tty_struct *tty) { struct acm *acm = tty->driver_data; /* * Do not let the line discipline to know that we have a reserve, * or it might get too enthusiastic. */ return acm_wb_is_avail(acm) ? acm->writesize : 0; } static void acm_tty_flush_buffer(struct tty_struct *tty) { struct acm *acm = tty->driver_data; unsigned long flags; int i; spin_lock_irqsave(&acm->write_lock, flags); for (i = 0; i < ACM_NW; i++) if (acm->wb[i].use) usb_unlink_urb(acm->wb[i].urb); spin_unlock_irqrestore(&acm->write_lock, flags); } static unsigned int acm_tty_chars_in_buffer(struct tty_struct *tty) { struct acm *acm = tty->driver_data; /* * if the device was unplugged then any remaining characters fell out * of the connector ;) */ if (acm->disconnected) return 0; /* * This is inaccurate (overcounts), but it works. */ return (ACM_NW - acm_wb_is_avail(acm)) * acm->writesize; } static void acm_tty_throttle(struct tty_struct *tty) { struct acm *acm = tty->driver_data; set_bit(ACM_THROTTLED, &acm->flags); } static void acm_tty_unthrottle(struct tty_struct *tty) { struct acm *acm = tty->driver_data; clear_bit(ACM_THROTTLED, &acm->flags); /* Matches the smp_mb__after_atomic() in acm_read_bulk_callback(). */ smp_mb(); acm_submit_read_urbs(acm, GFP_KERNEL); } static int acm_tty_break_ctl(struct tty_struct *tty, int state) { struct acm *acm = tty->driver_data; int retval; if (!(acm->ctrl_caps & USB_CDC_CAP_BRK)) return -EOPNOTSUPP; retval = acm_send_break(acm, state ? 0xffff : 0); if (retval < 0) dev_dbg(&acm->control->dev, "%s - send break failed\n", __func__); return retval; } static int acm_tty_tiocmget(struct tty_struct *tty) { struct acm *acm = tty->driver_data; return (acm->ctrlout & USB_CDC_CTRL_DTR ? TIOCM_DTR : 0) | (acm->ctrlout & USB_CDC_CTRL_RTS ? TIOCM_RTS : 0) | (acm->ctrlin & USB_CDC_SERIAL_STATE_DSR ? TIOCM_DSR : 0) | (acm->ctrlin & USB_CDC_SERIAL_STATE_RING_SIGNAL ? TIOCM_RI : 0) | (acm->ctrlin & USB_CDC_SERIAL_STATE_DCD ? TIOCM_CD : 0) | TIOCM_CTS; } static int acm_tty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct acm *acm = tty->driver_data; unsigned int newctrl; newctrl = acm->ctrlout; set = (set & TIOCM_DTR ? USB_CDC_CTRL_DTR : 0) | (set & TIOCM_RTS ? USB_CDC_CTRL_RTS : 0); clear = (clear & TIOCM_DTR ? USB_CDC_CTRL_DTR : 0) | (clear & TIOCM_RTS ? USB_CDC_CTRL_RTS : 0); newctrl = (newctrl & ~clear) | set; if (acm->ctrlout == newctrl) return 0; return acm_set_control(acm, acm->ctrlout = newctrl); } static int get_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct acm *acm = tty->driver_data; ss->line = acm->minor; ss->close_delay = jiffies_to_msecs(acm->port.close_delay) / 10; ss->closing_wait = acm->port.closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : jiffies_to_msecs(acm->port.closing_wait) / 10; return 0; } static int set_serial_info(struct tty_struct *tty, struct serial_struct *ss) { struct acm *acm = tty->driver_data; unsigned int closing_wait, close_delay; int retval = 0; close_delay = msecs_to_jiffies(ss->close_delay * 10); closing_wait = ss->closing_wait == ASYNC_CLOSING_WAIT_NONE ? ASYNC_CLOSING_WAIT_NONE : msecs_to_jiffies(ss->closing_wait * 10); mutex_lock(&acm->port.mutex); if (!capable(CAP_SYS_ADMIN)) { if ((close_delay != acm->port.close_delay) || (closing_wait != acm->port.closing_wait)) retval = -EPERM; } else { acm->port.close_delay = close_delay; acm->port.closing_wait = closing_wait; } mutex_unlock(&acm->port.mutex); return retval; } static int wait_serial_change(struct acm *acm, unsigned long arg) { int rv = 0; DECLARE_WAITQUEUE(wait, current); struct async_icount old, new; do { spin_lock_irq(&acm->read_lock); old = acm->oldcount; new = acm->iocount; acm->oldcount = new; spin_unlock_irq(&acm->read_lock); if ((arg & TIOCM_DSR) && old.dsr != new.dsr) break; if ((arg & TIOCM_CD) && old.dcd != new.dcd) break; if ((arg & TIOCM_RI) && old.rng != new.rng) break; add_wait_queue(&acm->wioctl, &wait); set_current_state(TASK_INTERRUPTIBLE); schedule(); remove_wait_queue(&acm->wioctl, &wait); if (acm->disconnected) { if (arg & TIOCM_CD) break; else rv = -ENODEV; } else { if (signal_pending(current)) rv = -ERESTARTSYS; } } while (!rv); return rv; } static int acm_tty_get_icount(struct tty_struct *tty, struct serial_icounter_struct *icount) { struct acm *acm = tty->driver_data; icount->dsr = acm->iocount.dsr; icount->rng = acm->iocount.rng; icount->dcd = acm->iocount.dcd; icount->frame = acm->iocount.frame; icount->overrun = acm->iocount.overrun; icount->parity = acm->iocount.parity; icount->brk = acm->iocount.brk; return 0; } static int acm_tty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct acm *acm = tty->driver_data; int rv = -ENOIOCTLCMD; switch (cmd) { case TIOCMIWAIT: rv = usb_autopm_get_interface(acm->control); if (rv < 0) { rv = -EIO; break; } rv = wait_serial_change(acm, arg); usb_autopm_put_interface(acm->control); break; } return rv; } static void acm_tty_set_termios(struct tty_struct *tty, const struct ktermios *termios_old) { struct acm *acm = tty->driver_data; struct ktermios *termios = &tty->termios; struct usb_cdc_line_coding newline; int newctrl = acm->ctrlout; newline.dwDTERate = cpu_to_le32(tty_get_baud_rate(tty)); newline.bCharFormat = termios->c_cflag & CSTOPB ? 2 : 0; newline.bParityType = termios->c_cflag & PARENB ? (termios->c_cflag & PARODD ? 1 : 2) + (termios->c_cflag & CMSPAR ? 2 : 0) : 0; newline.bDataBits = tty_get_char_size(termios->c_cflag); /* FIXME: Needs to clear unsupported bits in the termios */ acm->clocal = ((termios->c_cflag & CLOCAL) != 0); if (C_BAUD(tty) == B0) { newline.dwDTERate = acm->line.dwDTERate; newctrl &= ~USB_CDC_CTRL_DTR; } else if (termios_old && (termios_old->c_cflag & CBAUD) == B0) { newctrl |= USB_CDC_CTRL_DTR; } if (newctrl != acm->ctrlout) acm_set_control(acm, acm->ctrlout = newctrl); if (memcmp(&acm->line, &newline, sizeof newline)) { memcpy(&acm->line, &newline, sizeof newline); dev_dbg(&acm->control->dev, "%s - set line: %d %d %d %d\n", __func__, le32_to_cpu(newline.dwDTERate), newline.bCharFormat, newline.bParityType, newline.bDataBits); acm_set_line(acm, &acm->line); } } static const struct tty_port_operations acm_port_ops = { .dtr_rts = acm_port_dtr_rts, .shutdown = acm_port_shutdown, .activate = acm_port_activate, .destruct = acm_port_destruct, }; /* * USB probe and disconnect routines. */ /* Little helpers: write/read buffers free */ static void acm_write_buffers_free(struct acm *acm) { int i; struct acm_wb *wb; for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) usb_free_coherent(acm->dev, acm->writesize, wb->buf, wb->dmah); } static void acm_read_buffers_free(struct acm *acm) { int i; for (i = 0; i < acm->rx_buflimit; i++) usb_free_coherent(acm->dev, acm->readsize, acm->read_buffers[i].base, acm->read_buffers[i].dma); } /* Little helper: write buffers allocate */ static int acm_write_buffers_alloc(struct acm *acm) { int i; struct acm_wb *wb; for (wb = &acm->wb[0], i = 0; i < ACM_NW; i++, wb++) { wb->buf = usb_alloc_coherent(acm->dev, acm->writesize, GFP_KERNEL, &wb->dmah); if (!wb->buf) { while (i != 0) { --i; --wb; usb_free_coherent(acm->dev, acm->writesize, wb->buf, wb->dmah); } return -ENOMEM; } } return 0; } static int acm_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_cdc_union_desc *union_header = NULL; struct usb_cdc_call_mgmt_descriptor *cmgmd = NULL; unsigned char *buffer = intf->altsetting->extra; int buflen = intf->altsetting->extralen; struct usb_interface *control_interface; struct usb_interface *data_interface; struct usb_endpoint_descriptor *epctrl = NULL; struct usb_endpoint_descriptor *epread = NULL; struct usb_endpoint_descriptor *epwrite = NULL; struct usb_device *usb_dev = interface_to_usbdev(intf); struct usb_cdc_parsed_header h; struct acm *acm; int minor; int ctrlsize, readsize; u8 *buf; int call_intf_num = -1; int data_intf_num = -1; unsigned long quirks; int num_rx_buf; int i; int combined_interfaces = 0; struct device *tty_dev; int rv = -ENOMEM; int res; /* normal quirks */ quirks = (unsigned long)id->driver_info; if (quirks == IGNORE_DEVICE) return -ENODEV; memset(&h, 0x00, sizeof(struct usb_cdc_parsed_header)); num_rx_buf = (quirks == SINGLE_RX_URB) ? 1 : ACM_NR; /* handle quirks deadly to normal probing*/ if (quirks == NO_UNION_NORMAL) { data_interface = usb_ifnum_to_if(usb_dev, 1); control_interface = usb_ifnum_to_if(usb_dev, 0); /* we would crash */ if (!data_interface || !control_interface) return -ENODEV; goto skip_normal_probe; } /* normal probing*/ if (!buffer) { dev_err(&intf->dev, "Weird descriptor references\n"); return -EINVAL; } if (!buflen) { if (intf->cur_altsetting->endpoint && intf->cur_altsetting->endpoint->extralen && intf->cur_altsetting->endpoint->extra) { dev_dbg(&intf->dev, "Seeking extra descriptors on endpoint\n"); buflen = intf->cur_altsetting->endpoint->extralen; buffer = intf->cur_altsetting->endpoint->extra; } else { dev_err(&intf->dev, "Zero length descriptor references\n"); return -EINVAL; } } cdc_parse_cdc_header(&h, intf, buffer, buflen); union_header = h.usb_cdc_union_desc; cmgmd = h.usb_cdc_call_mgmt_descriptor; if (cmgmd) call_intf_num = cmgmd->bDataInterface; if (!union_header) { if (intf->cur_altsetting->desc.bNumEndpoints == 3) { dev_dbg(&intf->dev, "No union descriptor, assuming single interface\n"); combined_interfaces = 1; control_interface = data_interface = intf; goto look_for_collapsed_interface; } else if (call_intf_num > 0) { dev_dbg(&intf->dev, "No union descriptor, using call management descriptor\n"); data_intf_num = call_intf_num; data_interface = usb_ifnum_to_if(usb_dev, data_intf_num); control_interface = intf; } else { dev_dbg(&intf->dev, "No union descriptor, giving up\n"); return -ENODEV; } } else { int class = -1; data_intf_num = union_header->bSlaveInterface0; control_interface = usb_ifnum_to_if(usb_dev, union_header->bMasterInterface0); data_interface = usb_ifnum_to_if(usb_dev, data_intf_num); if (control_interface) class = control_interface->cur_altsetting->desc.bInterfaceClass; if (class != USB_CLASS_COMM && class != USB_CLASS_CDC_DATA) { dev_dbg(&intf->dev, "Broken union descriptor, assuming single interface\n"); combined_interfaces = 1; control_interface = data_interface = intf; goto look_for_collapsed_interface; } } if (!control_interface || !data_interface) { dev_dbg(&intf->dev, "no interfaces\n"); return -ENODEV; } if (data_intf_num != call_intf_num) dev_dbg(&intf->dev, "Separate call control interface. That is not fully supported.\n"); if (control_interface == data_interface) { /* some broken devices designed for windows work this way */ dev_warn(&intf->dev,"Control and data interfaces are not separated!\n"); combined_interfaces = 1; /* a popular other OS doesn't use it */ quirks |= NO_CAP_LINE; if (data_interface->cur_altsetting->desc.bNumEndpoints != 3) { dev_err(&intf->dev, "This needs exactly 3 endpoints\n"); return -EINVAL; } look_for_collapsed_interface: res = usb_find_common_endpoints(data_interface->cur_altsetting, &epread, &epwrite, &epctrl, NULL); if (res) return res; goto made_compressed_probe; } skip_normal_probe: /*workaround for switched interfaces */ if (data_interface->cur_altsetting->desc.bInterfaceClass != USB_CLASS_CDC_DATA) { if (control_interface->cur_altsetting->desc.bInterfaceClass == USB_CLASS_CDC_DATA) { dev_dbg(&intf->dev, "Your device has switched interfaces.\n"); swap(control_interface, data_interface); } else { return -EINVAL; } } /* Accept probe requests only for the control interface */ if (!combined_interfaces && intf != control_interface) return -ENODEV; if (data_interface->cur_altsetting->desc.bNumEndpoints < 2 || control_interface->cur_altsetting->desc.bNumEndpoints == 0) return -EINVAL; epctrl = &control_interface->cur_altsetting->endpoint[0].desc; epread = &data_interface->cur_altsetting->endpoint[0].desc; epwrite = &data_interface->cur_altsetting->endpoint[1].desc; /* workaround for switched endpoints */ if (!usb_endpoint_dir_in(epread)) { /* descriptors are swapped */ dev_dbg(&intf->dev, "The data interface has switched endpoints\n"); swap(epread, epwrite); } made_compressed_probe: dev_dbg(&intf->dev, "interfaces are valid\n"); acm = kzalloc(sizeof(struct acm), GFP_KERNEL); if (!acm) return -ENOMEM; tty_port_init(&acm->port); acm->port.ops = &acm_port_ops; ctrlsize = usb_endpoint_maxp(epctrl); readsize = usb_endpoint_maxp(epread) * (quirks == SINGLE_RX_URB ? 1 : 2); acm->combined_interfaces = combined_interfaces; acm->writesize = usb_endpoint_maxp(epwrite) * 20; acm->control = control_interface; acm->data = data_interface; usb_get_intf(acm->control); /* undone in destruct() */ minor = acm_alloc_minor(acm); if (minor < 0) { acm->minor = ACM_MINOR_INVALID; goto err_put_port; } acm->minor = minor; acm->dev = usb_dev; if (h.usb_cdc_acm_descriptor) acm->ctrl_caps = h.usb_cdc_acm_descriptor->bmCapabilities; if (quirks & NO_CAP_LINE) acm->ctrl_caps &= ~USB_CDC_CAP_LINE; acm->ctrlsize = ctrlsize; acm->readsize = readsize; acm->rx_buflimit = num_rx_buf; INIT_DELAYED_WORK(&acm->dwork, acm_softint); init_waitqueue_head(&acm->wioctl); spin_lock_init(&acm->write_lock); spin_lock_init(&acm->read_lock); mutex_init(&acm->mutex); if (usb_endpoint_xfer_int(epread)) { acm->bInterval = epread->bInterval; acm->in = usb_rcvintpipe(usb_dev, epread->bEndpointAddress); } else { acm->in = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress); } if (usb_endpoint_xfer_int(epwrite)) acm->out = usb_sndintpipe(usb_dev, epwrite->bEndpointAddress); else acm->out = usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress); init_usb_anchor(&acm->delayed); acm->quirks = quirks; buf = usb_alloc_coherent(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma); if (!buf) goto err_put_port; acm->ctrl_buffer = buf; if (acm_write_buffers_alloc(acm) < 0) goto err_free_ctrl_buffer; acm->ctrlurb = usb_alloc_urb(0, GFP_KERNEL); if (!acm->ctrlurb) goto err_free_write_buffers; for (i = 0; i < num_rx_buf; i++) { struct acm_rb *rb = &(acm->read_buffers[i]); struct urb *urb; rb->base = usb_alloc_coherent(acm->dev, readsize, GFP_KERNEL, &rb->dma); if (!rb->base) goto err_free_read_urbs; rb->index = i; rb->instance = acm; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) goto err_free_read_urbs; urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; urb->transfer_dma = rb->dma; if (usb_endpoint_xfer_int(epread)) usb_fill_int_urb(urb, acm->dev, acm->in, rb->base, acm->readsize, acm_read_bulk_callback, rb, acm->bInterval); else usb_fill_bulk_urb(urb, acm->dev, acm->in, rb->base, acm->readsize, acm_read_bulk_callback, rb); acm->read_urbs[i] = urb; __set_bit(i, &acm->read_urbs_free); } for (i = 0; i < ACM_NW; i++) { struct acm_wb *snd = &(acm->wb[i]); snd->urb = usb_alloc_urb(0, GFP_KERNEL); if (!snd->urb) goto err_free_write_urbs; if (usb_endpoint_xfer_int(epwrite)) usb_fill_int_urb(snd->urb, usb_dev, acm->out, NULL, acm->writesize, acm_write_bulk, snd, epwrite->bInterval); else usb_fill_bulk_urb(snd->urb, usb_dev, acm->out, NULL, acm->writesize, acm_write_bulk, snd); snd->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; if (quirks & SEND_ZERO_PACKET) snd->urb->transfer_flags |= URB_ZERO_PACKET; snd->instance = acm; } usb_set_intfdata(intf, acm); i = device_create_file(&intf->dev, &dev_attr_bmCapabilities); if (i < 0) goto err_free_write_urbs; if (h.usb_cdc_country_functional_desc) { /* export the country data */ struct usb_cdc_country_functional_desc * cfd = h.usb_cdc_country_functional_desc; acm->country_codes = kmalloc(cfd->bLength - 4, GFP_KERNEL); if (!acm->country_codes) goto skip_countries; acm->country_code_size = cfd->bLength - 4; memcpy(acm->country_codes, (u8 *)&cfd->wCountyCode0, cfd->bLength - 4); acm->country_rel_date = cfd->iCountryCodeRelDate; i = device_create_file(&intf->dev, &dev_attr_wCountryCodes); if (i < 0) { kfree(acm->country_codes); acm->country_codes = NULL; acm->country_code_size = 0; goto skip_countries; } i = device_create_file(&intf->dev, &dev_attr_iCountryCodeRelDate); if (i < 0) { device_remove_file(&intf->dev, &dev_attr_wCountryCodes); kfree(acm->country_codes); acm->country_codes = NULL; acm->country_code_size = 0; goto skip_countries; } } skip_countries: usb_fill_int_urb(acm->ctrlurb, usb_dev, usb_rcvintpipe(usb_dev, epctrl->bEndpointAddress), acm->ctrl_buffer, ctrlsize, acm_ctrl_irq, acm, /* works around buggy devices */ epctrl->bInterval ? epctrl->bInterval : 16); acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; acm->ctrlurb->transfer_dma = acm->ctrl_dma; acm->notification_buffer = NULL; acm->nb_index = 0; acm->nb_size = 0; acm->line.dwDTERate = cpu_to_le32(9600); acm->line.bDataBits = 8; acm_set_line(acm, &acm->line); if (!acm->combined_interfaces) { rv = usb_driver_claim_interface(&acm_driver, data_interface, acm); if (rv) goto err_remove_files; } tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor, &control_interface->dev); if (IS_ERR(tty_dev)) { rv = PTR_ERR(tty_dev); goto err_release_data_interface; } if (quirks & CLEAR_HALT_CONDITIONS) { usb_clear_halt(usb_dev, acm->in); usb_clear_halt(usb_dev, acm->out); } dev_info(&intf->dev, "ttyACM%d: USB ACM device\n", minor); return 0; err_release_data_interface: if (!acm->combined_interfaces) { /* Clear driver data so that disconnect() returns early. */ usb_set_intfdata(data_interface, NULL); usb_driver_release_interface(&acm_driver, data_interface); } err_remove_files: if (acm->country_codes) { device_remove_file(&acm->control->dev, &dev_attr_wCountryCodes); device_remove_file(&acm->control->dev, &dev_attr_iCountryCodeRelDate); } device_remove_file(&acm->control->dev, &dev_attr_bmCapabilities); err_free_write_urbs: for (i = 0; i < ACM_NW; i++) usb_free_urb(acm->wb[i].urb); err_free_read_urbs: for (i = 0; i < num_rx_buf; i++) usb_free_urb(acm->read_urbs[i]); acm_read_buffers_free(acm); usb_free_urb(acm->ctrlurb); err_free_write_buffers: acm_write_buffers_free(acm); err_free_ctrl_buffer: usb_free_coherent(usb_dev, ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); err_put_port: tty_port_put(&acm->port); return rv; } static void acm_disconnect(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); struct tty_struct *tty; int i; /* sibling interface is already cleaning up */ if (!acm) return; acm->disconnected = true; /* * there is a circular dependency. acm_softint() can resubmit * the URBs in error handling so we need to block any * submission right away */ acm_poison_urbs(acm); mutex_lock(&acm->mutex); if (acm->country_codes) { device_remove_file(&acm->control->dev, &dev_attr_wCountryCodes); device_remove_file(&acm->control->dev, &dev_attr_iCountryCodeRelDate); } wake_up_all(&acm->wioctl); device_remove_file(&acm->control->dev, &dev_attr_bmCapabilities); usb_set_intfdata(acm->control, NULL); usb_set_intfdata(acm->data, NULL); mutex_unlock(&acm->mutex); tty = tty_port_tty_get(&acm->port); if (tty) { tty_vhangup(tty); tty_kref_put(tty); } cancel_delayed_work_sync(&acm->dwork); tty_unregister_device(acm_tty_driver, acm->minor); usb_free_urb(acm->ctrlurb); for (i = 0; i < ACM_NW; i++) usb_free_urb(acm->wb[i].urb); for (i = 0; i < acm->rx_buflimit; i++) usb_free_urb(acm->read_urbs[i]); acm_write_buffers_free(acm); usb_free_coherent(acm->dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma); acm_read_buffers_free(acm); kfree(acm->notification_buffer); if (!acm->combined_interfaces) usb_driver_release_interface(&acm_driver, intf == acm->control ? acm->data : acm->control); tty_port_put(&acm->port); } #ifdef CONFIG_PM static int acm_suspend(struct usb_interface *intf, pm_message_t message) { struct acm *acm = usb_get_intfdata(intf); int cnt; spin_lock_irq(&acm->write_lock); if (PMSG_IS_AUTO(message)) { if (acm->transmitting) { spin_unlock_irq(&acm->write_lock); return -EBUSY; } } cnt = acm->susp_count++; spin_unlock_irq(&acm->write_lock); if (cnt) return 0; acm_poison_urbs(acm); cancel_delayed_work_sync(&acm->dwork); acm->urbs_in_error_delay = 0; return 0; } static int acm_resume(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); struct urb *urb; int rv = 0; spin_lock_irq(&acm->write_lock); if (--acm->susp_count) goto out; acm_unpoison_urbs(acm); if (tty_port_initialized(&acm->port)) { rv = usb_submit_urb(acm->ctrlurb, GFP_ATOMIC); for (;;) { urb = usb_get_from_anchor(&acm->delayed); if (!urb) break; acm_start_wb(acm, urb->context); } /* * delayed error checking because we must * do the write path at all cost */ if (rv < 0) goto out; rv = acm_submit_read_urbs(acm, GFP_ATOMIC); } out: spin_unlock_irq(&acm->write_lock); return rv; } static int acm_reset_resume(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); if (tty_port_initialized(&acm->port)) tty_port_tty_hangup(&acm->port, false); return acm_resume(intf); } #endif /* CONFIG_PM */ static int acm_pre_reset(struct usb_interface *intf) { struct acm *acm = usb_get_intfdata(intf); clear_bit(EVENT_RX_STALL, &acm->flags); acm->nb_index = 0; /* pending control transfers are lost */ return 0; } #define NOKIA_PCSUITE_ACM_INFO(x) \ USB_DEVICE_AND_INTERFACE_INFO(0x0421, x, \ USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, \ USB_CDC_ACM_PROTO_VENDOR) #define SAMSUNG_PCSUITE_ACM_INFO(x) \ USB_DEVICE_AND_INTERFACE_INFO(0x04e7, x, \ USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, \ USB_CDC_ACM_PROTO_VENDOR) /* * USB driver structure. */ static const struct usb_device_id acm_ids[] = { /* quirky and broken devices */ { USB_DEVICE(0x0424, 0x274e), /* Microchip Technology, Inc. (formerly SMSC) */ .driver_info = DISABLE_ECHO, }, /* DISABLE ECHO in termios flag */ { USB_DEVICE(0x076d, 0x0006), /* Denso Cradle CU-321 */ .driver_info = NO_UNION_NORMAL, },/* has no union descriptor */ { USB_DEVICE(0x17ef, 0x7000), /* Lenovo USB modem */ .driver_info = NO_UNION_NORMAL, },/* has no union descriptor */ { USB_DEVICE(0x0870, 0x0001), /* Metricom GS Modem */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x045b, 0x023c), /* Renesas USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, { USB_DEVICE(0x045b, 0x0248), /* Renesas USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, { USB_DEVICE(0x045b, 0x024D), /* Renesas USB Download mode */ .driver_info = DISABLE_ECHO, /* Don't echo banner */ }, { USB_DEVICE(0x0e8d, 0x0003), /* FIREFLY, MediaTek Inc; andrey.arapov@gmail.com */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0e8d, 0x2000), /* MediaTek Inc Preloader */ .driver_info = DISABLE_ECHO, /* DISABLE ECHO in termios flag */ }, { USB_DEVICE(0x0e8d, 0x3329), /* MediaTek Inc GPS */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0482, 0x0203), /* KYOCERA AH-K3001V */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x079b, 0x000f), /* BT On-Air USB MODEM */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0ace, 0x1602), /* ZyDAS 56K USB MODEM */ .driver_info = SINGLE_RX_URB, }, { USB_DEVICE(0x0ace, 0x1608), /* ZyDAS 56K USB MODEM */ .driver_info = SINGLE_RX_URB, /* firmware bug */ }, { USB_DEVICE(0x0ace, 0x1611), /* ZyDAS 56K USB MODEM - new version */ .driver_info = SINGLE_RX_URB, /* firmware bug */ }, { USB_DEVICE(0x11ca, 0x0201), /* VeriFone Mx870 Gadget Serial */ .driver_info = SINGLE_RX_URB, }, { USB_DEVICE(0x1965, 0x0018), /* Uniden UBC125XLT */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x22b8, 0x7000), /* Motorola Q Phone */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0803, 0x3095), /* Zoom Telephonics Model 3095F USB MODEM */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0572, 0x1321), /* Conexant USB MODEM CX93010 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0572, 0x1324), /* Conexant USB MODEM RD02-D400 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0572, 0x1328), /* Shiro / Aztech USB MODEM UM-3100 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x0572, 0x1349), /* Hiro (Conexant) USB MODEM H50228 */ .driver_info = NO_UNION_NORMAL, /* has no union descriptor */ }, { USB_DEVICE(0x20df, 0x0001), /* Simtec Electronics Entropy Key */ .driver_info = QUIRK_CONTROL_LINE_STATE, }, { USB_DEVICE(0x2184, 0x001c) }, /* GW Instek AFG-2225 */ { USB_DEVICE(0x2184, 0x0036) }, /* GW Instek AFG-125 */ { USB_DEVICE(0x22b8, 0x6425), /* Motorola MOTOMAGX phones */ }, /* Motorola H24 HSPA module: */ { USB_DEVICE(0x22b8, 0x2d91) }, /* modem */ { USB_DEVICE(0x22b8, 0x2d92), /* modem + diagnostics */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d93), /* modem + AT port */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d95), /* modem + AT port + diagnostics */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d96), /* modem + NMEA */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d97), /* modem + diagnostics + NMEA */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d99), /* modem + AT port + NMEA */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x22b8, 0x2d9a), /* modem + AT port + diagnostics + NMEA */ .driver_info = NO_UNION_NORMAL, /* handle only modem interface */ }, { USB_DEVICE(0x0572, 0x1329), /* Hummingbird huc56s (Conexant) */ .driver_info = NO_UNION_NORMAL, /* union descriptor misplaced on data interface instead of communications interface. Maybe we should define a new quirk for this. */ }, { USB_DEVICE(0x0572, 0x1340), /* Conexant CX93010-2x UCMxx */ .driver_info = NO_UNION_NORMAL, }, { USB_DEVICE(0x05f9, 0x4002), /* PSC Scanning, Magellan 800i */ .driver_info = NO_UNION_NORMAL, }, { USB_DEVICE(0x1bbb, 0x0003), /* Alcatel OT-I650 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, { USB_DEVICE(0x1576, 0x03b1), /* Maretron USB100 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, { USB_DEVICE(0xfff0, 0x0100), /* DATECS FP-2000 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, { USB_DEVICE(0x09d8, 0x0320), /* Elatec GmbH TWN3 */ .driver_info = NO_UNION_NORMAL, /* has misplaced union descriptor */ }, { USB_DEVICE(0x0c26, 0x0020), /* Icom ICF3400 Serie */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, { USB_DEVICE(0x0ca6, 0xa050), /* Castles VEGA3000 */ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */ }, { USB_DEVICE(0x2912, 0x0001), /* ATOL FPrint */ .driver_info = CLEAR_HALT_CONDITIONS, }, /* Nokia S60 phones expose two ACM channels. The first is * a modem and is picked up by the standard AT-command * information below. The second is 'vendor-specific' but * is treated as a serial device at the S60 end, so we want * to expose it on Linux too. */ { NOKIA_PCSUITE_ACM_INFO(0x042D), }, /* Nokia 3250 */ { NOKIA_PCSUITE_ACM_INFO(0x04D8), }, /* Nokia 5500 Sport */ { NOKIA_PCSUITE_ACM_INFO(0x04C9), }, /* Nokia E50 */ { NOKIA_PCSUITE_ACM_INFO(0x0419), }, /* Nokia E60 */ { NOKIA_PCSUITE_ACM_INFO(0x044D), }, /* Nokia E61 */ { NOKIA_PCSUITE_ACM_INFO(0x0001), }, /* Nokia E61i */ { NOKIA_PCSUITE_ACM_INFO(0x0475), }, /* Nokia E62 */ { NOKIA_PCSUITE_ACM_INFO(0x0508), }, /* Nokia E65 */ { NOKIA_PCSUITE_ACM_INFO(0x0418), }, /* Nokia E70 */ { NOKIA_PCSUITE_ACM_INFO(0x0425), }, /* Nokia N71 */ { NOKIA_PCSUITE_ACM_INFO(0x0486), }, /* Nokia N73 */ { NOKIA_PCSUITE_ACM_INFO(0x04DF), }, /* Nokia N75 */ { NOKIA_PCSUITE_ACM_INFO(0x000e), }, /* Nokia N77 */ { NOKIA_PCSUITE_ACM_INFO(0x0445), }, /* Nokia N80 */ { NOKIA_PCSUITE_ACM_INFO(0x042F), }, /* Nokia N91 & N91 8GB */ { NOKIA_PCSUITE_ACM_INFO(0x048E), }, /* Nokia N92 */ { NOKIA_PCSUITE_ACM_INFO(0x0420), }, /* Nokia N93 */ { NOKIA_PCSUITE_ACM_INFO(0x04E6), }, /* Nokia N93i */ { NOKIA_PCSUITE_ACM_INFO(0x04B2), }, /* Nokia 5700 XpressMusic */ { NOKIA_PCSUITE_ACM_INFO(0x0134), }, /* Nokia 6110 Navigator (China) */ { NOKIA_PCSUITE_ACM_INFO(0x046E), }, /* Nokia 6110 Navigator */ { NOKIA_PCSUITE_ACM_INFO(0x002f), }, /* Nokia 6120 classic & */ { NOKIA_PCSUITE_ACM_INFO(0x0088), }, /* Nokia 6121 classic */ { NOKIA_PCSUITE_ACM_INFO(0x00fc), }, /* Nokia 6124 classic */ { NOKIA_PCSUITE_ACM_INFO(0x0042), }, /* Nokia E51 */ { NOKIA_PCSUITE_ACM_INFO(0x00b0), }, /* Nokia E66 */ { NOKIA_PCSUITE_ACM_INFO(0x00ab), }, /* Nokia E71 */ { NOKIA_PCSUITE_ACM_INFO(0x0481), }, /* Nokia N76 */ { NOKIA_PCSUITE_ACM_INFO(0x0007), }, /* Nokia N81 & N81 8GB */ { NOKIA_PCSUITE_ACM_INFO(0x0071), }, /* Nokia N82 */ { NOKIA_PCSUITE_ACM_INFO(0x04F0), }, /* Nokia N95 & N95-3 NAM */ { NOKIA_PCSUITE_ACM_INFO(0x0070), }, /* Nokia N95 8GB */ { NOKIA_PCSUITE_ACM_INFO(0x0099), }, /* Nokia 6210 Navigator, RM-367 */ { NOKIA_PCSUITE_ACM_INFO(0x0128), }, /* Nokia 6210 Navigator, RM-419 */ { NOKIA_PCSUITE_ACM_INFO(0x008f), }, /* Nokia 6220 Classic */ { NOKIA_PCSUITE_ACM_INFO(0x00a0), }, /* Nokia 6650 */ { NOKIA_PCSUITE_ACM_INFO(0x007b), }, /* Nokia N78 */ { NOKIA_PCSUITE_ACM_INFO(0x0094), }, /* Nokia N85 */ { NOKIA_PCSUITE_ACM_INFO(0x003a), }, /* Nokia N96 & N96-3 */ { NOKIA_PCSUITE_ACM_INFO(0x00e9), }, /* Nokia 5320 XpressMusic */ { NOKIA_PCSUITE_ACM_INFO(0x0108), }, /* Nokia 5320 XpressMusic 2G */ { NOKIA_PCSUITE_ACM_INFO(0x01f5), }, /* Nokia N97, RM-505 */ { NOKIA_PCSUITE_ACM_INFO(0x02e3), }, /* Nokia 5230, RM-588 */ { NOKIA_PCSUITE_ACM_INFO(0x0178), }, /* Nokia E63 */ { NOKIA_PCSUITE_ACM_INFO(0x010e), }, /* Nokia E75 */ { NOKIA_PCSUITE_ACM_INFO(0x02d9), }, /* Nokia 6760 Slide */ { NOKIA_PCSUITE_ACM_INFO(0x01d0), }, /* Nokia E52 */ { NOKIA_PCSUITE_ACM_INFO(0x0223), }, /* Nokia E72 */ { NOKIA_PCSUITE_ACM_INFO(0x0275), }, /* Nokia X6 */ { NOKIA_PCSUITE_ACM_INFO(0x026c), }, /* Nokia N97 Mini */ { NOKIA_PCSUITE_ACM_INFO(0x0154), }, /* Nokia 5800 XpressMusic */ { NOKIA_PCSUITE_ACM_INFO(0x04ce), }, /* Nokia E90 */ { NOKIA_PCSUITE_ACM_INFO(0x01d4), }, /* Nokia E55 */ { NOKIA_PCSUITE_ACM_INFO(0x0302), }, /* Nokia N8 */ { NOKIA_PCSUITE_ACM_INFO(0x0335), }, /* Nokia E7 */ { NOKIA_PCSUITE_ACM_INFO(0x03cd), }, /* Nokia C7 */ { SAMSUNG_PCSUITE_ACM_INFO(0x6651), }, /* Samsung GTi8510 (INNOV8) */ /* Support for Owen devices */ { USB_DEVICE(0x03eb, 0x0030), }, /* Owen SI30 */ /* NOTE: non-Nokia COMM/ACM/0xff is likely MSFT RNDIS... NOT a modem! */ #if IS_ENABLED(CONFIG_INPUT_IMS_PCU) { USB_DEVICE(0x04d8, 0x0082), /* Application mode */ .driver_info = IGNORE_DEVICE, }, { USB_DEVICE(0x04d8, 0x0083), /* Bootloader mode */ .driver_info = IGNORE_DEVICE, }, #endif #if IS_ENABLED(CONFIG_IR_TOY) { USB_DEVICE(0x04d8, 0xfd08), .driver_info = IGNORE_DEVICE, }, { USB_DEVICE(0x04d8, 0xf58b), .driver_info = IGNORE_DEVICE, }, #endif #if IS_ENABLED(CONFIG_USB_SERIAL_XR) { USB_DEVICE(0x04e2, 0x1400), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1401), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1402), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1403), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1410), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1411), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1412), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1414), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1420), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1422), .driver_info = IGNORE_DEVICE }, { USB_DEVICE(0x04e2, 0x1424), .driver_info = IGNORE_DEVICE }, #endif /*Samsung phone in firmware update mode */ { USB_DEVICE(0x04e8, 0x685d), .driver_info = IGNORE_DEVICE, }, /* Exclude Infineon Flash Loader utility */ { USB_DEVICE(0x058b, 0x0041), .driver_info = IGNORE_DEVICE, }, /* Exclude ETAS ES58x */ { USB_DEVICE(0x108c, 0x0159), /* ES581.4 */ .driver_info = IGNORE_DEVICE, }, { USB_DEVICE(0x108c, 0x0168), /* ES582.1 */ .driver_info = IGNORE_DEVICE, }, { USB_DEVICE(0x108c, 0x0169), /* ES584.1 */ .driver_info = IGNORE_DEVICE, }, { USB_DEVICE(0x1bc7, 0x0021), /* Telit 3G ACM only composition */ .driver_info = SEND_ZERO_PACKET, }, { USB_DEVICE(0x1bc7, 0x0023), /* Telit 3G ACM + ECM composition */ .driver_info = SEND_ZERO_PACKET, }, /* Exclude Goodix Fingerprint Reader */ { USB_DEVICE(0x27c6, 0x5395), .driver_info = IGNORE_DEVICE, }, /* Exclude Heimann Sensor GmbH USB appset demo */ { USB_DEVICE(0x32a7, 0x0000), .driver_info = IGNORE_DEVICE, }, /* control interfaces without any protocol set */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_PROTO_NONE) }, /* control interfaces with various AT-command sets */ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_V25TER) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_PCCA101) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_PCCA101_WAKE) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_GSM) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_3G) }, { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, USB_CDC_ACM_PROTO_AT_CDMA) }, { USB_DEVICE(0x1519, 0x0452), /* Intel 7260 modem */ .driver_info = SEND_ZERO_PACKET, }, { } }; MODULE_DEVICE_TABLE(usb, acm_ids); static struct usb_driver acm_driver = { .name = "cdc_acm", .probe = acm_probe, .disconnect = acm_disconnect, #ifdef CONFIG_PM .suspend = acm_suspend, .resume = acm_resume, .reset_resume = acm_reset_resume, #endif .pre_reset = acm_pre_reset, .id_table = acm_ids, #ifdef CONFIG_PM .supports_autosuspend = 1, #endif .disable_hub_initiated_lpm = 1, }; /* * TTY driver structures. */ static const struct tty_operations acm_ops = { .install = acm_tty_install, .open = acm_tty_open, .close = acm_tty_close, .cleanup = acm_tty_cleanup, .hangup = acm_tty_hangup, .write = acm_tty_write, .write_room = acm_tty_write_room, .flush_buffer = acm_tty_flush_buffer, .ioctl = acm_tty_ioctl, .throttle = acm_tty_throttle, .unthrottle = acm_tty_unthrottle, .chars_in_buffer = acm_tty_chars_in_buffer, .break_ctl = acm_tty_break_ctl, .set_termios = acm_tty_set_termios, .tiocmget = acm_tty_tiocmget, .tiocmset = acm_tty_tiocmset, .get_serial = get_serial_info, .set_serial = set_serial_info, .get_icount = acm_tty_get_icount, }; /* * Init / exit. */ static int __init acm_init(void) { int retval; acm_tty_driver = tty_alloc_driver(ACM_TTY_MINORS, TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV); if (IS_ERR(acm_tty_driver)) return PTR_ERR(acm_tty_driver); acm_tty_driver->driver_name = "acm", acm_tty_driver->name = "ttyACM", acm_tty_driver->major = ACM_TTY_MAJOR, acm_tty_driver->minor_start = 0, acm_tty_driver->type = TTY_DRIVER_TYPE_SERIAL, acm_tty_driver->subtype = SERIAL_TYPE_NORMAL, acm_tty_driver->init_termios = tty_std_termios; acm_tty_driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; tty_set_operations(acm_tty_driver, &acm_ops); retval = tty_register_driver(acm_tty_driver); if (retval) { tty_driver_kref_put(acm_tty_driver); return retval; } retval = usb_register(&acm_driver); if (retval) { tty_unregister_driver(acm_tty_driver); tty_driver_kref_put(acm_tty_driver); return retval; } printk(KERN_INFO KBUILD_MODNAME ": " DRIVER_DESC "\n"); return 0; } static void __exit acm_exit(void) { usb_deregister(&acm_driver); tty_unregister_driver(acm_tty_driver); tty_driver_kref_put(acm_tty_driver); idr_destroy(&acm_minors); } module_init(acm_init); module_exit(acm_exit); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); MODULE_ALIAS_CHARDEV_MAJOR(ACM_TTY_MAJOR);
25 4 21 21 6 5 8 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 // SPDX-License-Identifier: GPL-2.0-only /* * * Generic part shared by ipv4 and ipv6 backends. */ #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> #include <linux/in.h> #include <net/xfrm.h> static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = { [NFTA_XFRM_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_XFRM_DIR] = { .type = NLA_U8 }, [NFTA_XFRM_SPNUM] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_XFRM_DREG] = { .type = NLA_U32 }, }; struct nft_xfrm { enum nft_xfrm_keys key:8; u8 dreg; u8 dir; u8 spnum; u8 len; }; static int nft_xfrm_get_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_xfrm *priv = nft_expr_priv(expr); unsigned int len = 0; u32 spnum = 0; u8 dir; if (!tb[NFTA_XFRM_KEY] || !tb[NFTA_XFRM_DIR] || !tb[NFTA_XFRM_DREG]) return -EINVAL; switch (ctx->family) { case NFPROTO_IPV4: case NFPROTO_IPV6: case NFPROTO_INET: break; default: return -EOPNOTSUPP; } priv->key = ntohl(nla_get_be32(tb[NFTA_XFRM_KEY])); switch (priv->key) { case NFT_XFRM_KEY_REQID: case NFT_XFRM_KEY_SPI: len = sizeof(u32); break; case NFT_XFRM_KEY_DADDR_IP4: case NFT_XFRM_KEY_SADDR_IP4: len = sizeof(struct in_addr); break; case NFT_XFRM_KEY_DADDR_IP6: case NFT_XFRM_KEY_SADDR_IP6: len = sizeof(struct in6_addr); break; default: return -EINVAL; } dir = nla_get_u8(tb[NFTA_XFRM_DIR]); switch (dir) { case XFRM_POLICY_IN: case XFRM_POLICY_OUT: priv->dir = dir; break; default: return -EINVAL; } if (tb[NFTA_XFRM_SPNUM]) spnum = ntohl(nla_get_be32(tb[NFTA_XFRM_SPNUM])); if (spnum >= XFRM_MAX_DEPTH) return -ERANGE; priv->spnum = spnum; priv->len = len; return nft_parse_register_store(ctx, tb[NFTA_XFRM_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, len); } /* Return true if key asks for daddr/saddr and current * state does have a valid address (BEET, TUNNEL). */ static bool xfrm_state_addr_ok(enum nft_xfrm_keys k, u8 family, u8 mode) { switch (k) { case NFT_XFRM_KEY_DADDR_IP4: case NFT_XFRM_KEY_SADDR_IP4: if (family == NFPROTO_IPV4) break; return false; case NFT_XFRM_KEY_DADDR_IP6: case NFT_XFRM_KEY_SADDR_IP6: if (family == NFPROTO_IPV6) break; return false; default: return true; } return mode == XFRM_MODE_BEET || mode == XFRM_MODE_TUNNEL; } static void nft_xfrm_state_get_key(const struct nft_xfrm *priv, struct nft_regs *regs, const struct xfrm_state *state) { u32 *dest = &regs->data[priv->dreg]; if (!xfrm_state_addr_ok(priv->key, state->props.family, state->props.mode)) { regs->verdict.code = NFT_BREAK; return; } switch (priv->key) { case NFT_XFRM_KEY_UNSPEC: case __NFT_XFRM_KEY_MAX: WARN_ON_ONCE(1); break; case NFT_XFRM_KEY_DADDR_IP4: *dest = (__force __u32)state->id.daddr.a4; return; case NFT_XFRM_KEY_DADDR_IP6: memcpy(dest, &state->id.daddr.in6, sizeof(struct in6_addr)); return; case NFT_XFRM_KEY_SADDR_IP4: *dest = (__force __u32)state->props.saddr.a4; return; case NFT_XFRM_KEY_SADDR_IP6: memcpy(dest, &state->props.saddr.in6, sizeof(struct in6_addr)); return; case NFT_XFRM_KEY_REQID: *dest = state->props.reqid; return; case NFT_XFRM_KEY_SPI: *dest = (__force __u32)state->id.spi; return; } regs->verdict.code = NFT_BREAK; } static void nft_xfrm_get_eval_in(const struct nft_xfrm *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct sec_path *sp = skb_sec_path(pkt->skb); const struct xfrm_state *state; if (sp == NULL || sp->len <= priv->spnum) { regs->verdict.code = NFT_BREAK; return; } state = sp->xvec[priv->spnum]; nft_xfrm_state_get_key(priv, regs, state); } static void nft_xfrm_get_eval_out(const struct nft_xfrm *priv, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct dst_entry *dst = skb_dst(pkt->skb); int i; for (i = 0; dst && dst->xfrm; dst = ((const struct xfrm_dst *)dst)->child, i++) { if (i < priv->spnum) continue; nft_xfrm_state_get_key(priv, regs, dst->xfrm); return; } regs->verdict.code = NFT_BREAK; } static void nft_xfrm_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_xfrm *priv = nft_expr_priv(expr); switch (priv->dir) { case XFRM_POLICY_IN: nft_xfrm_get_eval_in(priv, regs, pkt); break; case XFRM_POLICY_OUT: nft_xfrm_get_eval_out(priv, regs, pkt); break; default: WARN_ON_ONCE(1); regs->verdict.code = NFT_BREAK; break; } } static int nft_xfrm_get_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_xfrm *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_XFRM_DREG, priv->dreg)) return -1; if (nla_put_be32(skb, NFTA_XFRM_KEY, htonl(priv->key))) return -1; if (nla_put_u8(skb, NFTA_XFRM_DIR, priv->dir)) return -1; if (nla_put_be32(skb, NFTA_XFRM_SPNUM, htonl(priv->spnum))) return -1; return 0; } static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) { const struct nft_xfrm *priv = nft_expr_priv(expr); unsigned int hooks; if (ctx->family != NFPROTO_IPV4 && ctx->family != NFPROTO_IPV6 && ctx->family != NFPROTO_INET) return -EOPNOTSUPP; switch (priv->dir) { case XFRM_POLICY_IN: hooks = (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_PRE_ROUTING); break; case XFRM_POLICY_OUT: hooks = (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING); break; default: WARN_ON_ONCE(1); return -EINVAL; } return nft_chain_validate_hooks(ctx->chain, hooks); } static bool nft_xfrm_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { const struct nft_xfrm *priv = nft_expr_priv(expr); const struct nft_xfrm *xfrm; if (!nft_reg_track_cmp(track, expr, priv->dreg)) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } xfrm = nft_expr_priv(track->regs[priv->dreg].selector); if (priv->key != xfrm->key || priv->dreg != xfrm->dreg || priv->dir != xfrm->dir || priv->spnum != xfrm->spnum) { nft_reg_track_update(track, expr, priv->dreg, priv->len); return false; } if (!track->regs[priv->dreg].bitwise) return true; return nft_expr_reduce_bitwise(track, expr); } static struct nft_expr_type nft_xfrm_type; static const struct nft_expr_ops nft_xfrm_get_ops = { .type = &nft_xfrm_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_xfrm)), .eval = nft_xfrm_get_eval, .init = nft_xfrm_get_init, .dump = nft_xfrm_get_dump, .validate = nft_xfrm_validate, .reduce = nft_xfrm_reduce, }; static struct nft_expr_type nft_xfrm_type __read_mostly = { .name = "xfrm", .ops = &nft_xfrm_get_ops, .policy = nft_xfrm_policy, .maxattr = NFTA_XFRM_MAX, .owner = THIS_MODULE, }; static int __init nft_xfrm_module_init(void) { return nft_register_expr(&nft_xfrm_type); } static void __exit nft_xfrm_module_exit(void) { nft_unregister_expr(&nft_xfrm_type); } module_init(nft_xfrm_module_init); module_exit(nft_xfrm_module_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("nf_tables: xfrm/IPSec matching"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); MODULE_AUTHOR("Máté Eckl <ecklm94@gmail.com>"); MODULE_ALIAS_NFT_EXPR("xfrm");
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 // SPDX-License-Identifier: GPL-2.0-or-later /* * * Copyright (C) Jonathan Naylor G4KLX (g4klx@g4klx.demon.co.uk) * Copyright (C) Joerg Reuter DL1BKE (jreuter@yaina.de) */ #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/spinlock.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <net/tcp_states.h> #include <net/ax25.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/sock.h> #include <linux/uaccess.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> static void ax25_ds_timeout(struct timer_list *); /* * Add DAMA slave timeout timer to timer list. * Unlike the connection based timers the timeout function gets * triggered every second. Please note that NET_AX25_DAMA_SLAVE_TIMEOUT * (aka /proc/sys/net/ax25/{dev}/dama_slave_timeout) is still in * 1/10th of a second. */ void ax25_ds_setup_timer(ax25_dev *ax25_dev) { timer_setup(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 0); } void ax25_ds_del_timer(ax25_dev *ax25_dev) { if (ax25_dev) del_timer(&ax25_dev->dama.slave_timer); } void ax25_ds_set_timer(ax25_dev *ax25_dev) { if (ax25_dev == NULL) /* paranoia */ return; ax25_dev->dama.slave_timeout = msecs_to_jiffies(ax25_dev->values[AX25_VALUES_DS_TIMEOUT]) / 10; mod_timer(&ax25_dev->dama.slave_timer, jiffies + HZ); } /* * DAMA Slave Timeout * Silently discard all (slave) connections in case our master forgot us... */ static void ax25_ds_timeout(struct timer_list *t) { ax25_dev *ax25_dev = from_timer(ax25_dev, t, dama.slave_timer); ax25_cb *ax25; if (ax25_dev == NULL || !ax25_dev->dama.slave) return; /* Yikes! */ if (!ax25_dev->dama.slave_timeout || --ax25_dev->dama.slave_timeout) { ax25_ds_set_timer(ax25_dev); return; } spin_lock(&ax25_list_lock); ax25_for_each(ax25, &ax25_list) { if (ax25->ax25_dev != ax25_dev || !(ax25->condition & AX25_COND_DAMA_MODE)) continue; ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); ax25_disconnect(ax25, ETIMEDOUT); } spin_unlock(&ax25_list_lock); ax25_dev_dama_off(ax25_dev); } void ax25_ds_heartbeat_expiry(ax25_cb *ax25) { struct sock *sk=ax25->sk; if (sk) bh_lock_sock(sk); switch (ax25->state) { case AX25_STATE_0: case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { if (sk) { sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); return; } break; case AX25_STATE_3: /* * Check the state of the receive buffer. */ if (sk != NULL) { if (atomic_read(&sk->sk_rmem_alloc) < (sk->sk_rcvbuf >> 1) && (ax25->condition & AX25_COND_OWN_RX_BUSY)) { ax25->condition &= ~AX25_COND_OWN_RX_BUSY; ax25->condition &= ~AX25_COND_ACK_PENDING; break; } } break; } if (sk) bh_unlock_sock(sk); ax25_start_heartbeat(ax25); } /* dl1bke 960114: T3 works much like the IDLE timeout, but * gets reloaded with every frame for this * connection. */ void ax25_ds_t3timer_expiry(ax25_cb *ax25) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); ax25_dama_off(ax25); ax25_disconnect(ax25, ETIMEDOUT); } /* dl1bke 960228: close the connection when IDLE expires. * unlike T3 this timer gets reloaded only on * I frames. */ void ax25_ds_idletimer_expiry(ax25_cb *ax25) { ax25_clear_queues(ax25); ax25->n2count = 0; ax25->state = AX25_STATE_2; ax25_calculate_t1(ax25); ax25_start_t1timer(ax25); ax25_stop_t3timer(ax25); if (ax25->sk != NULL) { bh_lock_sock(ax25->sk); ax25->sk->sk_state = TCP_CLOSE; ax25->sk->sk_err = 0; ax25->sk->sk_shutdown |= SEND_SHUTDOWN; if (!sock_flag(ax25->sk, SOCK_DEAD)) { ax25->sk->sk_state_change(ax25->sk); sock_set_flag(ax25->sk, SOCK_DEAD); } bh_unlock_sock(ax25->sk); } } /* dl1bke 960114: The DAMA protocol requires to send data and SABM/DISC * within the poll of any connected channel. Remember * that we are not allowed to send anything unless we * get polled by the Master. * * Thus we'll have to do parts of our T1 handling in * ax25_enquiry_response(). */ void ax25_ds_t1_timeout(ax25_cb *ax25) { switch (ax25->state) { case AX25_STATE_1: if (ax25->n2count == ax25->n2) { if (ax25->modulus == AX25_MODULUS) { ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->modulus = AX25_MODULUS; ax25->window = ax25->ax25_dev->values[AX25_VALUES_WINDOW]; ax25->n2count = 0; ax25_send_control(ax25, AX25_SABM, AX25_POLLOFF, AX25_COMMAND); } } else { ax25->n2count++; if (ax25->modulus == AX25_MODULUS) ax25_send_control(ax25, AX25_SABM, AX25_POLLOFF, AX25_COMMAND); else ax25_send_control(ax25, AX25_SABME, AX25_POLLOFF, AX25_COMMAND); } break; case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); if (!sock_flag(ax25->sk, SOCK_DESTROY)) ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; } break; case AX25_STATE_3: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DM, AX25_POLLON, AX25_RESPONSE); ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; } break; } ax25_calculate_t1(ax25); ax25_start_t1timer(ax25); }
22 26 6 22 6 6 12 33 33 33 10 216 1 249 248 216 3 163 114 193 309 73 284 200 199 1 1 1 5 5 9 26 26 25 9 32 29 32 16 24 1 25 24 1 23 4 1 18 23 52 52 51 60 3 1 5 8 2 8 16 1 4 2 9 8 8 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 /* * Copyright (c) 2016 Intel Corporation * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of the copyright holders not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. The copyright holders make no representations * about the suitability of this software for any purpose. It is provided "as * is" without express or implied warranty. * * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE * OF THIS SOFTWARE. */ #include <linux/export.h> #include <linux/uaccess.h> #include <drm/drm_atomic.h> #include <drm/drm_drv.h> #include <drm/drm_device.h> #include <drm/drm_file.h> #include <drm/drm_mode_object.h> #include <drm/drm_print.h> #include "drm_crtc_internal.h" /* * Internal function to assign a slot in the object idr and optionally * register the object into the idr. */ int __drm_mode_object_add(struct drm_device *dev, struct drm_mode_object *obj, uint32_t obj_type, bool register_obj, void (*obj_free_cb)(struct kref *kref)) { int ret; WARN_ON(!dev->driver->load && dev->registered && !obj_free_cb); mutex_lock(&dev->mode_config.idr_mutex); ret = idr_alloc(&dev->mode_config.object_idr, register_obj ? obj : NULL, 1, 0, GFP_KERNEL); if (ret >= 0) { /* * Set up the object linking under the protection of the idr * lock so that other users can't see inconsistent state. */ obj->id = ret; obj->type = obj_type; if (obj_free_cb) { obj->free_cb = obj_free_cb; kref_init(&obj->refcount); } } mutex_unlock(&dev->mode_config.idr_mutex); return ret < 0 ? ret : 0; } /** * drm_mode_object_add - allocate a new modeset identifier * @dev: DRM device * @obj: object pointer, used to generate unique ID * @obj_type: object type * * Create a unique identifier based on @ptr in @dev's identifier space. Used * for tracking modes, CRTCs and connectors. * * Returns: * Zero on success, error code on failure. */ int drm_mode_object_add(struct drm_device *dev, struct drm_mode_object *obj, uint32_t obj_type) { return __drm_mode_object_add(dev, obj, obj_type, true, NULL); } void drm_mode_object_register(struct drm_device *dev, struct drm_mode_object *obj) { mutex_lock(&dev->mode_config.idr_mutex); idr_replace(&dev->mode_config.object_idr, obj, obj->id); mutex_unlock(&dev->mode_config.idr_mutex); } /** * drm_mode_object_unregister - free a modeset identifier * @dev: DRM device * @object: object to free * * Free @id from @dev's unique identifier pool. * This function can be called multiple times, and guards against * multiple removals. * These modeset identifiers are _not_ reference counted. Hence don't use this * for reference counted modeset objects like framebuffers. */ void drm_mode_object_unregister(struct drm_device *dev, struct drm_mode_object *object) { WARN_ON(!dev->driver->load && dev->registered && !object->free_cb); mutex_lock(&dev->mode_config.idr_mutex); if (object->id) { idr_remove(&dev->mode_config.object_idr, object->id); object->id = 0; } mutex_unlock(&dev->mode_config.idr_mutex); } /** * drm_mode_object_lease_required - check types which must be leased to be used * @type: type of object * * Returns whether the provided type of drm_mode_object must * be owned or leased to be used by a process. */ bool drm_mode_object_lease_required(uint32_t type) { switch(type) { case DRM_MODE_OBJECT_CRTC: case DRM_MODE_OBJECT_CONNECTOR: case DRM_MODE_OBJECT_PLANE: return true; default: return false; } } struct drm_mode_object *__drm_mode_object_find(struct drm_device *dev, struct drm_file *file_priv, uint32_t id, uint32_t type) { struct drm_mode_object *obj = NULL; mutex_lock(&dev->mode_config.idr_mutex); obj = idr_find(&dev->mode_config.object_idr, id); if (obj && type != DRM_MODE_OBJECT_ANY && obj->type != type) obj = NULL; if (obj && obj->id != id) obj = NULL; if (obj && drm_mode_object_lease_required(obj->type) && !_drm_lease_held(file_priv, obj->id)) { drm_dbg_kms(dev, "[OBJECT:%d] not included in lease", id); obj = NULL; } if (obj && obj->free_cb) { if (!kref_get_unless_zero(&obj->refcount)) obj = NULL; } mutex_unlock(&dev->mode_config.idr_mutex); return obj; } /** * drm_mode_object_find - look up a drm object with static lifetime * @dev: drm device * @file_priv: drm file * @id: id of the mode object * @type: type of the mode object * * This function is used to look up a modeset object. It will acquire a * reference for reference counted objects. This reference must be dropped again * by callind drm_mode_object_put(). */ struct drm_mode_object *drm_mode_object_find(struct drm_device *dev, struct drm_file *file_priv, uint32_t id, uint32_t type) { struct drm_mode_object *obj = NULL; obj = __drm_mode_object_find(dev, file_priv, id, type); return obj; } EXPORT_SYMBOL(drm_mode_object_find); /** * drm_mode_object_put - release a mode object reference * @obj: DRM mode object * * This function decrements the object's refcount if it is a refcounted modeset * object. It is a no-op on any other object. This is used to drop references * acquired with drm_mode_object_get(). */ void drm_mode_object_put(struct drm_mode_object *obj) { if (obj->free_cb) { DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, kref_read(&obj->refcount)); kref_put(&obj->refcount, obj->free_cb); } } EXPORT_SYMBOL(drm_mode_object_put); /** * drm_mode_object_get - acquire a mode object reference * @obj: DRM mode object * * This function increments the object's refcount if it is a refcounted modeset * object. It is a no-op on any other object. References should be dropped again * by calling drm_mode_object_put(). */ void drm_mode_object_get(struct drm_mode_object *obj) { if (obj->free_cb) { DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, kref_read(&obj->refcount)); kref_get(&obj->refcount); } } EXPORT_SYMBOL(drm_mode_object_get); /** * drm_object_attach_property - attach a property to a modeset object * @obj: drm modeset object * @property: property to attach * @init_val: initial value of the property * * This attaches the given property to the modeset object with the given initial * value. Currently this function cannot fail since the properties are stored in * a statically sized array. * * Note that all properties must be attached before the object itself is * registered and accessible from userspace. */ void drm_object_attach_property(struct drm_mode_object *obj, struct drm_property *property, uint64_t init_val) { int count = obj->properties->count; struct drm_device *dev = property->dev; if (obj->type == DRM_MODE_OBJECT_CONNECTOR) { struct drm_connector *connector = obj_to_connector(obj); WARN_ON(!dev->driver->load && connector->registration_state == DRM_CONNECTOR_REGISTERED); } else { WARN_ON(!dev->driver->load && dev->registered); } if (count == DRM_OBJECT_MAX_PROPERTY) { WARN(1, "Failed to attach object property (type: 0x%x). Please " "increase DRM_OBJECT_MAX_PROPERTY by 1 for each time " "you see this message on the same object type.\n", obj->type); return; } obj->properties->properties[count] = property; obj->properties->values[count] = init_val; obj->properties->count++; } EXPORT_SYMBOL(drm_object_attach_property); /** * drm_object_property_set_value - set the value of a property * @obj: drm mode object to set property value for * @property: property to set * @val: value the property should be set to * * This function sets a given property on a given object. This function only * changes the software state of the property, it does not call into the * driver's ->set_property callback. * * Note that atomic drivers should not have any need to call this, the core will * ensure consistency of values reported back to userspace through the * appropriate ->atomic_get_property callback. Only legacy drivers should call * this function to update the tracked value (after clamping and other * restrictions have been applied). * * Returns: * Zero on success, error code on failure. */ int drm_object_property_set_value(struct drm_mode_object *obj, struct drm_property *property, uint64_t val) { int i; WARN_ON(drm_drv_uses_atomic_modeset(property->dev) && !(property->flags & DRM_MODE_PROP_IMMUTABLE)); for (i = 0; i < obj->properties->count; i++) { if (obj->properties->properties[i] == property) { obj->properties->values[i] = val; return 0; } } return -EINVAL; } EXPORT_SYMBOL(drm_object_property_set_value); static int __drm_object_property_get_prop_value(struct drm_mode_object *obj, struct drm_property *property, uint64_t *val) { int i; for (i = 0; i < obj->properties->count; i++) { if (obj->properties->properties[i] == property) { *val = obj->properties->values[i]; return 0; } } return -EINVAL; } static int __drm_object_property_get_value(struct drm_mode_object *obj, struct drm_property *property, uint64_t *val) { /* read-only properties bypass atomic mechanism and still store * their value in obj->properties->values[].. mostly to avoid * having to deal w/ EDID and similar props in atomic paths: */ if (drm_drv_uses_atomic_modeset(property->dev) && !(property->flags & DRM_MODE_PROP_IMMUTABLE)) return drm_atomic_get_property(obj, property, val); return __drm_object_property_get_prop_value(obj, property, val); } /** * drm_object_property_get_value - retrieve the value of a property * @obj: drm mode object to get property value from * @property: property to retrieve * @val: storage for the property value * * This function retrieves the softare state of the given property for the given * property. Since there is no driver callback to retrieve the current property * value this might be out of sync with the hardware, depending upon the driver * and property. * * Atomic drivers should never call this function directly, the core will read * out property values through the various ->atomic_get_property callbacks. * * Returns: * Zero on success, error code on failure. */ int drm_object_property_get_value(struct drm_mode_object *obj, struct drm_property *property, uint64_t *val) { WARN_ON(drm_drv_uses_atomic_modeset(property->dev)); return __drm_object_property_get_value(obj, property, val); } EXPORT_SYMBOL(drm_object_property_get_value); /** * drm_object_property_get_default_value - retrieve the default value of a * property when in atomic mode. * @obj: drm mode object to get property value from * @property: property to retrieve * @val: storage for the property value * * This function retrieves the default state of the given property as passed in * to drm_object_attach_property * * Only atomic drivers should call this function directly, as for non-atomic * drivers it will return the current value. * * Returns: * Zero on success, error code on failure. */ int drm_object_property_get_default_value(struct drm_mode_object *obj, struct drm_property *property, uint64_t *val) { WARN_ON(!drm_drv_uses_atomic_modeset(property->dev)); return __drm_object_property_get_prop_value(obj, property, val); } EXPORT_SYMBOL(drm_object_property_get_default_value); /* helper for getconnector and getproperties ioctls */ int drm_mode_object_get_properties(struct drm_mode_object *obj, bool atomic, uint32_t __user *prop_ptr, uint64_t __user *prop_values, uint32_t *arg_count_props) { int i, ret, count; for (i = 0, count = 0; i < obj->properties->count; i++) { struct drm_property *prop = obj->properties->properties[i]; uint64_t val; if ((prop->flags & DRM_MODE_PROP_ATOMIC) && !atomic) continue; if (*arg_count_props > count) { ret = __drm_object_property_get_value(obj, prop, &val); if (ret) return ret; if (put_user(prop->base.id, prop_ptr + count)) return -EFAULT; if (put_user(val, prop_values + count)) return -EFAULT; } count++; } *arg_count_props = count; return 0; } /** * drm_mode_obj_get_properties_ioctl - get the current value of a object's property * @dev: DRM device * @data: ioctl data * @file_priv: DRM file info * * This function retrieves the current value for an object's property. Compared * to the connector specific ioctl this one is extended to also work on crtc and * plane objects. * * Called by the user via ioctl. * * Returns: * Zero on success, negative errno on failure. */ int drm_mode_obj_get_properties_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_obj_get_properties *arg = data; struct drm_mode_object *obj; struct drm_modeset_acquire_ctx ctx; int ret = 0; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret); obj = drm_mode_object_find(dev, file_priv, arg->obj_id, arg->obj_type); if (!obj) { ret = -ENOENT; goto out; } if (!obj->properties) { ret = -EINVAL; goto out_unref; } ret = drm_mode_object_get_properties(obj, file_priv->atomic, (uint32_t __user *)(unsigned long)(arg->props_ptr), (uint64_t __user *)(unsigned long)(arg->prop_values_ptr), &arg->count_props); out_unref: drm_mode_object_put(obj); out: DRM_MODESET_LOCK_ALL_END(dev, ctx, ret); return ret; } struct drm_property *drm_mode_obj_find_prop_id(struct drm_mode_object *obj, uint32_t prop_id) { int i; for (i = 0; i < obj->properties->count; i++) if (obj->properties->properties[i]->base.id == prop_id) return obj->properties->properties[i]; return NULL; } EXPORT_SYMBOL_FOR_TESTS_ONLY(drm_mode_obj_find_prop_id); static int set_property_legacy(struct drm_mode_object *obj, struct drm_property *prop, uint64_t prop_value) { struct drm_device *dev = prop->dev; struct drm_mode_object *ref; struct drm_modeset_acquire_ctx ctx; int ret = -EINVAL; if (!drm_property_change_valid_get(prop, prop_value, &ref)) return -EINVAL; DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret); switch (obj->type) { case DRM_MODE_OBJECT_CONNECTOR: ret = drm_connector_set_obj_prop(obj, prop, prop_value); break; case DRM_MODE_OBJECT_CRTC: ret = drm_mode_crtc_set_obj_prop(obj, prop, prop_value); break; case DRM_MODE_OBJECT_PLANE: ret = drm_mode_plane_set_obj_prop(obj_to_plane(obj), prop, prop_value); break; } drm_property_change_valid_put(prop, ref); DRM_MODESET_LOCK_ALL_END(dev, ctx, ret); return ret; } static int set_property_atomic(struct drm_mode_object *obj, struct drm_file *file_priv, struct drm_property *prop, uint64_t prop_value) { struct drm_device *dev = prop->dev; struct drm_atomic_state *state; struct drm_modeset_acquire_ctx ctx; int ret; state = drm_atomic_state_alloc(dev); if (!state) return -ENOMEM; drm_modeset_acquire_init(&ctx, 0); state->acquire_ctx = &ctx; retry: if (prop == state->dev->mode_config.dpms_property) { if (obj->type != DRM_MODE_OBJECT_CONNECTOR) { ret = -EINVAL; goto out; } ret = drm_atomic_connector_commit_dpms(state, obj_to_connector(obj), prop_value); } else { ret = drm_atomic_set_property(state, file_priv, obj, prop, prop_value, false); if (ret) goto out; ret = drm_atomic_commit(state); } out: if (ret == -EDEADLK) { drm_atomic_state_clear(state); drm_modeset_backoff(&ctx); goto retry; } drm_atomic_state_put(state); drm_modeset_drop_locks(&ctx); drm_modeset_acquire_fini(&ctx); return ret; } int drm_mode_obj_set_property_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv) { struct drm_mode_obj_set_property *arg = data; struct drm_mode_object *arg_obj; struct drm_property *property; int ret = -EINVAL; if (!drm_core_check_feature(dev, DRIVER_MODESET)) return -EOPNOTSUPP; arg_obj = drm_mode_object_find(dev, file_priv, arg->obj_id, arg->obj_type); if (!arg_obj) return -ENOENT; if (!arg_obj->properties) goto out_unref; property = drm_mode_obj_find_prop_id(arg_obj, arg->prop_id); if (!property) goto out_unref; if (drm_drv_uses_atomic_modeset(property->dev)) ret = set_property_atomic(arg_obj, file_priv, property, arg->value); else ret = set_property_legacy(arg_obj, property, arg->value); out_unref: drm_mode_object_put(arg_obj); return ret; }
309 309 46 5 222 174 13 10 4 106 46 85 85 85 85 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 // SPDX-License-Identifier: GPL-2.0 /* Multipath TCP * * Copyright (c) 2019, Tessares SA. */ #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <net/net_namespace.h> #include <net/netns/generic.h> #include "protocol.h" #define MPTCP_SYSCTL_PATH "net/mptcp" static int mptcp_pernet_id; #ifdef CONFIG_SYSCTL static int mptcp_pm_type_max = __MPTCP_PM_TYPE_MAX; #endif struct mptcp_pernet { #ifdef CONFIG_SYSCTL struct ctl_table_header *ctl_table_hdr; #endif unsigned int add_addr_timeout; unsigned int close_timeout; unsigned int stale_loss_cnt; u8 mptcp_enabled; u8 checksum_enabled; u8 allow_join_initial_addr_port; u8 pm_type; char scheduler[MPTCP_SCHED_NAME_MAX]; }; static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) { return net_generic(net, mptcp_pernet_id); } int mptcp_is_enabled(const struct net *net) { return mptcp_get_pernet(net)->mptcp_enabled; } unsigned int mptcp_get_add_addr_timeout(const struct net *net) { return mptcp_get_pernet(net)->add_addr_timeout; } int mptcp_is_checksum_enabled(const struct net *net) { return mptcp_get_pernet(net)->checksum_enabled; } int mptcp_allow_join_id0(const struct net *net) { return mptcp_get_pernet(net)->allow_join_initial_addr_port; } unsigned int mptcp_stale_loss_cnt(const struct net *net) { return mptcp_get_pernet(net)->stale_loss_cnt; } unsigned int mptcp_close_timeout(const struct sock *sk) { if (sock_flag(sk, SOCK_DEAD)) return TCP_TIMEWAIT_LEN; return mptcp_get_pernet(sock_net(sk))->close_timeout; } int mptcp_get_pm_type(const struct net *net) { return mptcp_get_pernet(net)->pm_type; } const char *mptcp_get_scheduler(const struct net *net) { return mptcp_get_pernet(net)->scheduler; } static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; pernet->close_timeout = TCP_TIMEWAIT_LEN; pernet->checksum_enabled = 0; pernet->allow_join_initial_addr_port = 1; pernet->stale_loss_cnt = 4; pernet->pm_type = MPTCP_PM_TYPE_KERNEL; strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler)); } #ifdef CONFIG_SYSCTL static int mptcp_set_scheduler(const struct net *net, const char *name) { struct mptcp_pernet *pernet = mptcp_get_pernet(net); struct mptcp_sched_ops *sched; int ret = 0; rcu_read_lock(); sched = mptcp_sched_find(name); if (sched) strscpy(pernet->scheduler, name, MPTCP_SCHED_NAME_MAX); else ret = -ENOENT; rcu_read_unlock(); return ret; } static int proc_scheduler(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { const struct net *net = current->nsproxy->net_ns; char val[MPTCP_SCHED_NAME_MAX]; struct ctl_table tbl = { .data = val, .maxlen = MPTCP_SCHED_NAME_MAX, }; int ret; strscpy(val, mptcp_get_scheduler(net), MPTCP_SCHED_NAME_MAX); ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) ret = mptcp_set_scheduler(net, val); return ret; } static int proc_available_schedulers(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table tbl = { .maxlen = MPTCP_SCHED_BUF_MAX, }; int ret; tbl.data = kmalloc(tbl.maxlen, GFP_USER); if (!tbl.data) return -ENOMEM; mptcp_get_available_schedulers(tbl.data, MPTCP_SCHED_BUF_MAX); ret = proc_dostring(&tbl, write, buffer, lenp, ppos); kfree(tbl.data); return ret; } static struct ctl_table mptcp_sysctl_table[] = { { .procname = "enabled", .maxlen = sizeof(u8), .mode = 0644, /* users with CAP_NET_ADMIN or root (not and) can change this * value, same as other sysctl or the 'net' tree. */ .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE }, { .procname = "add_addr_timeout", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "checksum_enabled", .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE }, { .procname = "allow_join_initial_addr_port", .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE }, { .procname = "stale_loss_cnt", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_douintvec_minmax, }, { .procname = "pm_type", .maxlen = sizeof(u8), .mode = 0644, .proc_handler = proc_dou8vec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = &mptcp_pm_type_max }, { .procname = "scheduler", .maxlen = MPTCP_SCHED_NAME_MAX, .mode = 0644, .proc_handler = proc_scheduler, }, { .procname = "available_schedulers", .maxlen = MPTCP_SCHED_BUF_MAX, .mode = 0644, .proc_handler = proc_available_schedulers, }, { .procname = "close_timeout", .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, }; static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) { struct ctl_table_header *hdr; struct ctl_table *table; table = mptcp_sysctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(mptcp_sysctl_table), GFP_KERNEL); if (!table) goto err_alloc; } table[0].data = &pernet->mptcp_enabled; table[1].data = &pernet->add_addr_timeout; table[2].data = &pernet->checksum_enabled; table[3].data = &pernet->allow_join_initial_addr_port; table[4].data = &pernet->stale_loss_cnt; table[5].data = &pernet->pm_type; table[6].data = &pernet->scheduler; /* table[7] is for available_schedulers which is read-only info */ table[8].data = &pernet->close_timeout; hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, ARRAY_SIZE(mptcp_sysctl_table)); if (!hdr) goto err_reg; pernet->ctl_table_hdr = hdr; return 0; err_reg: if (!net_eq(net, &init_net)) kfree(table); err_alloc: return -ENOMEM; } static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) { const struct ctl_table *table = pernet->ctl_table_hdr->ctl_table_arg; unregister_net_sysctl_table(pernet->ctl_table_hdr); kfree(table); } #else static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) { return 0; } static void mptcp_pernet_del_table(struct mptcp_pernet *pernet) {} #endif /* CONFIG_SYSCTL */ static int __net_init mptcp_net_init(struct net *net) { struct mptcp_pernet *pernet = mptcp_get_pernet(net); mptcp_pernet_set_defaults(pernet); return mptcp_pernet_new_table(net, pernet); } /* Note: the callback will only be called per extra netns */ static void __net_exit mptcp_net_exit(struct net *net) { struct mptcp_pernet *pernet = mptcp_get_pernet(net); mptcp_pernet_del_table(pernet); } static struct pernet_operations mptcp_pernet_ops = { .init = mptcp_net_init, .exit = mptcp_net_exit, .id = &mptcp_pernet_id, .size = sizeof(struct mptcp_pernet), }; void __init mptcp_init(void) { mptcp_join_cookie_init(); mptcp_proto_init(); if (register_pernet_subsys(&mptcp_pernet_ops) < 0) panic("Failed to register MPTCP pernet subsystem.\n"); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) int __init mptcpv6_init(void) { int err; err = mptcp_proto_v6_init(); return err; } #endif
2 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 // SPDX-License-Identifier: GPL-2.0-only /* * IPV4 GSO/GRO offload support * Linux INET implementation * * Copyright (C) 2016 secunet Security Networks AG * Author: Steffen Klassert <steffen.klassert@secunet.com> * * ESP GRO support */ #include <linux/skbuff.h> #include <linux/init.h> #include <net/protocol.h> #include <crypto/aead.h> #include <crypto/authenc.h> #include <linux/err.h> #include <linux/module.h> #include <net/gro.h> #include <net/gso.h> #include <net/ip.h> #include <net/xfrm.h> #include <net/esp.h> #include <linux/scatterlist.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <net/udp.h> static struct sk_buff *esp4_gro_receive(struct list_head *head, struct sk_buff *skb) { int offset = skb_gro_offset(skb); struct xfrm_offload *xo; struct xfrm_state *x; int encap_type = 0; __be32 seq; __be32 spi; if (!pskb_pull(skb, offset)) return NULL; if (xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq) != 0) goto out; xo = xfrm_offload(skb); if (!xo || !(xo->flags & CRYPTO_DONE)) { struct sec_path *sp = secpath_set(skb); if (!sp) goto out; if (sp->len == XFRM_MAX_DEPTH) goto out_reset; x = xfrm_state_lookup(dev_net(skb->dev), skb->mark, (xfrm_address_t *)&ip_hdr(skb)->daddr, spi, IPPROTO_ESP, AF_INET); if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) { /* non-offload path will record the error and audit log */ xfrm_state_put(x); x = NULL; } if (!x) goto out_reset; skb->mark = xfrm_smark_get(skb->mark, x); sp->xvec[sp->len++] = x; sp->olen++; xo = xfrm_offload(skb); if (!xo) goto out_reset; } xo->flags |= XFRM_GRO; if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) encap_type = UDP_ENCAP_ESPINUDP; XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); XFRM_SPI_SKB_CB(skb)->seq = seq; /* We don't need to handle errors from xfrm_input, it does all * the error handling and frees the resources on error. */ xfrm_input(skb, IPPROTO_ESP, spi, encap_type); return ERR_PTR(-EINPROGRESS); out_reset: secpath_reset(skb); out: skb_push(skb, offset); NAPI_GRO_CB(skb)->same_flow = 0; NAPI_GRO_CB(skb)->flush = 1; return NULL; } static void esp4_gso_encap(struct xfrm_state *x, struct sk_buff *skb) { struct ip_esp_hdr *esph; struct iphdr *iph = ip_hdr(skb); struct xfrm_offload *xo = xfrm_offload(skb); int proto = iph->protocol; skb_push(skb, -skb_network_offset(skb)); esph = ip_esp_hdr(skb); *skb_mac_header(skb) = IPPROTO_ESP; esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); xo->proto = proto; } static struct sk_buff *xfrm4_tunnel_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { __be16 type = x->inner_mode.family == AF_INET6 ? htons(ETH_P_IPV6) : htons(ETH_P_IP); return skb_eth_gso_segment(skb, features, type); } static struct sk_buff *xfrm4_transport_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { const struct net_offload *ops; struct sk_buff *segs = ERR_PTR(-EINVAL); struct xfrm_offload *xo = xfrm_offload(skb); skb->transport_header += x->props.header_len; ops = rcu_dereference(inet_offloads[xo->proto]); if (likely(ops && ops->callbacks.gso_segment)) segs = ops->callbacks.gso_segment(skb, features); return segs; } static struct sk_buff *xfrm4_beet_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { struct xfrm_offload *xo = xfrm_offload(skb); struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; u8 proto = xo->proto; skb->transport_header += x->props.header_len; if (x->sel.family != AF_INET6) { if (proto == IPPROTO_BEETPH) { struct ip_beet_phdr *ph = (struct ip_beet_phdr *)skb->data; skb->transport_header += ph->hdrlen * 8; proto = ph->nexthdr; } else { skb->transport_header -= IPV4_BEET_PHMAXLEN; } } else { __be16 frag; skb->transport_header += ipv6_skip_exthdr(skb, 0, &proto, &frag); if (proto == IPPROTO_TCP) skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; } if (proto == IPPROTO_IPV6) skb_shinfo(skb)->gso_type |= SKB_GSO_IPXIP4; __skb_pull(skb, skb_transport_offset(skb)); ops = rcu_dereference(inet_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) segs = ops->callbacks.gso_segment(skb, features); return segs; } static struct sk_buff *xfrm4_outer_mode_gso_segment(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { switch (x->outer_mode.encap) { case XFRM_MODE_TUNNEL: return xfrm4_tunnel_gso_segment(x, skb, features); case XFRM_MODE_TRANSPORT: return xfrm4_transport_gso_segment(x, skb, features); case XFRM_MODE_BEET: return xfrm4_beet_gso_segment(x, skb, features); } return ERR_PTR(-EOPNOTSUPP); } static struct sk_buff *esp4_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct xfrm_state *x; struct ip_esp_hdr *esph; struct crypto_aead *aead; netdev_features_t esp_features = features; struct xfrm_offload *xo = xfrm_offload(skb); struct sec_path *sp; if (!xo) return ERR_PTR(-EINVAL); if (!(skb_shinfo(skb)->gso_type & SKB_GSO_ESP)) return ERR_PTR(-EINVAL); sp = skb_sec_path(skb); x = sp->xvec[sp->len - 1]; aead = x->data; esph = ip_esp_hdr(skb); if (esph->spi != x->id.spi) return ERR_PTR(-EINVAL); if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) return ERR_PTR(-EINVAL); __skb_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead)); skb->encap_hdr_csum = 1; if ((!(skb->dev->gso_partial_features & NETIF_F_HW_ESP) && !(features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev) esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC); else if (!(features & NETIF_F_HW_ESP_TX_CSUM) && !(skb->dev->gso_partial_features & NETIF_F_HW_ESP_TX_CSUM)) esp_features = features & ~(NETIF_F_CSUM_MASK | NETIF_F_SCTP_CRC); xo->flags |= XFRM_GSO_SEGMENT; return xfrm4_outer_mode_gso_segment(x, skb, esp_features); } static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) { struct crypto_aead *aead = x->data; struct xfrm_offload *xo = xfrm_offload(skb); if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead))) return -EINVAL; if (!(xo->flags & CRYPTO_DONE)) skb->ip_summed = CHECKSUM_NONE; return esp_input_done2(skb, 0); } static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_t features) { int err; int alen; int blksize; struct xfrm_offload *xo; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct esp_info esp; bool hw_offload = true; __u32 seq; int encap_type = 0; esp.inplace = true; xo = xfrm_offload(skb); if (!xo) return -EINVAL; if ((!(features & NETIF_F_HW_ESP) && !(skb->dev->gso_partial_features & NETIF_F_HW_ESP)) || x->xso.dev != skb->dev) { xo->flags |= CRYPTO_FALLBACK; hw_offload = false; } esp.proto = xo->proto; /* skb is pure payload to encrypt */ aead = x->data; alen = crypto_aead_authsize(aead); esp.tfclen = 0; /* XXX: Add support for tfc padding here. */ blksize = ALIGN(crypto_aead_blocksize(aead), 4); esp.clen = ALIGN(skb->len + 2 + esp.tfclen, blksize); esp.plen = esp.clen - skb->len - esp.tfclen; esp.tailen = esp.tfclen + esp.plen + alen; esp.esph = ip_esp_hdr(skb); if (x->encap) encap_type = x->encap->encap_type; if (!hw_offload || !skb_is_gso(skb) || (hw_offload && encap_type == UDP_ENCAP_ESPINUDP)) { esp.nfrags = esp_output_head(x, skb, &esp); if (esp.nfrags < 0) return esp.nfrags; } seq = xo->seq.low; esph = esp.esph; esph->spi = x->id.spi; skb_push(skb, -skb_network_offset(skb)); if (xo->flags & XFRM_GSO_SEGMENT) { esph->seq_no = htonl(seq); if (!skb_is_gso(skb)) xo->seq.low++; else xo->seq.low += skb_shinfo(skb)->gso_segs; } if (xo->seq.low < seq) xo->seq.hi++; esp.seqno = cpu_to_be64(seq + ((u64)xo->seq.hi << 32)); if (hw_offload && encap_type == UDP_ENCAP_ESPINUDP) { /* In the XFRM stack, the encapsulation protocol is set to iphdr->protocol by * setting *skb_mac_header(skb) (see esp_output_udp_encap()) where skb->mac_header * points to iphdr->protocol (see xfrm4_tunnel_encap_add()). * However, in esp_xmit(), skb->mac_header doesn't point to iphdr->protocol. * Therefore, the protocol field needs to be corrected. */ ip_hdr(skb)->protocol = IPPROTO_UDP; esph->seq_no = htonl(seq); } ip_hdr(skb)->tot_len = htons(skb->len); ip_send_check(ip_hdr(skb)); if (hw_offload) { if (!skb_ext_add(skb, SKB_EXT_SEC_PATH)) return -ENOMEM; xo = xfrm_offload(skb); if (!xo) return -EINVAL; xo->flags |= XFRM_XMIT; return 0; } err = esp_output_tail(x, skb, &esp); if (err) return err; secpath_reset(skb); if (skb_needs_linearize(skb, skb->dev->features) && __skb_linearize(skb)) return -ENOMEM; return 0; } static const struct net_offload esp4_offload = { .callbacks = { .gro_receive = esp4_gro_receive, .gso_segment = esp4_gso_segment, }, }; static const struct xfrm_type_offload esp_type_offload = { .owner = THIS_MODULE, .proto = IPPROTO_ESP, .input_tail = esp_input_tail, .xmit = esp_xmit, .encap = esp4_gso_encap, }; static int __init esp4_offload_init(void) { if (xfrm_register_type_offload(&esp_type_offload, AF_INET) < 0) { pr_info("%s: can't add xfrm type offload\n", __func__); return -EAGAIN; } return inet_add_offload(&esp4_offload, IPPROTO_ESP); } static void __exit esp4_offload_exit(void) { xfrm_unregister_type_offload(&esp_type_offload, AF_INET); inet_del_offload(&esp4_offload, IPPROTO_ESP); } module_init(esp4_offload_init); module_exit(esp4_offload_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP); MODULE_DESCRIPTION("IPV4 GSO/GRO offload support");
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_SMT_H #define _LINUX_SCHED_SMT_H #include <linux/static_key.h> #ifdef CONFIG_SCHED_SMT extern struct static_key_false sched_smt_present; static __always_inline bool sched_smt_active(void) { return static_branch_likely(&sched_smt_present); } #else static inline bool sched_smt_active(void) { return false; } #endif void arch_smt_update(void); #endif /* _LINUX_SCHED_SMT_H */
5 5 3 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * SM4, as specified in * https://tools.ietf.org/id/draft-ribose-cfrg-sm4-10.html * * Copyright (C) 2018 ARM Limited or its affiliates. * Copyright (c) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> */ #include <linux/module.h> #include <asm/unaligned.h> #include <crypto/sm4.h> static const u32 ____cacheline_aligned fk[4] = { 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc }; static const u32 ____cacheline_aligned ck[32] = { 0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269, 0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9, 0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249, 0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9, 0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229, 0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299, 0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209, 0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279 }; static const u8 ____cacheline_aligned sbox[256] = { 0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7, 0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05, 0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3, 0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99, 0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a, 0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62, 0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95, 0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6, 0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba, 0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8, 0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b, 0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35, 0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2, 0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87, 0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52, 0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e, 0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5, 0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1, 0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55, 0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3, 0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60, 0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f, 0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f, 0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51, 0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f, 0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8, 0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd, 0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0, 0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e, 0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84, 0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20, 0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48 }; extern const u32 crypto_sm4_fk[4] __alias(fk); extern const u32 crypto_sm4_ck[32] __alias(ck); extern const u8 crypto_sm4_sbox[256] __alias(sbox); EXPORT_SYMBOL(crypto_sm4_fk); EXPORT_SYMBOL(crypto_sm4_ck); EXPORT_SYMBOL(crypto_sm4_sbox); static inline u32 sm4_t_non_lin_sub(u32 x) { u32 out; out = (u32)sbox[x & 0xff]; out |= (u32)sbox[(x >> 8) & 0xff] << 8; out |= (u32)sbox[(x >> 16) & 0xff] << 16; out |= (u32)sbox[(x >> 24) & 0xff] << 24; return out; } static inline u32 sm4_key_lin_sub(u32 x) { return x ^ rol32(x, 13) ^ rol32(x, 23); } static inline u32 sm4_enc_lin_sub(u32 x) { return x ^ rol32(x, 2) ^ rol32(x, 10) ^ rol32(x, 18) ^ rol32(x, 24); } static inline u32 sm4_key_sub(u32 x) { return sm4_key_lin_sub(sm4_t_non_lin_sub(x)); } static inline u32 sm4_enc_sub(u32 x) { return sm4_enc_lin_sub(sm4_t_non_lin_sub(x)); } static inline u32 sm4_round(u32 x0, u32 x1, u32 x2, u32 x3, u32 rk) { return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk); } /** * sm4_expandkey - Expands the SM4 key as described in GB/T 32907-2016 * @ctx: The location where the computed key will be stored. * @in_key: The supplied key. * @key_len: The length of the supplied key. * * Returns 0 on success. The function fails only if an invalid key size (or * pointer) is supplied. */ int sm4_expandkey(struct sm4_ctx *ctx, const u8 *in_key, unsigned int key_len) { u32 rk[4]; const u32 *key = (u32 *)in_key; int i; if (key_len != SM4_KEY_SIZE) return -EINVAL; rk[0] = get_unaligned_be32(&key[0]) ^ fk[0]; rk[1] = get_unaligned_be32(&key[1]) ^ fk[1]; rk[2] = get_unaligned_be32(&key[2]) ^ fk[2]; rk[3] = get_unaligned_be32(&key[3]) ^ fk[3]; for (i = 0; i < 32; i += 4) { rk[0] ^= sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]); rk[1] ^= sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]); rk[2] ^= sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]); rk[3] ^= sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]); ctx->rkey_enc[i + 0] = rk[0]; ctx->rkey_enc[i + 1] = rk[1]; ctx->rkey_enc[i + 2] = rk[2]; ctx->rkey_enc[i + 3] = rk[3]; ctx->rkey_dec[31 - 0 - i] = rk[0]; ctx->rkey_dec[31 - 1 - i] = rk[1]; ctx->rkey_dec[31 - 2 - i] = rk[2]; ctx->rkey_dec[31 - 3 - i] = rk[3]; } return 0; } EXPORT_SYMBOL_GPL(sm4_expandkey); /** * sm4_crypt_block - Encrypt or decrypt a single SM4 block * @rk: The rkey_enc for encrypt or rkey_dec for decrypt * @out: Buffer to store output data * @in: Buffer containing the input data */ void sm4_crypt_block(const u32 *rk, u8 *out, const u8 *in) { u32 x[4], i; x[0] = get_unaligned_be32(in + 0 * 4); x[1] = get_unaligned_be32(in + 1 * 4); x[2] = get_unaligned_be32(in + 2 * 4); x[3] = get_unaligned_be32(in + 3 * 4); for (i = 0; i < 32; i += 4) { x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]); x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]); x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]); x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]); } put_unaligned_be32(x[3 - 0], out + 0 * 4); put_unaligned_be32(x[3 - 1], out + 1 * 4); put_unaligned_be32(x[3 - 2], out + 2 * 4); put_unaligned_be32(x[3 - 3], out + 3 * 4); } EXPORT_SYMBOL_GPL(sm4_crypt_block); MODULE_DESCRIPTION("Generic SM4 library"); MODULE_LICENSE("GPL v2");
95 7 80 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 /* SPDX-License-Identifier: GPL-2.0-or-later */ /* SCTP kernel reference Implementation * Copyright (c) 1999-2001 Motorola, Inc. * Copyright (c) 2001-2003 International Business Machines, Corp. * * This file is part of the SCTP kernel reference Implementation * * SCTP Checksum functions * * Please send any bug reports or fixes you make to the * email address(es): * lksctp developers <linux-sctp@vger.kernel.org> * * Written or modified by: * Dinakaran Joseph * Jon Grimm <jgrimm@us.ibm.com> * Sridhar Samudrala <sri@us.ibm.com> * * Rewritten to use libcrc32c by: * Vlad Yasevich <vladislav.yasevich@hp.com> */ #ifndef __sctp_checksum_h__ #define __sctp_checksum_h__ #include <linux/types.h> #include <linux/sctp.h> #include <linux/crc32c.h> #include <linux/crc32.h> static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum) { /* This uses the crypto implementation of crc32c, which is either * implemented w/ hardware support or resolves to __crc32c_le(). */ return (__force __wsum)crc32c((__force __u32)sum, buff, len); } static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2, int offset, int len) { return (__force __wsum)__crc32c_le_combine((__force __u32)csum, (__force __u32)csum2, len); } static const struct skb_checksum_ops sctp_csum_ops = { .update = sctp_csum_update, .combine = sctp_csum_combine, }; static inline __le32 sctp_compute_cksum(const struct sk_buff *skb, unsigned int offset) { struct sctphdr *sh = (struct sctphdr *)(skb->data + offset); __le32 old = sh->checksum; __wsum new; sh->checksum = 0; new = ~__skb_checksum(skb, offset, skb->len - offset, ~(__wsum)0, &sctp_csum_ops); sh->checksum = old; return cpu_to_le32((__force __u32)new); } #endif /* __sctp_checksum_h__ */
12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_INCLUDE_PATH ../../drivers/dma-buf #define TRACE_SYSTEM sync_trace #if !defined(_TRACE_SYNC_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_SYNC_H #include "sync_debug.h" #include <linux/tracepoint.h> TRACE_EVENT(sync_timeline, TP_PROTO(struct sync_timeline *timeline), TP_ARGS(timeline), TP_STRUCT__entry( __string(name, timeline->name) __field(u32, value) ), TP_fast_assign( __assign_str(name); __entry->value = timeline->value; ), TP_printk("name=%s value=%d", __get_str(name), __entry->value) ); #endif /* if !defined(_TRACE_SYNC_H) || defined(TRACE_HEADER_MULTI_READ) */ /* This part must be outside protection */ #include <trace/define_trace.h>
4 4 8 5 6 6 6 6 6 6 6 1148 1146 13 19 13 12 6 6 9 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 // SPDX-License-Identifier: GPL-2.0-only #include <linux/ethtool_netlink.h> #include <linux/net_tstamp.h> #include <linux/phy.h> #include <linux/rtnetlink.h> #include <linux/ptp_clock_kernel.h> #include "common.h" const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = { [NETIF_F_SG_BIT] = "tx-scatter-gather", [NETIF_F_IP_CSUM_BIT] = "tx-checksum-ipv4", [NETIF_F_HW_CSUM_BIT] = "tx-checksum-ip-generic", [NETIF_F_IPV6_CSUM_BIT] = "tx-checksum-ipv6", [NETIF_F_HIGHDMA_BIT] = "highdma", [NETIF_F_FRAGLIST_BIT] = "tx-scatter-gather-fraglist", [NETIF_F_HW_VLAN_CTAG_TX_BIT] = "tx-vlan-hw-insert", [NETIF_F_HW_VLAN_CTAG_RX_BIT] = "rx-vlan-hw-parse", [NETIF_F_HW_VLAN_CTAG_FILTER_BIT] = "rx-vlan-filter", [NETIF_F_HW_VLAN_STAG_TX_BIT] = "tx-vlan-stag-hw-insert", [NETIF_F_HW_VLAN_STAG_RX_BIT] = "rx-vlan-stag-hw-parse", [NETIF_F_HW_VLAN_STAG_FILTER_BIT] = "rx-vlan-stag-filter", [NETIF_F_VLAN_CHALLENGED_BIT] = "vlan-challenged", [NETIF_F_GSO_BIT] = "tx-generic-segmentation", [NETIF_F_LLTX_BIT] = "tx-lockless", [NETIF_F_NETNS_LOCAL_BIT] = "netns-local", [NETIF_F_GRO_BIT] = "rx-gro", [NETIF_F_GRO_HW_BIT] = "rx-gro-hw", [NETIF_F_LRO_BIT] = "rx-lro", [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", [NETIF_F_TSO6_BIT] = "tx-tcp6-segmentation", [NETIF_F_FSO_BIT] = "tx-fcoe-segmentation", [NETIF_F_GSO_GRE_BIT] = "tx-gre-segmentation", [NETIF_F_GSO_GRE_CSUM_BIT] = "tx-gre-csum-segmentation", [NETIF_F_GSO_IPXIP4_BIT] = "tx-ipxip4-segmentation", [NETIF_F_GSO_IPXIP6_BIT] = "tx-ipxip6-segmentation", [NETIF_F_GSO_UDP_TUNNEL_BIT] = "tx-udp_tnl-segmentation", [NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT] = "tx-udp_tnl-csum-segmentation", [NETIF_F_GSO_PARTIAL_BIT] = "tx-gso-partial", [NETIF_F_GSO_TUNNEL_REMCSUM_BIT] = "tx-tunnel-remcsum-segmentation", [NETIF_F_GSO_SCTP_BIT] = "tx-sctp-segmentation", [NETIF_F_GSO_ESP_BIT] = "tx-esp-segmentation", [NETIF_F_GSO_UDP_L4_BIT] = "tx-udp-segmentation", [NETIF_F_GSO_FRAGLIST_BIT] = "tx-gso-list", [NETIF_F_FCOE_CRC_BIT] = "tx-checksum-fcoe-crc", [NETIF_F_SCTP_CRC_BIT] = "tx-checksum-sctp", [NETIF_F_FCOE_MTU_BIT] = "fcoe-mtu", [NETIF_F_NTUPLE_BIT] = "rx-ntuple-filter", [NETIF_F_RXHASH_BIT] = "rx-hashing", [NETIF_F_RXCSUM_BIT] = "rx-checksum", [NETIF_F_NOCACHE_COPY_BIT] = "tx-nocache-copy", [NETIF_F_LOOPBACK_BIT] = "loopback", [NETIF_F_RXFCS_BIT] = "rx-fcs", [NETIF_F_RXALL_BIT] = "rx-all", [NETIF_F_HW_L2FW_DOFFLOAD_BIT] = "l2-fwd-offload", [NETIF_F_HW_TC_BIT] = "hw-tc-offload", [NETIF_F_HW_ESP_BIT] = "esp-hw-offload", [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload", [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload", [NETIF_F_HW_TLS_RECORD_BIT] = "tls-hw-record", [NETIF_F_HW_TLS_TX_BIT] = "tls-hw-tx-offload", [NETIF_F_HW_TLS_RX_BIT] = "tls-hw-rx-offload", [NETIF_F_GRO_FRAGLIST_BIT] = "rx-gro-list", [NETIF_F_HW_MACSEC_BIT] = "macsec-hw-offload", [NETIF_F_GRO_UDP_FWD_BIT] = "rx-udp-gro-forwarding", [NETIF_F_HW_HSR_TAG_INS_BIT] = "hsr-tag-ins-offload", [NETIF_F_HW_HSR_TAG_RM_BIT] = "hsr-tag-rm-offload", [NETIF_F_HW_HSR_FWD_BIT] = "hsr-fwd-offload", [NETIF_F_HW_HSR_DUP_BIT] = "hsr-dup-offload", }; const char rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = { [ETH_RSS_HASH_TOP_BIT] = "toeplitz", [ETH_RSS_HASH_XOR_BIT] = "xor", [ETH_RSS_HASH_CRC32_BIT] = "crc32", }; const char tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = { [ETHTOOL_ID_UNSPEC] = "Unspec", [ETHTOOL_RX_COPYBREAK] = "rx-copybreak", [ETHTOOL_TX_COPYBREAK] = "tx-copybreak", [ETHTOOL_PFC_PREVENTION_TOUT] = "pfc-prevention-tout", [ETHTOOL_TX_COPYBREAK_BUF_SIZE] = "tx-copybreak-buf-size", }; const char phy_tunable_strings[__ETHTOOL_PHY_TUNABLE_COUNT][ETH_GSTRING_LEN] = { [ETHTOOL_ID_UNSPEC] = "Unspec", [ETHTOOL_PHY_DOWNSHIFT] = "phy-downshift", [ETHTOOL_PHY_FAST_LINK_DOWN] = "phy-fast-link-down", [ETHTOOL_PHY_EDPD] = "phy-energy-detect-power-down", }; #define __LINK_MODE_NAME(speed, type, duplex) \ #speed "base" #type "/" #duplex #define __DEFINE_LINK_MODE_NAME(speed, type, duplex) \ [ETHTOOL_LINK_MODE(speed, type, duplex)] = \ __LINK_MODE_NAME(speed, type, duplex) #define __DEFINE_SPECIAL_MODE_NAME(_mode, _name) \ [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = _name const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(10, T, Half), __DEFINE_LINK_MODE_NAME(10, T, Full), __DEFINE_LINK_MODE_NAME(100, T, Half), __DEFINE_LINK_MODE_NAME(100, T, Full), __DEFINE_LINK_MODE_NAME(1000, T, Half), __DEFINE_LINK_MODE_NAME(1000, T, Full), __DEFINE_SPECIAL_MODE_NAME(Autoneg, "Autoneg"), __DEFINE_SPECIAL_MODE_NAME(TP, "TP"), __DEFINE_SPECIAL_MODE_NAME(AUI, "AUI"), __DEFINE_SPECIAL_MODE_NAME(MII, "MII"), __DEFINE_SPECIAL_MODE_NAME(FIBRE, "FIBRE"), __DEFINE_SPECIAL_MODE_NAME(BNC, "BNC"), __DEFINE_LINK_MODE_NAME(10000, T, Full), __DEFINE_SPECIAL_MODE_NAME(Pause, "Pause"), __DEFINE_SPECIAL_MODE_NAME(Asym_Pause, "Asym_Pause"), __DEFINE_LINK_MODE_NAME(2500, X, Full), __DEFINE_SPECIAL_MODE_NAME(Backplane, "Backplane"), __DEFINE_LINK_MODE_NAME(1000, KX, Full), __DEFINE_LINK_MODE_NAME(10000, KX4, Full), __DEFINE_LINK_MODE_NAME(10000, KR, Full), __DEFINE_SPECIAL_MODE_NAME(10000baseR_FEC, "10000baseR_FEC"), __DEFINE_LINK_MODE_NAME(20000, MLD2, Full), __DEFINE_LINK_MODE_NAME(20000, KR2, Full), __DEFINE_LINK_MODE_NAME(40000, KR4, Full), __DEFINE_LINK_MODE_NAME(40000, CR4, Full), __DEFINE_LINK_MODE_NAME(40000, SR4, Full), __DEFINE_LINK_MODE_NAME(40000, LR4, Full), __DEFINE_LINK_MODE_NAME(56000, KR4, Full), __DEFINE_LINK_MODE_NAME(56000, CR4, Full), __DEFINE_LINK_MODE_NAME(56000, SR4, Full), __DEFINE_LINK_MODE_NAME(56000, LR4, Full), __DEFINE_LINK_MODE_NAME(25000, CR, Full), __DEFINE_LINK_MODE_NAME(25000, KR, Full), __DEFINE_LINK_MODE_NAME(25000, SR, Full), __DEFINE_LINK_MODE_NAME(50000, CR2, Full), __DEFINE_LINK_MODE_NAME(50000, KR2, Full), __DEFINE_LINK_MODE_NAME(100000, KR4, Full), __DEFINE_LINK_MODE_NAME(100000, SR4, Full), __DEFINE_LINK_MODE_NAME(100000, CR4, Full), __DEFINE_LINK_MODE_NAME(100000, LR4_ER4, Full), __DEFINE_LINK_MODE_NAME(50000, SR2, Full), __DEFINE_LINK_MODE_NAME(1000, X, Full), __DEFINE_LINK_MODE_NAME(10000, CR, Full), __DEFINE_LINK_MODE_NAME(10000, SR, Full), __DEFINE_LINK_MODE_NAME(10000, LR, Full), __DEFINE_LINK_MODE_NAME(10000, LRM, Full), __DEFINE_LINK_MODE_NAME(10000, ER, Full), __DEFINE_LINK_MODE_NAME(2500, T, Full), __DEFINE_LINK_MODE_NAME(5000, T, Full), __DEFINE_SPECIAL_MODE_NAME(FEC_NONE, "None"), __DEFINE_SPECIAL_MODE_NAME(FEC_RS, "RS"), __DEFINE_SPECIAL_MODE_NAME(FEC_BASER, "BASER"), __DEFINE_LINK_MODE_NAME(50000, KR, Full), __DEFINE_LINK_MODE_NAME(50000, SR, Full), __DEFINE_LINK_MODE_NAME(50000, CR, Full), __DEFINE_LINK_MODE_NAME(50000, LR_ER_FR, Full), __DEFINE_LINK_MODE_NAME(50000, DR, Full), __DEFINE_LINK_MODE_NAME(100000, KR2, Full), __DEFINE_LINK_MODE_NAME(100000, SR2, Full), __DEFINE_LINK_MODE_NAME(100000, CR2, Full), __DEFINE_LINK_MODE_NAME(100000, LR2_ER2_FR2, Full), __DEFINE_LINK_MODE_NAME(100000, DR2, Full), __DEFINE_LINK_MODE_NAME(200000, KR4, Full), __DEFINE_LINK_MODE_NAME(200000, SR4, Full), __DEFINE_LINK_MODE_NAME(200000, LR4_ER4_FR4, Full), __DEFINE_LINK_MODE_NAME(200000, DR4, Full), __DEFINE_LINK_MODE_NAME(200000, CR4, Full), __DEFINE_LINK_MODE_NAME(100, T1, Full), __DEFINE_LINK_MODE_NAME(1000, T1, Full), __DEFINE_LINK_MODE_NAME(400000, KR8, Full), __DEFINE_LINK_MODE_NAME(400000, SR8, Full), __DEFINE_LINK_MODE_NAME(400000, LR8_ER8_FR8, Full), __DEFINE_LINK_MODE_NAME(400000, DR8, Full), __DEFINE_LINK_MODE_NAME(400000, CR8, Full), __DEFINE_SPECIAL_MODE_NAME(FEC_LLRS, "LLRS"), __DEFINE_LINK_MODE_NAME(100000, KR, Full), __DEFINE_LINK_MODE_NAME(100000, SR, Full), __DEFINE_LINK_MODE_NAME(100000, LR_ER_FR, Full), __DEFINE_LINK_MODE_NAME(100000, DR, Full), __DEFINE_LINK_MODE_NAME(100000, CR, Full), __DEFINE_LINK_MODE_NAME(200000, KR2, Full), __DEFINE_LINK_MODE_NAME(200000, SR2, Full), __DEFINE_LINK_MODE_NAME(200000, LR2_ER2_FR2, Full), __DEFINE_LINK_MODE_NAME(200000, DR2, Full), __DEFINE_LINK_MODE_NAME(200000, CR2, Full), __DEFINE_LINK_MODE_NAME(400000, KR4, Full), __DEFINE_LINK_MODE_NAME(400000, SR4, Full), __DEFINE_LINK_MODE_NAME(400000, LR4_ER4_FR4, Full), __DEFINE_LINK_MODE_NAME(400000, DR4, Full), __DEFINE_LINK_MODE_NAME(400000, CR4, Full), __DEFINE_LINK_MODE_NAME(100, FX, Half), __DEFINE_LINK_MODE_NAME(100, FX, Full), __DEFINE_LINK_MODE_NAME(10, T1L, Full), __DEFINE_LINK_MODE_NAME(800000, CR8, Full), __DEFINE_LINK_MODE_NAME(800000, KR8, Full), __DEFINE_LINK_MODE_NAME(800000, DR8, Full), __DEFINE_LINK_MODE_NAME(800000, DR8_2, Full), __DEFINE_LINK_MODE_NAME(800000, SR8, Full), __DEFINE_LINK_MODE_NAME(800000, VR8, Full), __DEFINE_LINK_MODE_NAME(10, T1S, Full), __DEFINE_LINK_MODE_NAME(10, T1S, Half), __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_NAME(10, T1BRR, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_CR 1 #define __LINK_MODE_LANES_CR2 2 #define __LINK_MODE_LANES_CR4 4 #define __LINK_MODE_LANES_CR8 8 #define __LINK_MODE_LANES_DR 1 #define __LINK_MODE_LANES_DR2 2 #define __LINK_MODE_LANES_DR4 4 #define __LINK_MODE_LANES_DR8 8 #define __LINK_MODE_LANES_KR 1 #define __LINK_MODE_LANES_KR2 2 #define __LINK_MODE_LANES_KR4 4 #define __LINK_MODE_LANES_KR8 8 #define __LINK_MODE_LANES_SR 1 #define __LINK_MODE_LANES_SR2 2 #define __LINK_MODE_LANES_SR4 4 #define __LINK_MODE_LANES_SR8 8 #define __LINK_MODE_LANES_ER 1 #define __LINK_MODE_LANES_KX 1 #define __LINK_MODE_LANES_KX4 4 #define __LINK_MODE_LANES_LR 1 #define __LINK_MODE_LANES_LR4 4 #define __LINK_MODE_LANES_LR4_ER4 4 #define __LINK_MODE_LANES_LR_ER_FR 1 #define __LINK_MODE_LANES_LR2_ER2_FR2 2 #define __LINK_MODE_LANES_LR4_ER4_FR4 4 #define __LINK_MODE_LANES_LR8_ER8_FR8 8 #define __LINK_MODE_LANES_LRM 1 #define __LINK_MODE_LANES_MLD2 2 #define __LINK_MODE_LANES_T 1 #define __LINK_MODE_LANES_T1 1 #define __LINK_MODE_LANES_X 1 #define __LINK_MODE_LANES_FX 1 #define __LINK_MODE_LANES_T1L 1 #define __LINK_MODE_LANES_T1S 1 #define __LINK_MODE_LANES_T1S_P2MP 1 #define __LINK_MODE_LANES_VR8 8 #define __LINK_MODE_LANES_DR8_2 8 #define __LINK_MODE_LANES_T1BRR 1 #define __DEFINE_LINK_MODE_PARAMS(_speed, _type, _duplex) \ [ETHTOOL_LINK_MODE(_speed, _type, _duplex)] = { \ .speed = SPEED_ ## _speed, \ .lanes = __LINK_MODE_LANES_ ## _type, \ .duplex = __DUPLEX_ ## _duplex \ } #define __DUPLEX_Half DUPLEX_HALF #define __DUPLEX_Full DUPLEX_FULL #define __DEFINE_SPECIAL_MODE_PARAMS(_mode) \ [ETHTOOL_LINK_MODE_ ## _mode ## _BIT] = { \ .speed = SPEED_UNKNOWN, \ .lanes = 0, \ .duplex = DUPLEX_UNKNOWN, \ } const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(10, T, Half), __DEFINE_LINK_MODE_PARAMS(10, T, Full), __DEFINE_LINK_MODE_PARAMS(100, T, Half), __DEFINE_LINK_MODE_PARAMS(100, T, Full), __DEFINE_LINK_MODE_PARAMS(1000, T, Half), __DEFINE_LINK_MODE_PARAMS(1000, T, Full), __DEFINE_SPECIAL_MODE_PARAMS(Autoneg), __DEFINE_SPECIAL_MODE_PARAMS(TP), __DEFINE_SPECIAL_MODE_PARAMS(AUI), __DEFINE_SPECIAL_MODE_PARAMS(MII), __DEFINE_SPECIAL_MODE_PARAMS(FIBRE), __DEFINE_SPECIAL_MODE_PARAMS(BNC), __DEFINE_LINK_MODE_PARAMS(10000, T, Full), __DEFINE_SPECIAL_MODE_PARAMS(Pause), __DEFINE_SPECIAL_MODE_PARAMS(Asym_Pause), __DEFINE_LINK_MODE_PARAMS(2500, X, Full), __DEFINE_SPECIAL_MODE_PARAMS(Backplane), __DEFINE_LINK_MODE_PARAMS(1000, KX, Full), __DEFINE_LINK_MODE_PARAMS(10000, KX4, Full), __DEFINE_LINK_MODE_PARAMS(10000, KR, Full), [ETHTOOL_LINK_MODE_10000baseR_FEC_BIT] = { .speed = SPEED_10000, .lanes = 1, .duplex = DUPLEX_FULL, }, __DEFINE_LINK_MODE_PARAMS(20000, MLD2, Full), __DEFINE_LINK_MODE_PARAMS(20000, KR2, Full), __DEFINE_LINK_MODE_PARAMS(40000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(40000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(40000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(40000, LR4, Full), __DEFINE_LINK_MODE_PARAMS(56000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(56000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(56000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(56000, LR4, Full), __DEFINE_LINK_MODE_PARAMS(25000, CR, Full), __DEFINE_LINK_MODE_PARAMS(25000, KR, Full), __DEFINE_LINK_MODE_PARAMS(25000, SR, Full), __DEFINE_LINK_MODE_PARAMS(50000, CR2, Full), __DEFINE_LINK_MODE_PARAMS(50000, KR2, Full), __DEFINE_LINK_MODE_PARAMS(100000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(100000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(100000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(100000, LR4_ER4, Full), __DEFINE_LINK_MODE_PARAMS(50000, SR2, Full), __DEFINE_LINK_MODE_PARAMS(1000, X, Full), __DEFINE_LINK_MODE_PARAMS(10000, CR, Full), __DEFINE_LINK_MODE_PARAMS(10000, SR, Full), __DEFINE_LINK_MODE_PARAMS(10000, LR, Full), __DEFINE_LINK_MODE_PARAMS(10000, LRM, Full), __DEFINE_LINK_MODE_PARAMS(10000, ER, Full), __DEFINE_LINK_MODE_PARAMS(2500, T, Full), __DEFINE_LINK_MODE_PARAMS(5000, T, Full), __DEFINE_SPECIAL_MODE_PARAMS(FEC_NONE), __DEFINE_SPECIAL_MODE_PARAMS(FEC_RS), __DEFINE_SPECIAL_MODE_PARAMS(FEC_BASER), __DEFINE_LINK_MODE_PARAMS(50000, KR, Full), __DEFINE_LINK_MODE_PARAMS(50000, SR, Full), __DEFINE_LINK_MODE_PARAMS(50000, CR, Full), __DEFINE_LINK_MODE_PARAMS(50000, LR_ER_FR, Full), __DEFINE_LINK_MODE_PARAMS(50000, DR, Full), __DEFINE_LINK_MODE_PARAMS(100000, KR2, Full), __DEFINE_LINK_MODE_PARAMS(100000, SR2, Full), __DEFINE_LINK_MODE_PARAMS(100000, CR2, Full), __DEFINE_LINK_MODE_PARAMS(100000, LR2_ER2_FR2, Full), __DEFINE_LINK_MODE_PARAMS(100000, DR2, Full), __DEFINE_LINK_MODE_PARAMS(200000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(200000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(200000, LR4_ER4_FR4, Full), __DEFINE_LINK_MODE_PARAMS(200000, DR4, Full), __DEFINE_LINK_MODE_PARAMS(200000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(100, T1, Full), __DEFINE_LINK_MODE_PARAMS(1000, T1, Full), __DEFINE_LINK_MODE_PARAMS(400000, KR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, SR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, LR8_ER8_FR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full), __DEFINE_SPECIAL_MODE_PARAMS(FEC_LLRS), __DEFINE_LINK_MODE_PARAMS(100000, KR, Full), __DEFINE_LINK_MODE_PARAMS(100000, SR, Full), __DEFINE_LINK_MODE_PARAMS(100000, LR_ER_FR, Full), __DEFINE_LINK_MODE_PARAMS(100000, DR, Full), __DEFINE_LINK_MODE_PARAMS(100000, CR, Full), __DEFINE_LINK_MODE_PARAMS(200000, KR2, Full), __DEFINE_LINK_MODE_PARAMS(200000, SR2, Full), __DEFINE_LINK_MODE_PARAMS(200000, LR2_ER2_FR2, Full), __DEFINE_LINK_MODE_PARAMS(200000, DR2, Full), __DEFINE_LINK_MODE_PARAMS(200000, CR2, Full), __DEFINE_LINK_MODE_PARAMS(400000, KR4, Full), __DEFINE_LINK_MODE_PARAMS(400000, SR4, Full), __DEFINE_LINK_MODE_PARAMS(400000, LR4_ER4_FR4, Full), __DEFINE_LINK_MODE_PARAMS(400000, DR4, Full), __DEFINE_LINK_MODE_PARAMS(400000, CR4, Full), __DEFINE_LINK_MODE_PARAMS(100, FX, Half), __DEFINE_LINK_MODE_PARAMS(100, FX, Full), __DEFINE_LINK_MODE_PARAMS(10, T1L, Full), __DEFINE_LINK_MODE_PARAMS(800000, CR8, Full), __DEFINE_LINK_MODE_PARAMS(800000, KR8, Full), __DEFINE_LINK_MODE_PARAMS(800000, DR8, Full), __DEFINE_LINK_MODE_PARAMS(800000, DR8_2, Full), __DEFINE_LINK_MODE_PARAMS(800000, SR8, Full), __DEFINE_LINK_MODE_PARAMS(800000, VR8, Full), __DEFINE_LINK_MODE_PARAMS(10, T1S, Full), __DEFINE_LINK_MODE_PARAMS(10, T1S, Half), __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); const char netif_msg_class_names[][ETH_GSTRING_LEN] = { [NETIF_MSG_DRV_BIT] = "drv", [NETIF_MSG_PROBE_BIT] = "probe", [NETIF_MSG_LINK_BIT] = "link", [NETIF_MSG_TIMER_BIT] = "timer", [NETIF_MSG_IFDOWN_BIT] = "ifdown", [NETIF_MSG_IFUP_BIT] = "ifup", [NETIF_MSG_RX_ERR_BIT] = "rx_err", [NETIF_MSG_TX_ERR_BIT] = "tx_err", [NETIF_MSG_TX_QUEUED_BIT] = "tx_queued", [NETIF_MSG_INTR_BIT] = "intr", [NETIF_MSG_TX_DONE_BIT] = "tx_done", [NETIF_MSG_RX_STATUS_BIT] = "rx_status", [NETIF_MSG_PKTDATA_BIT] = "pktdata", [NETIF_MSG_HW_BIT] = "hw", [NETIF_MSG_WOL_BIT] = "wol", }; static_assert(ARRAY_SIZE(netif_msg_class_names) == NETIF_MSG_CLASS_COUNT); const char wol_mode_names[][ETH_GSTRING_LEN] = { [const_ilog2(WAKE_PHY)] = "phy", [const_ilog2(WAKE_UCAST)] = "ucast", [const_ilog2(WAKE_MCAST)] = "mcast", [const_ilog2(WAKE_BCAST)] = "bcast", [const_ilog2(WAKE_ARP)] = "arp", [const_ilog2(WAKE_MAGIC)] = "magic", [const_ilog2(WAKE_MAGICSECURE)] = "magicsecure", [const_ilog2(WAKE_FILTER)] = "filter", }; static_assert(ARRAY_SIZE(wol_mode_names) == WOL_MODE_COUNT); const char sof_timestamping_names[][ETH_GSTRING_LEN] = { [const_ilog2(SOF_TIMESTAMPING_TX_HARDWARE)] = "hardware-transmit", [const_ilog2(SOF_TIMESTAMPING_TX_SOFTWARE)] = "software-transmit", [const_ilog2(SOF_TIMESTAMPING_RX_HARDWARE)] = "hardware-receive", [const_ilog2(SOF_TIMESTAMPING_RX_SOFTWARE)] = "software-receive", [const_ilog2(SOF_TIMESTAMPING_SOFTWARE)] = "software-system-clock", [const_ilog2(SOF_TIMESTAMPING_SYS_HARDWARE)] = "hardware-legacy-clock", [const_ilog2(SOF_TIMESTAMPING_RAW_HARDWARE)] = "hardware-raw-clock", [const_ilog2(SOF_TIMESTAMPING_OPT_ID)] = "option-id", [const_ilog2(SOF_TIMESTAMPING_TX_SCHED)] = "sched-transmit", [const_ilog2(SOF_TIMESTAMPING_TX_ACK)] = "ack-transmit", [const_ilog2(SOF_TIMESTAMPING_OPT_CMSG)] = "option-cmsg", [const_ilog2(SOF_TIMESTAMPING_OPT_TSONLY)] = "option-tsonly", [const_ilog2(SOF_TIMESTAMPING_OPT_STATS)] = "option-stats", [const_ilog2(SOF_TIMESTAMPING_OPT_PKTINFO)] = "option-pktinfo", [const_ilog2(SOF_TIMESTAMPING_OPT_TX_SWHW)] = "option-tx-swhw", [const_ilog2(SOF_TIMESTAMPING_BIND_PHC)] = "bind-phc", [const_ilog2(SOF_TIMESTAMPING_OPT_ID_TCP)] = "option-id-tcp", }; static_assert(ARRAY_SIZE(sof_timestamping_names) == __SOF_TIMESTAMPING_CNT); const char ts_tx_type_names[][ETH_GSTRING_LEN] = { [HWTSTAMP_TX_OFF] = "off", [HWTSTAMP_TX_ON] = "on", [HWTSTAMP_TX_ONESTEP_SYNC] = "onestep-sync", [HWTSTAMP_TX_ONESTEP_P2P] = "onestep-p2p", }; static_assert(ARRAY_SIZE(ts_tx_type_names) == __HWTSTAMP_TX_CNT); const char ts_rx_filter_names[][ETH_GSTRING_LEN] = { [HWTSTAMP_FILTER_NONE] = "none", [HWTSTAMP_FILTER_ALL] = "all", [HWTSTAMP_FILTER_SOME] = "some", [HWTSTAMP_FILTER_PTP_V1_L4_EVENT] = "ptpv1-l4-event", [HWTSTAMP_FILTER_PTP_V1_L4_SYNC] = "ptpv1-l4-sync", [HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ] = "ptpv1-l4-delay-req", [HWTSTAMP_FILTER_PTP_V2_L4_EVENT] = "ptpv2-l4-event", [HWTSTAMP_FILTER_PTP_V2_L4_SYNC] = "ptpv2-l4-sync", [HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ] = "ptpv2-l4-delay-req", [HWTSTAMP_FILTER_PTP_V2_L2_EVENT] = "ptpv2-l2-event", [HWTSTAMP_FILTER_PTP_V2_L2_SYNC] = "ptpv2-l2-sync", [HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ] = "ptpv2-l2-delay-req", [HWTSTAMP_FILTER_PTP_V2_EVENT] = "ptpv2-event", [HWTSTAMP_FILTER_PTP_V2_SYNC] = "ptpv2-sync", [HWTSTAMP_FILTER_PTP_V2_DELAY_REQ] = "ptpv2-delay-req", [HWTSTAMP_FILTER_NTP_ALL] = "ntp-all", }; static_assert(ARRAY_SIZE(ts_rx_filter_names) == __HWTSTAMP_FILTER_CNT); const char udp_tunnel_type_names[][ETH_GSTRING_LEN] = { [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN] = "vxlan", [ETHTOOL_UDP_TUNNEL_TYPE_GENEVE] = "geneve", [ETHTOOL_UDP_TUNNEL_TYPE_VXLAN_GPE] = "vxlan-gpe", }; static_assert(ARRAY_SIZE(udp_tunnel_type_names) == __ETHTOOL_UDP_TUNNEL_TYPE_CNT); /* return false if legacy contained non-0 deprecated fields * maxtxpkt/maxrxpkt. rest of ksettings always updated */ bool convert_legacy_settings_to_link_ksettings( struct ethtool_link_ksettings *link_ksettings, const struct ethtool_cmd *legacy_settings) { bool retval = true; memset(link_ksettings, 0, sizeof(*link_ksettings)); /* This is used to tell users that driver is still using these * deprecated legacy fields, and they should not use * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS */ if (legacy_settings->maxtxpkt || legacy_settings->maxrxpkt) retval = false; ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.supported, legacy_settings->supported); ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.advertising, legacy_settings->advertising); ethtool_convert_legacy_u32_to_link_mode( link_ksettings->link_modes.lp_advertising, legacy_settings->lp_advertising); link_ksettings->base.speed = ethtool_cmd_speed(legacy_settings); link_ksettings->base.duplex = legacy_settings->duplex; link_ksettings->base.port = legacy_settings->port; link_ksettings->base.phy_address = legacy_settings->phy_address; link_ksettings->base.autoneg = legacy_settings->autoneg; link_ksettings->base.mdio_support = legacy_settings->mdio_support; link_ksettings->base.eth_tp_mdix = legacy_settings->eth_tp_mdix; link_ksettings->base.eth_tp_mdix_ctrl = legacy_settings->eth_tp_mdix_ctrl; return retval; } int __ethtool_get_link(struct net_device *dev) { if (!dev->ethtool_ops->get_link) return -EOPNOTSUPP; return netif_running(dev) && dev->ethtool_ops->get_link(dev); } static int ethtool_get_rxnfc_rule_count(struct net_device *dev) { const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_rxnfc info = { .cmd = ETHTOOL_GRXCLSRLCNT, }; int err; err = ops->get_rxnfc(dev, &info, NULL); if (err) return err; return info.rule_cnt; } int ethtool_get_max_rxnfc_channel(struct net_device *dev, u64 *max) { const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_rxnfc *info; int err, i, rule_cnt; u64 max_ring = 0; if (!ops->get_rxnfc) return -EOPNOTSUPP; rule_cnt = ethtool_get_rxnfc_rule_count(dev); if (rule_cnt <= 0) return -EINVAL; info = kvzalloc(struct_size(info, rule_locs, rule_cnt), GFP_KERNEL); if (!info) return -ENOMEM; info->cmd = ETHTOOL_GRXCLSRLALL; info->rule_cnt = rule_cnt; err = ops->get_rxnfc(dev, info, info->rule_locs); if (err) goto err_free_info; for (i = 0; i < rule_cnt; i++) { struct ethtool_rxnfc rule_info = { .cmd = ETHTOOL_GRXCLSRULE, .fs.location = info->rule_locs[i], }; err = ops->get_rxnfc(dev, &rule_info, NULL); if (err) goto err_free_info; if (rule_info.fs.ring_cookie != RX_CLS_FLOW_DISC && rule_info.fs.ring_cookie != RX_CLS_FLOW_WAKE && !(rule_info.flow_type & FLOW_RSS) && !ethtool_get_flow_spec_ring_vf(rule_info.fs.ring_cookie)) max_ring = max_t(u64, max_ring, rule_info.fs.ring_cookie); } kvfree(info); *max = max_ring; return 0; err_free_info: kvfree(info); return err; } static u32 ethtool_get_max_rss_ctx_channel(struct net_device *dev) { struct ethtool_rxfh_context *ctx; unsigned long context; u32 max_ring = 0; mutex_lock(&dev->ethtool->rss_lock); xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { u32 i, *tbl; tbl = ethtool_rxfh_context_indir(ctx); for (i = 0; i < ctx->indir_size; i++) max_ring = max(max_ring, tbl[i]); } mutex_unlock(&dev->ethtool->rss_lock); return max_ring; } u32 ethtool_get_max_rxfh_channel(struct net_device *dev) { struct ethtool_rxfh_param rxfh = {}; u32 dev_size, current_max; int ret; /* While we do track whether RSS context has an indirection * table explicitly set by the user, no driver looks at that bit. * Assume drivers won't auto-regenerate the additional tables, * to be safe. */ current_max = ethtool_get_max_rss_ctx_channel(dev); if (!netif_is_rxfh_configured(dev)) return current_max; if (!dev->ethtool_ops->get_rxfh_indir_size || !dev->ethtool_ops->get_rxfh) return current_max; dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); if (dev_size == 0) return current_max; rxfh.indir = kcalloc(dev_size, sizeof(rxfh.indir[0]), GFP_USER); if (!rxfh.indir) return U32_MAX; ret = dev->ethtool_ops->get_rxfh(dev, &rxfh); if (ret) { current_max = U32_MAX; goto out_free; } while (dev_size--) current_max = max(current_max, rxfh.indir[dev_size]); out_free: kfree(rxfh.indir); return current_max; } int ethtool_check_ops(const struct ethtool_ops *ops) { if (WARN_ON(ops->set_coalesce && !ops->supported_coalesce_params)) return -EINVAL; /* NOTE: sufficiently insane drivers may swap ethtool_ops at runtime, * the fact that ops are checked at registration time does not * mean the ops attached to a netdev later on are sane. */ return 0; } int __ethtool_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info) { const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; memset(info, 0, sizeof(*info)); info->cmd = ETHTOOL_GET_TS_INFO; if (phy_is_default_hwtstamp(phydev) && phy_has_tsinfo(phydev)) return phy_ts_info(phydev, info); if (ops->get_ts_info) return ops->get_ts_info(dev, info); info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE; info->phc_index = -1; return 0; } int ethtool_get_phc_vclocks(struct net_device *dev, int **vclock_index) { struct kernel_ethtool_ts_info info = { }; int num = 0; if (!__ethtool_get_ts_info(dev, &info)) num = ptp_get_vclocks_index(info.phc_index, vclock_index); return num; } EXPORT_SYMBOL(ethtool_get_phc_vclocks); int ethtool_get_ts_info_by_layer(struct net_device *dev, struct kernel_ethtool_ts_info *info) { return __ethtool_get_ts_info(dev, info); } EXPORT_SYMBOL(ethtool_get_ts_info_by_layer); const struct ethtool_phy_ops *ethtool_phy_ops; void ethtool_set_ethtool_phy_ops(const struct ethtool_phy_ops *ops) { ASSERT_RTNL(); ethtool_phy_ops = ops; } EXPORT_SYMBOL_GPL(ethtool_set_ethtool_phy_ops); void ethtool_params_from_link_mode(struct ethtool_link_ksettings *link_ksettings, enum ethtool_link_mode_bit_indices link_mode) { const struct link_mode_info *link_info; if (WARN_ON_ONCE(link_mode >= __ETHTOOL_LINK_MODE_MASK_NBITS)) return; link_info = &link_mode_params[link_mode]; link_ksettings->base.speed = link_info->speed; link_ksettings->lanes = link_info->lanes; link_ksettings->base.duplex = link_info->duplex; } EXPORT_SYMBOL_GPL(ethtool_params_from_link_mode); /** * ethtool_forced_speed_maps_init * @maps: Pointer to an array of Ethtool forced speed map * @size: Array size * * Initialize an array of Ethtool forced speed map to Ethtool link modes. This * should be called during driver module init. */ void ethtool_forced_speed_maps_init(struct ethtool_forced_speed_map *maps, u32 size) { for (u32 i = 0; i < size; i++) { struct ethtool_forced_speed_map *map = &maps[i]; linkmode_set_bit_array(map->cap_arr, map->arr_size, map->caps); map->cap_arr = NULL; map->arr_size = 0; } } EXPORT_SYMBOL_GPL(ethtool_forced_speed_maps_init); void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id) { struct ethtool_rxfh_context *ctx; WARN_ONCE(!rtnl_is_locked() && !lockdep_is_held_type(&dev->ethtool->rss_lock, -1), "RSS context lock assertion failed\n"); netdev_err(dev, "device error, RSS context %d lost\n", context_id); ctx = xa_erase(&dev->ethtool->rss_ctx, context_id); kfree(ctx); } EXPORT_SYMBOL(ethtool_rxfh_context_lost);
12 49 1 14 134 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM mptcp #if !defined(_TRACE_MPTCP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MPTCP_H #include <linux/tracepoint.h> #define show_mapping_status(status) \ __print_symbolic(status, \ { 0, "MAPPING_OK" }, \ { 1, "MAPPING_INVALID" }, \ { 2, "MAPPING_EMPTY" }, \ { 3, "MAPPING_DATA_FIN" }, \ { 4, "MAPPING_DUMMY" }) TRACE_EVENT(mptcp_subflow_get_send, TP_PROTO(struct mptcp_subflow_context *subflow), TP_ARGS(subflow), TP_STRUCT__entry( __field(bool, active) __field(bool, free) __field(u32, snd_wnd) __field(u32, pace) __field(u8, backup) __field(u64, ratio) ), TP_fast_assign( struct sock *ssk; __entry->active = mptcp_subflow_active(subflow); __entry->backup = subflow->backup; if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock)) __entry->free = sk_stream_memory_free(subflow->tcp_sock); else __entry->free = 0; ssk = mptcp_subflow_tcp_sock(subflow); if (ssk && sk_fullsock(ssk)) { __entry->snd_wnd = tcp_sk(ssk)->snd_wnd; __entry->pace = READ_ONCE(ssk->sk_pacing_rate); } else { __entry->snd_wnd = 0; __entry->pace = 0; } if (ssk && sk_fullsock(ssk) && __entry->pace) __entry->ratio = div_u64((u64)ssk->sk_wmem_queued << 32, __entry->pace); else __entry->ratio = 0; ), TP_printk("active=%d free=%d snd_wnd=%u pace=%u backup=%u ratio=%llu", __entry->active, __entry->free, __entry->snd_wnd, __entry->pace, __entry->backup, __entry->ratio) ); DECLARE_EVENT_CLASS(mptcp_dump_mpext, TP_PROTO(struct mptcp_ext *mpext), TP_ARGS(mpext), TP_STRUCT__entry( __field(u64, data_ack) __field(u64, data_seq) __field(u32, subflow_seq) __field(u16, data_len) __field(u16, csum) __field(u8, use_map) __field(u8, dsn64) __field(u8, data_fin) __field(u8, use_ack) __field(u8, ack64) __field(u8, mpc_map) __field(u8, frozen) __field(u8, reset_transient) __field(u8, reset_reason) __field(u8, csum_reqd) __field(u8, infinite_map) ), TP_fast_assign( __entry->data_ack = mpext->ack64 ? mpext->data_ack : mpext->data_ack32; __entry->data_seq = mpext->data_seq; __entry->subflow_seq = mpext->subflow_seq; __entry->data_len = mpext->data_len; __entry->csum = (__force u16)mpext->csum; __entry->use_map = mpext->use_map; __entry->dsn64 = mpext->dsn64; __entry->data_fin = mpext->data_fin; __entry->use_ack = mpext->use_ack; __entry->ack64 = mpext->ack64; __entry->mpc_map = mpext->mpc_map; __entry->frozen = mpext->frozen; __entry->reset_transient = mpext->reset_transient; __entry->reset_reason = mpext->reset_reason; __entry->csum_reqd = mpext->csum_reqd; __entry->infinite_map = mpext->infinite_map; ), TP_printk("data_ack=%llu data_seq=%llu subflow_seq=%u data_len=%u csum=%x use_map=%u dsn64=%u data_fin=%u use_ack=%u ack64=%u mpc_map=%u frozen=%u reset_transient=%u reset_reason=%u csum_reqd=%u infinite_map=%u", __entry->data_ack, __entry->data_seq, __entry->subflow_seq, __entry->data_len, __entry->csum, __entry->use_map, __entry->dsn64, __entry->data_fin, __entry->use_ack, __entry->ack64, __entry->mpc_map, __entry->frozen, __entry->reset_transient, __entry->reset_reason, __entry->csum_reqd, __entry->infinite_map) ); DEFINE_EVENT(mptcp_dump_mpext, mptcp_sendmsg_frag, TP_PROTO(struct mptcp_ext *mpext), TP_ARGS(mpext)); DEFINE_EVENT(mptcp_dump_mpext, get_mapping_status, TP_PROTO(struct mptcp_ext *mpext), TP_ARGS(mpext)); TRACE_EVENT(ack_update_msk, TP_PROTO(u64 data_ack, u64 old_snd_una, u64 new_snd_una, u64 new_wnd_end, u64 msk_wnd_end), TP_ARGS(data_ack, old_snd_una, new_snd_una, new_wnd_end, msk_wnd_end), TP_STRUCT__entry( __field(u64, data_ack) __field(u64, old_snd_una) __field(u64, new_snd_una) __field(u64, new_wnd_end) __field(u64, msk_wnd_end) ), TP_fast_assign( __entry->data_ack = data_ack; __entry->old_snd_una = old_snd_una; __entry->new_snd_una = new_snd_una; __entry->new_wnd_end = new_wnd_end; __entry->msk_wnd_end = msk_wnd_end; ), TP_printk("data_ack=%llu old_snd_una=%llu new_snd_una=%llu new_wnd_end=%llu msk_wnd_end=%llu", __entry->data_ack, __entry->old_snd_una, __entry->new_snd_una, __entry->new_wnd_end, __entry->msk_wnd_end) ); TRACE_EVENT(subflow_check_data_avail, TP_PROTO(__u8 status, struct sk_buff *skb), TP_ARGS(status, skb), TP_STRUCT__entry( __field(u8, status) __field(const void *, skb) ), TP_fast_assign( __entry->status = status; __entry->skb = skb; ), TP_printk("mapping_status=%s, skb=%p", show_mapping_status(__entry->status), __entry->skb) ); #endif /* _TRACE_MPTCP_H */ /* This part must be outside protection */ #include <trace/define_trace.h>
1 1 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_DMA_MAPPING_H #define _LINUX_DMA_MAPPING_H #include <linux/cache.h> #include <linux/sizes.h> #include <linux/string.h> #include <linux/device.h> #include <linux/err.h> #include <linux/dma-direction.h> #include <linux/scatterlist.h> #include <linux/bug.h> #include <linux/mem_encrypt.h> /** * List of possible attributes associated with a DMA mapping. The semantics * of each attribute should be defined in Documentation/core-api/dma-attributes.rst. */ /* * DMA_ATTR_WEAK_ORDERING: Specifies that reads and writes to the mapping * may be weakly ordered, that is that reads and writes may pass each other. */ #define DMA_ATTR_WEAK_ORDERING (1UL << 1) /* * DMA_ATTR_WRITE_COMBINE: Specifies that writes to the mapping may be * buffered to improve performance. */ #define DMA_ATTR_WRITE_COMBINE (1UL << 2) /* * DMA_ATTR_NO_KERNEL_MAPPING: Lets the platform to avoid creating a kernel * virtual mapping for the allocated buffer. */ #define DMA_ATTR_NO_KERNEL_MAPPING (1UL << 4) /* * DMA_ATTR_SKIP_CPU_SYNC: Allows platform code to skip synchronization of * the CPU cache for the given buffer assuming that it has been already * transferred to 'device' domain. */ #define DMA_ATTR_SKIP_CPU_SYNC (1UL << 5) /* * DMA_ATTR_FORCE_CONTIGUOUS: Forces contiguous allocation of the buffer * in physical memory. */ #define DMA_ATTR_FORCE_CONTIGUOUS (1UL << 6) /* * DMA_ATTR_ALLOC_SINGLE_PAGES: This is a hint to the DMA-mapping subsystem * that it's probably not worth the time to try to allocate memory to in a way * that gives better TLB efficiency. */ #define DMA_ATTR_ALLOC_SINGLE_PAGES (1UL << 7) /* * DMA_ATTR_NO_WARN: This tells the DMA-mapping subsystem to suppress * allocation failure reports (similarly to __GFP_NOWARN). */ #define DMA_ATTR_NO_WARN (1UL << 8) /* * DMA_ATTR_PRIVILEGED: used to indicate that the buffer is fully * accessible at an elevated privilege level (and ideally inaccessible or * at least read-only at lesser-privileged levels). */ #define DMA_ATTR_PRIVILEGED (1UL << 9) /* * A dma_addr_t can hold any valid DMA or bus address for the platform. It can * be given to a device to use as a DMA source or target. It is specific to a * given device and there may be a translation between the CPU physical address * space and the bus address space. * * DMA_MAPPING_ERROR is the magic error code if a mapping failed. It should not * be used directly in drivers, but checked for using dma_mapping_error() * instead. */ #define DMA_MAPPING_ERROR (~(dma_addr_t)0) #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) #ifdef CONFIG_DMA_API_DEBUG void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); void debug_dma_map_single(struct device *dev, const void *addr, unsigned long len); #else static inline void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { } static inline void debug_dma_map_single(struct device *dev, const void *addr, unsigned long len) { } #endif /* CONFIG_DMA_API_DEBUG */ #ifdef CONFIG_HAS_DMA static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { debug_dma_mapping_error(dev, dma_addr); if (unlikely(dma_addr == DMA_MAPPING_ERROR)) return -ENOMEM; return 0; } dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs); int dma_map_sgtable(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir, unsigned long attrs); dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs); void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs); void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs); void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs); void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs); void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle); int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); bool dma_can_mmap(struct device *dev); bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); u64 dma_get_required_mask(struct device *dev); bool dma_addressing_limited(struct device *dev); size_t dma_max_mapping_size(struct device *dev); size_t dma_opt_mapping_size(struct device *dev); unsigned long dma_get_merge_boundary(struct device *dev); struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs); void dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir); void *dma_vmap_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt); void dma_vunmap_noncontiguous(struct device *dev, void *vaddr); int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, size_t size, struct sg_table *sgt); #else /* CONFIG_HAS_DMA */ static inline dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page, size_t offset, size_t size, enum dma_data_direction dir, unsigned long attrs) { return DMA_MAPPING_ERROR; } static inline void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { } static inline unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { return 0; } static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { } static inline int dma_map_sgtable(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir, unsigned long attrs) { return -EOPNOTSUPP; } static inline dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { return DMA_MAPPING_ERROR; } static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { } static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) { return -ENOMEM; } static inline void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { return NULL; } static void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) { } static inline void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) { return NULL; } static inline void dmam_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle) { } static inline int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { return -ENXIO; } static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { return -ENXIO; } static inline bool dma_can_mmap(struct device *dev) { return false; } static inline bool dma_pci_p2pdma_supported(struct device *dev) { return false; } static inline int dma_set_mask(struct device *dev, u64 mask) { return -EIO; } static inline int dma_set_coherent_mask(struct device *dev, u64 mask) { return -EIO; } static inline u64 dma_get_required_mask(struct device *dev) { return 0; } static inline bool dma_addressing_limited(struct device *dev) { return false; } static inline size_t dma_max_mapping_size(struct device *dev) { return 0; } static inline size_t dma_opt_mapping_size(struct device *dev) { return 0; } static inline unsigned long dma_get_merge_boundary(struct device *dev) { return 0; } static inline struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size, enum dma_data_direction dir, gfp_t gfp, unsigned long attrs) { return NULL; } static inline void dma_free_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt, enum dma_data_direction dir) { } static inline void *dma_vmap_noncontiguous(struct device *dev, size_t size, struct sg_table *sgt) { return NULL; } static inline void dma_vunmap_noncontiguous(struct device *dev, void *vaddr) { } static inline int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, size_t size, struct sg_table *sgt) { return -EINVAL; } #endif /* CONFIG_HAS_DMA */ #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir); void __dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir); void __dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir); void __dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir); bool __dma_need_sync(struct device *dev, dma_addr_t dma_addr); static inline bool dma_dev_need_sync(const struct device *dev) { /* Always call DMA sync operations when debugging is enabled */ return !dev->dma_skip_sync || IS_ENABLED(CONFIG_DMA_API_DEBUG); } static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { if (dma_dev_need_sync(dev)) __dma_sync_single_for_cpu(dev, addr, size, dir); } static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { if (dma_dev_need_sync(dev)) __dma_sync_single_for_device(dev, addr, size, dir); } static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { if (dma_dev_need_sync(dev)) __dma_sync_sg_for_cpu(dev, sg, nelems, dir); } static inline void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { if (dma_dev_need_sync(dev)) __dma_sync_sg_for_device(dev, sg, nelems, dir); } static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return dma_dev_need_sync(dev) ? __dma_need_sync(dev, dma_addr) : false; } #else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ static inline bool dma_dev_need_sync(const struct device *dev) { return false; } static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { } static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { } static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { } static inline void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { } static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return false; } #endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ struct page *dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp); void dma_free_pages(struct device *dev, size_t size, struct page *page, dma_addr_t dma_handle, enum dma_data_direction dir); int dma_mmap_pages(struct device *dev, struct vm_area_struct *vma, size_t size, struct page *page); static inline void *dma_alloc_noncoherent(struct device *dev, size_t size, dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp) { struct page *page = dma_alloc_pages(dev, size, dma_handle, dir, gfp); return page ? page_address(page) : NULL; } static inline void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir) { dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir); } static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, size_t size, enum dma_data_direction dir, unsigned long attrs) { /* DMA must never operate on areas that might be remapped. */ if (dev_WARN_ONCE(dev, is_vmalloc_addr(ptr), "rejecting DMA map of vmalloc memory\n")) return DMA_MAPPING_ERROR; debug_dma_map_single(dev, ptr, size); return dma_map_page_attrs(dev, virt_to_page(ptr), offset_in_page(ptr), size, dir, attrs); } static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { return dma_unmap_page_attrs(dev, addr, size, dir, attrs); } static inline void dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { return dma_sync_single_for_cpu(dev, addr + offset, size, dir); } static inline void dma_sync_single_range_for_device(struct device *dev, dma_addr_t addr, unsigned long offset, size_t size, enum dma_data_direction dir) { return dma_sync_single_for_device(dev, addr + offset, size, dir); } /** * dma_unmap_sgtable - Unmap the given buffer for DMA * @dev: The device for which to perform the DMA operation * @sgt: The sg_table object describing the buffer * @dir: DMA direction * @attrs: Optional DMA attributes for the unmap operation * * Unmaps a buffer described by a scatterlist stored in the given sg_table * object for the @dir DMA operation by the @dev device. After this function * the ownership of the buffer is transferred back to the CPU domain. */ static inline void dma_unmap_sgtable(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir, unsigned long attrs) { dma_unmap_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs); } /** * dma_sync_sgtable_for_cpu - Synchronize the given buffer for CPU access * @dev: The device for which to perform the DMA operation * @sgt: The sg_table object describing the buffer * @dir: DMA direction * * Performs the needed cache synchronization and moves the ownership of the * buffer back to the CPU domain, so it is safe to perform any access to it * by the CPU. Before doing any further DMA operations, one has to transfer * the ownership of the buffer back to the DMA domain by calling the * dma_sync_sgtable_for_device(). */ static inline void dma_sync_sgtable_for_cpu(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir) { dma_sync_sg_for_cpu(dev, sgt->sgl, sgt->orig_nents, dir); } /** * dma_sync_sgtable_for_device - Synchronize the given buffer for DMA * @dev: The device for which to perform the DMA operation * @sgt: The sg_table object describing the buffer * @dir: DMA direction * * Performs the needed cache synchronization and moves the ownership of the * buffer back to the DMA domain, so it is safe to perform the DMA operation. * Once finished, one has to call dma_sync_sgtable_for_cpu() or * dma_unmap_sgtable(). */ static inline void dma_sync_sgtable_for_device(struct device *dev, struct sg_table *sgt, enum dma_data_direction dir) { dma_sync_sg_for_device(dev, sgt->sgl, sgt->orig_nents, dir); } #define dma_map_single(d, a, s, r) dma_map_single_attrs(d, a, s, r, 0) #define dma_unmap_single(d, a, s, r) dma_unmap_single_attrs(d, a, s, r, 0) #define dma_map_sg(d, s, n, r) dma_map_sg_attrs(d, s, n, r, 0) #define dma_unmap_sg(d, s, n, r) dma_unmap_sg_attrs(d, s, n, r, 0) #define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0) #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0) #define dma_get_sgtable(d, t, v, h, s) dma_get_sgtable_attrs(d, t, v, h, s, 0) #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, 0) bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size); static inline void *dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { return dma_alloc_attrs(dev, size, dma_handle, gfp, (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0); } static inline void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle) { return dma_free_attrs(dev, size, cpu_addr, dma_handle, 0); } static inline u64 dma_get_mask(struct device *dev) { if (dev->dma_mask && *dev->dma_mask) return *dev->dma_mask; return DMA_BIT_MASK(32); } /* * Set both the DMA mask and the coherent DMA mask to the same thing. * Note that we don't check the return value from dma_set_coherent_mask() * as the DMA API guarantees that the coherent DMA mask can be set to * the same or smaller than the streaming DMA mask. */ static inline int dma_set_mask_and_coherent(struct device *dev, u64 mask) { int rc = dma_set_mask(dev, mask); if (rc == 0) dma_set_coherent_mask(dev, mask); return rc; } /* * Similar to the above, except it deals with the case where the device * does not have dev->dma_mask appropriately setup. */ static inline int dma_coerce_mask_and_coherent(struct device *dev, u64 mask) { dev->dma_mask = &dev->coherent_dma_mask; return dma_set_mask_and_coherent(dev, mask); } static inline unsigned int dma_get_max_seg_size(struct device *dev) { if (dev->dma_parms && dev->dma_parms->max_segment_size) return dev->dma_parms->max_segment_size; return SZ_64K; } static inline int dma_set_max_seg_size(struct device *dev, unsigned int size) { if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; return 0; } return -EIO; } static inline unsigned long dma_get_seg_boundary(struct device *dev) { if (dev->dma_parms && dev->dma_parms->segment_boundary_mask) return dev->dma_parms->segment_boundary_mask; return ULONG_MAX; } /** * dma_get_seg_boundary_nr_pages - return the segment boundary in "page" units * @dev: device to guery the boundary for * @page_shift: ilog() of the IOMMU page size * * Return the segment boundary in IOMMU page units (which may be different from * the CPU page size) for the passed in device. * * If @dev is NULL a boundary of U32_MAX is assumed, this case is just for * non-DMA API callers. */ static inline unsigned long dma_get_seg_boundary_nr_pages(struct device *dev, unsigned int page_shift) { if (!dev) return (U32_MAX >> page_shift) + 1; return (dma_get_seg_boundary(dev) >> page_shift) + 1; } static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) { if (dev->dma_parms) { dev->dma_parms->segment_boundary_mask = mask; return 0; } return -EIO; } static inline unsigned int dma_get_min_align_mask(struct device *dev) { if (dev->dma_parms) return dev->dma_parms->min_align_mask; return 0; } static inline int dma_set_min_align_mask(struct device *dev, unsigned int min_align_mask) { if (WARN_ON_ONCE(!dev->dma_parms)) return -EIO; dev->dma_parms->min_align_mask = min_align_mask; return 0; } #ifndef dma_get_cache_alignment static inline int dma_get_cache_alignment(void) { #ifdef ARCH_HAS_DMA_MINALIGN return ARCH_DMA_MINALIGN; #endif return 1; } #endif static inline void *dmam_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { return dmam_alloc_attrs(dev, size, dma_handle, gfp, (gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0); } static inline void *dma_alloc_wc(struct device *dev, size_t size, dma_addr_t *dma_addr, gfp_t gfp) { unsigned long attrs = DMA_ATTR_WRITE_COMBINE; if (gfp & __GFP_NOWARN) attrs |= DMA_ATTR_NO_WARN; return dma_alloc_attrs(dev, size, dma_addr, gfp, attrs); } static inline void dma_free_wc(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr) { return dma_free_attrs(dev, size, cpu_addr, dma_addr, DMA_ATTR_WRITE_COMBINE); } static inline int dma_mmap_wc(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size) { return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, DMA_ATTR_WRITE_COMBINE); } #ifdef CONFIG_NEED_DMA_MAP_STATE #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME #define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) #define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) (((PTR)->ADDR_NAME) = (VAL)) #define dma_unmap_len(PTR, LEN_NAME) ((PTR)->LEN_NAME) #define dma_unmap_len_set(PTR, LEN_NAME, VAL) (((PTR)->LEN_NAME) = (VAL)) #else #define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) #define DEFINE_DMA_UNMAP_LEN(LEN_NAME) #define dma_unmap_addr(PTR, ADDR_NAME) (0) #define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) do { } while (0) #define dma_unmap_len(PTR, LEN_NAME) (0) #define dma_unmap_len_set(PTR, LEN_NAME, VAL) do { } while (0) #endif #endif /* _LINUX_DMA_MAPPING_H */
9 9 3 265 1 10 85 2 169 1 4 82 184 15 132 126 60 4 1 12 1 43 41 14 1 1 53 2 2 1 2 1 8 20 1 5 14 4 16 15 10 1 3 1 13 6 1 3 2 2 8 1 2 1 5 5 2 1 1 9 1 1 6 18 1 4 1 1 14 9 5 3 1 3 2 31 3 2 3 19 8 1 2 18 5 2 1 4 27 32 3 28 5 1 10 11 19 21 11 9 10 24 21 4 7 21 22 21 8 3 11 11 2 16 18 9 14 12 1 3 10 10 1 9 2 2 5 8 1 4 2 6 7 6 2 11 7 2 1 6 4 1 1 4 4 1 1 2 8 5 2 1 4 1 14 1 13 13 12 7 1 1 10 1 9 13 13 1 9 10 40 15 1 4 19 2 7 27 4 4 4 15 4 15 1 8 1 1 6 1 461 6 2 11 20 15 9 8 3 8 31 32 24 12 7 1 11 6 4 8 14 7 6 3 6 7 145 40 1 9 5 17 9 28 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 // SPDX-License-Identifier: GPL-2.0-or-later /* Userspace key control operations * * Copyright (C) 2004-5 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/init.h> #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/slab.h> #include <linux/syscalls.h> #include <linux/key.h> #include <linux/keyctl.h> #include <linux/fs.h> #include <linux/capability.h> #include <linux/cred.h> #include <linux/string.h> #include <linux/err.h> #include <linux/vmalloc.h> #include <linux/security.h> #include <linux/uio.h> #include <linux/uaccess.h> #include <keys/request_key_auth-type.h> #include "internal.h" #define KEY_MAX_DESC_SIZE 4096 static const unsigned char keyrings_capabilities[2] = { [0] = (KEYCTL_CAPS0_CAPABILITIES | (IS_ENABLED(CONFIG_PERSISTENT_KEYRINGS) ? KEYCTL_CAPS0_PERSISTENT_KEYRINGS : 0) | (IS_ENABLED(CONFIG_KEY_DH_OPERATIONS) ? KEYCTL_CAPS0_DIFFIE_HELLMAN : 0) | (IS_ENABLED(CONFIG_ASYMMETRIC_KEY_TYPE) ? KEYCTL_CAPS0_PUBLIC_KEY : 0) | (IS_ENABLED(CONFIG_BIG_KEYS) ? KEYCTL_CAPS0_BIG_KEY : 0) | KEYCTL_CAPS0_INVALIDATE | KEYCTL_CAPS0_RESTRICT_KEYRING | KEYCTL_CAPS0_MOVE ), [1] = (KEYCTL_CAPS1_NS_KEYRING_NAME | KEYCTL_CAPS1_NS_KEY_TAG | (IS_ENABLED(CONFIG_KEY_NOTIFICATIONS) ? KEYCTL_CAPS1_NOTIFICATIONS : 0) ), }; static int key_get_type_from_user(char *type, const char __user *_type, unsigned len) { int ret; ret = strncpy_from_user(type, _type, len); if (ret < 0) return ret; if (ret == 0 || ret >= len) return -EINVAL; if (type[0] == '.') return -EPERM; type[len - 1] = '\0'; return 0; } /* * Extract the description of a new key from userspace and either add it as a * new key to the specified keyring or update a matching key in that keyring. * * If the description is NULL or an empty string, the key type is asked to * generate one from the payload. * * The keyring must be writable so that we can attach the key to it. * * If successful, the new key's serial number is returned, otherwise an error * code is returned. */ SYSCALL_DEFINE5(add_key, const char __user *, _type, const char __user *, _description, const void __user *, _payload, size_t, plen, key_serial_t, ringid) { key_ref_t keyring_ref, key_ref; char type[32], *description; void *payload; long ret; ret = -EINVAL; if (plen > 1024 * 1024 - 1) goto error; /* draw all the data into kernel space */ ret = key_get_type_from_user(type, _type, sizeof(type)); if (ret < 0) goto error; description = NULL; if (_description) { description = strndup_user(_description, KEY_MAX_DESC_SIZE); if (IS_ERR(description)) { ret = PTR_ERR(description); goto error; } if (!*description) { kfree(description); description = NULL; } else if ((description[0] == '.') && (strncmp(type, "keyring", 7) == 0)) { ret = -EPERM; goto error2; } } /* pull the payload in if one was supplied */ payload = NULL; if (plen) { ret = -ENOMEM; payload = kvmalloc(plen, GFP_KERNEL); if (!payload) goto error2; ret = -EFAULT; if (copy_from_user(payload, _payload, plen) != 0) goto error3; } /* find the target keyring (which must be writable) */ keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error3; } /* create or update the requested key and add it to the target * keyring */ key_ref = key_create_or_update(keyring_ref, type, description, payload, plen, KEY_PERM_UNDEF, KEY_ALLOC_IN_QUOTA); if (!IS_ERR(key_ref)) { ret = key_ref_to_ptr(key_ref)->serial; key_ref_put(key_ref); } else { ret = PTR_ERR(key_ref); } key_ref_put(keyring_ref); error3: kvfree_sensitive(payload, plen); error2: kfree(description); error: return ret; } /* * Search the process keyrings and keyring trees linked from those for a * matching key. Keyrings must have appropriate Search permission to be * searched. * * If a key is found, it will be attached to the destination keyring if there's * one specified and the serial number of the key will be returned. * * If no key is found, /sbin/request-key will be invoked if _callout_info is * non-NULL in an attempt to create a key. The _callout_info string will be * passed to /sbin/request-key to aid with completing the request. If the * _callout_info string is "" then it will be changed to "-". */ SYSCALL_DEFINE4(request_key, const char __user *, _type, const char __user *, _description, const char __user *, _callout_info, key_serial_t, destringid) { struct key_type *ktype; struct key *key; key_ref_t dest_ref; size_t callout_len; char type[32], *description, *callout_info; long ret; /* pull the type into kernel space */ ret = key_get_type_from_user(type, _type, sizeof(type)); if (ret < 0) goto error; /* pull the description into kernel space */ description = strndup_user(_description, KEY_MAX_DESC_SIZE); if (IS_ERR(description)) { ret = PTR_ERR(description); goto error; } /* pull the callout info into kernel space */ callout_info = NULL; callout_len = 0; if (_callout_info) { callout_info = strndup_user(_callout_info, PAGE_SIZE); if (IS_ERR(callout_info)) { ret = PTR_ERR(callout_info); goto error2; } callout_len = strlen(callout_info); } /* get the destination keyring if specified */ dest_ref = NULL; if (destringid) { dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(dest_ref)) { ret = PTR_ERR(dest_ref); goto error3; } } /* find the key type */ ktype = key_type_lookup(type); if (IS_ERR(ktype)) { ret = PTR_ERR(ktype); goto error4; } /* do the search */ key = request_key_and_link(ktype, description, NULL, callout_info, callout_len, NULL, key_ref_to_ptr(dest_ref), KEY_ALLOC_IN_QUOTA); if (IS_ERR(key)) { ret = PTR_ERR(key); goto error5; } /* wait for the key to finish being constructed */ ret = wait_for_key_construction(key, 1); if (ret < 0) goto error6; ret = key->serial; error6: key_put(key); error5: key_type_put(ktype); error4: key_ref_put(dest_ref); error3: kfree(callout_info); error2: kfree(description); error: return ret; } /* * Get the ID of the specified process keyring. * * The requested keyring must have search permission to be found. * * If successful, the ID of the requested keyring will be returned. */ long keyctl_get_keyring_ID(key_serial_t id, int create) { key_ref_t key_ref; unsigned long lflags; long ret; lflags = create ? KEY_LOOKUP_CREATE : 0; key_ref = lookup_user_key(id, lflags, KEY_NEED_SEARCH); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; } ret = key_ref_to_ptr(key_ref)->serial; key_ref_put(key_ref); error: return ret; } /* * Join a (named) session keyring. * * Create and join an anonymous session keyring or join a named session * keyring, creating it if necessary. A named session keyring must have Search * permission for it to be joined. Session keyrings without this permit will * be skipped over. It is not permitted for userspace to create or join * keyrings whose name begin with a dot. * * If successful, the ID of the joined session keyring will be returned. */ long keyctl_join_session_keyring(const char __user *_name) { char *name; long ret; /* fetch the name from userspace */ name = NULL; if (_name) { name = strndup_user(_name, KEY_MAX_DESC_SIZE); if (IS_ERR(name)) { ret = PTR_ERR(name); goto error; } ret = -EPERM; if (name[0] == '.') goto error_name; } /* join the session */ ret = join_session_keyring(name); error_name: kfree(name); error: return ret; } /* * Update a key's data payload from the given data. * * The key must grant the caller Write permission and the key type must support * updating for this to work. A negative key can be positively instantiated * with this call. * * If successful, 0 will be returned. If the key type does not support * updating, then -EOPNOTSUPP will be returned. */ long keyctl_update_key(key_serial_t id, const void __user *_payload, size_t plen) { key_ref_t key_ref; void *payload; long ret; ret = -EINVAL; if (plen > PAGE_SIZE) goto error; /* pull the payload in if one was supplied */ payload = NULL; if (plen) { ret = -ENOMEM; payload = kvmalloc(plen, GFP_KERNEL); if (!payload) goto error; ret = -EFAULT; if (copy_from_user(payload, _payload, plen) != 0) goto error2; } /* find the target key (which must be writable) */ key_ref = lookup_user_key(id, 0, KEY_NEED_WRITE); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; } /* update the key */ ret = key_update(key_ref, payload, plen); key_ref_put(key_ref); error2: kvfree_sensitive(payload, plen); error: return ret; } /* * Revoke a key. * * The key must be grant the caller Write or Setattr permission for this to * work. The key type should give up its quota claim when revoked. The key * and any links to the key will be automatically garbage collected after a * certain amount of time (/proc/sys/kernel/keys/gc_delay). * * Keys with KEY_FLAG_KEEP set should not be revoked. * * If successful, 0 is returned. */ long keyctl_revoke_key(key_serial_t id) { key_ref_t key_ref; struct key *key; long ret; key_ref = lookup_user_key(id, 0, KEY_NEED_WRITE); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); if (ret != -EACCES) goto error; key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; } } key = key_ref_to_ptr(key_ref); ret = 0; if (test_bit(KEY_FLAG_KEEP, &key->flags)) ret = -EPERM; else key_revoke(key); key_ref_put(key_ref); error: return ret; } /* * Invalidate a key. * * The key must be grant the caller Invalidate permission for this to work. * The key and any links to the key will be automatically garbage collected * immediately. * * Keys with KEY_FLAG_KEEP set should not be invalidated. * * If successful, 0 is returned. */ long keyctl_invalidate_key(key_serial_t id) { key_ref_t key_ref; struct key *key; long ret; kenter("%d", id); key_ref = lookup_user_key(id, 0, KEY_NEED_SEARCH); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); /* Root is permitted to invalidate certain special keys */ if (capable(CAP_SYS_ADMIN)) { key_ref = lookup_user_key(id, 0, KEY_SYSADMIN_OVERRIDE); if (IS_ERR(key_ref)) goto error; if (test_bit(KEY_FLAG_ROOT_CAN_INVAL, &key_ref_to_ptr(key_ref)->flags)) goto invalidate; goto error_put; } goto error; } invalidate: key = key_ref_to_ptr(key_ref); ret = 0; if (test_bit(KEY_FLAG_KEEP, &key->flags)) ret = -EPERM; else key_invalidate(key); error_put: key_ref_put(key_ref); error: kleave(" = %ld", ret); return ret; } /* * Clear the specified keyring, creating an empty process keyring if one of the * special keyring IDs is used. * * The keyring must grant the caller Write permission and not have * KEY_FLAG_KEEP set for this to work. If successful, 0 will be returned. */ long keyctl_keyring_clear(key_serial_t ringid) { key_ref_t keyring_ref; struct key *keyring; long ret; keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); /* Root is permitted to invalidate certain special keyrings */ if (capable(CAP_SYS_ADMIN)) { keyring_ref = lookup_user_key(ringid, 0, KEY_SYSADMIN_OVERRIDE); if (IS_ERR(keyring_ref)) goto error; if (test_bit(KEY_FLAG_ROOT_CAN_CLEAR, &key_ref_to_ptr(keyring_ref)->flags)) goto clear; goto error_put; } goto error; } clear: keyring = key_ref_to_ptr(keyring_ref); if (test_bit(KEY_FLAG_KEEP, &keyring->flags)) ret = -EPERM; else ret = keyring_clear(keyring); error_put: key_ref_put(keyring_ref); error: return ret; } /* * Create a link from a keyring to a key if there's no matching key in the * keyring, otherwise replace the link to the matching key with a link to the * new key. * * The key must grant the caller Link permission and the keyring must grant * the caller Write permission. Furthermore, if an additional link is created, * the keyring's quota will be extended. * * If successful, 0 will be returned. */ long keyctl_keyring_link(key_serial_t id, key_serial_t ringid) { key_ref_t keyring_ref, key_ref; long ret; keyring_ref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error; } key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_LINK); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; } ret = key_link(key_ref_to_ptr(keyring_ref), key_ref_to_ptr(key_ref)); key_ref_put(key_ref); error2: key_ref_put(keyring_ref); error: return ret; } /* * Unlink a key from a keyring. * * The keyring must grant the caller Write permission for this to work; the key * itself need not grant the caller anything. If the last link to a key is * removed then that key will be scheduled for destruction. * * Keys or keyrings with KEY_FLAG_KEEP set should not be unlinked. * * If successful, 0 will be returned. */ long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid) { key_ref_t keyring_ref, key_ref; struct key *keyring, *key; long ret; keyring_ref = lookup_user_key(ringid, 0, KEY_NEED_WRITE); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error; } key_ref = lookup_user_key(id, KEY_LOOKUP_PARTIAL, KEY_NEED_UNLINK); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; } keyring = key_ref_to_ptr(keyring_ref); key = key_ref_to_ptr(key_ref); if (test_bit(KEY_FLAG_KEEP, &keyring->flags) && test_bit(KEY_FLAG_KEEP, &key->flags)) ret = -EPERM; else ret = key_unlink(keyring, key); key_ref_put(key_ref); error2: key_ref_put(keyring_ref); error: return ret; } /* * Move a link to a key from one keyring to another, displacing any matching * key from the destination keyring. * * The key must grant the caller Link permission and both keyrings must grant * the caller Write permission. There must also be a link in the from keyring * to the key. If both keyrings are the same, nothing is done. * * If successful, 0 will be returned. */ long keyctl_keyring_move(key_serial_t id, key_serial_t from_ringid, key_serial_t to_ringid, unsigned int flags) { key_ref_t key_ref, from_ref, to_ref; long ret; if (flags & ~KEYCTL_MOVE_EXCL) return -EINVAL; key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_LINK); if (IS_ERR(key_ref)) return PTR_ERR(key_ref); from_ref = lookup_user_key(from_ringid, 0, KEY_NEED_WRITE); if (IS_ERR(from_ref)) { ret = PTR_ERR(from_ref); goto error2; } to_ref = lookup_user_key(to_ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(to_ref)) { ret = PTR_ERR(to_ref); goto error3; } ret = key_move(key_ref_to_ptr(key_ref), key_ref_to_ptr(from_ref), key_ref_to_ptr(to_ref), flags); key_ref_put(to_ref); error3: key_ref_put(from_ref); error2: key_ref_put(key_ref); return ret; } /* * Return a description of a key to userspace. * * The key must grant the caller View permission for this to work. * * If there's a buffer, we place up to buflen bytes of data into it formatted * in the following way: * * type;uid;gid;perm;description<NUL> * * If successful, we return the amount of description available, irrespective * of how much we may have copied into the buffer. */ long keyctl_describe_key(key_serial_t keyid, char __user *buffer, size_t buflen) { struct key *key, *instkey; key_ref_t key_ref; char *infobuf; long ret; int desclen, infolen; key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_NEED_VIEW); if (IS_ERR(key_ref)) { /* viewing a key under construction is permitted if we have the * authorisation token handy */ if (PTR_ERR(key_ref) == -EACCES) { instkey = key_get_instantiation_authkey(keyid); if (!IS_ERR(instkey)) { key_put(instkey); key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_AUTHTOKEN_OVERRIDE); if (!IS_ERR(key_ref)) goto okay; } } ret = PTR_ERR(key_ref); goto error; } okay: key = key_ref_to_ptr(key_ref); desclen = strlen(key->description); /* calculate how much information we're going to return */ ret = -ENOMEM; infobuf = kasprintf(GFP_KERNEL, "%s;%d;%d;%08x;", key->type->name, from_kuid_munged(current_user_ns(), key->uid), from_kgid_munged(current_user_ns(), key->gid), key->perm); if (!infobuf) goto error2; infolen = strlen(infobuf); ret = infolen + desclen + 1; /* consider returning the data */ if (buffer && buflen >= ret) { if (copy_to_user(buffer, infobuf, infolen) != 0 || copy_to_user(buffer + infolen, key->description, desclen + 1) != 0) ret = -EFAULT; } kfree(infobuf); error2: key_ref_put(key_ref); error: return ret; } /* * Search the specified keyring and any keyrings it links to for a matching * key. Only keyrings that grant the caller Search permission will be searched * (this includes the starting keyring). Only keys with Search permission can * be found. * * If successful, the found key will be linked to the destination keyring if * supplied and the key has Link permission, and the found key ID will be * returned. */ long keyctl_keyring_search(key_serial_t ringid, const char __user *_type, const char __user *_description, key_serial_t destringid) { struct key_type *ktype; key_ref_t keyring_ref, key_ref, dest_ref; char type[32], *description; long ret; /* pull the type and description into kernel space */ ret = key_get_type_from_user(type, _type, sizeof(type)); if (ret < 0) goto error; description = strndup_user(_description, KEY_MAX_DESC_SIZE); if (IS_ERR(description)) { ret = PTR_ERR(description); goto error; } /* get the keyring at which to begin the search */ keyring_ref = lookup_user_key(ringid, 0, KEY_NEED_SEARCH); if (IS_ERR(keyring_ref)) { ret = PTR_ERR(keyring_ref); goto error2; } /* get the destination keyring if specified */ dest_ref = NULL; if (destringid) { dest_ref = lookup_user_key(destringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(dest_ref)) { ret = PTR_ERR(dest_ref); goto error3; } } /* find the key type */ ktype = key_type_lookup(type); if (IS_ERR(ktype)) { ret = PTR_ERR(ktype); goto error4; } /* do the search */ key_ref = keyring_search(keyring_ref, ktype, description, true); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); /* treat lack or presence of a negative key the same */ if (ret == -EAGAIN) ret = -ENOKEY; goto error5; } /* link the resulting key to the destination keyring if we can */ if (dest_ref) { ret = key_permission(key_ref, KEY_NEED_LINK); if (ret < 0) goto error6; ret = key_link(key_ref_to_ptr(dest_ref), key_ref_to_ptr(key_ref)); if (ret < 0) goto error6; } ret = key_ref_to_ptr(key_ref)->serial; error6: key_ref_put(key_ref); error5: key_type_put(ktype); error4: key_ref_put(dest_ref); error3: key_ref_put(keyring_ref); error2: kfree(description); error: return ret; } /* * Call the read method */ static long __keyctl_read_key(struct key *key, char *buffer, size_t buflen) { long ret; down_read(&key->sem); ret = key_validate(key); if (ret == 0) ret = key->type->read(key, buffer, buflen); up_read(&key->sem); return ret; } /* * Read a key's payload. * * The key must either grant the caller Read permission, or it must grant the * caller Search permission when searched for from the process keyrings. * * If successful, we place up to buflen bytes of data into the buffer, if one * is provided, and return the amount of data that is available in the key, * irrespective of how much we copied into the buffer. */ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) { struct key *key; key_ref_t key_ref; long ret; char *key_data = NULL; size_t key_data_len; /* find the key first */ key_ref = lookup_user_key(keyid, 0, KEY_DEFER_PERM_CHECK); if (IS_ERR(key_ref)) { ret = -ENOKEY; goto out; } key = key_ref_to_ptr(key_ref); ret = key_read_state(key); if (ret < 0) goto key_put_out; /* Negatively instantiated */ /* see if we can read it directly */ ret = key_permission(key_ref, KEY_NEED_READ); if (ret == 0) goto can_read_key; if (ret != -EACCES) goto key_put_out; /* we can't; see if it's searchable from this process's keyrings * - we automatically take account of the fact that it may be * dangling off an instantiation key */ if (!is_key_possessed(key_ref)) { ret = -EACCES; goto key_put_out; } /* the key is probably readable - now try to read it */ can_read_key: if (!key->type->read) { ret = -EOPNOTSUPP; goto key_put_out; } if (!buffer || !buflen) { /* Get the key length from the read method */ ret = __keyctl_read_key(key, NULL, 0); goto key_put_out; } /* * Read the data with the semaphore held (since we might sleep) * to protect against the key being updated or revoked. * * Allocating a temporary buffer to hold the keys before * transferring them to user buffer to avoid potential * deadlock involving page fault and mmap_lock. * * key_data_len = (buflen <= PAGE_SIZE) * ? buflen : actual length of key data * * This prevents allocating arbitrary large buffer which can * be much larger than the actual key length. In the latter case, * at least 2 passes of this loop is required. */ key_data_len = (buflen <= PAGE_SIZE) ? buflen : 0; for (;;) { if (key_data_len) { key_data = kvmalloc(key_data_len, GFP_KERNEL); if (!key_data) { ret = -ENOMEM; goto key_put_out; } } ret = __keyctl_read_key(key, key_data, key_data_len); /* * Read methods will just return the required length without * any copying if the provided length isn't large enough. */ if (ret <= 0 || ret > buflen) break; /* * The key may change (unlikely) in between 2 consecutive * __keyctl_read_key() calls. In this case, we reallocate * a larger buffer and redo the key read when * key_data_len < ret <= buflen. */ if (ret > key_data_len) { if (unlikely(key_data)) kvfree_sensitive(key_data, key_data_len); key_data_len = ret; continue; /* Allocate buffer */ } if (copy_to_user(buffer, key_data, ret)) ret = -EFAULT; break; } kvfree_sensitive(key_data, key_data_len); key_put_out: key_put(key); out: return ret; } /* * Change the ownership of a key * * The key must grant the caller Setattr permission for this to work, though * the key need not be fully instantiated yet. For the UID to be changed, or * for the GID to be changed to a group the caller is not a member of, the * caller must have sysadmin capability. If either uid or gid is -1 then that * attribute is not changed. * * If the UID is to be changed, the new user must have sufficient quota to * accept the key. The quota deduction will be removed from the old user to * the new user should the attribute be changed. * * If successful, 0 will be returned. */ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) { struct key_user *newowner, *zapowner = NULL; struct key *key; key_ref_t key_ref; long ret; kuid_t uid; kgid_t gid; unsigned long flags; uid = make_kuid(current_user_ns(), user); gid = make_kgid(current_user_ns(), group); ret = -EINVAL; if ((user != (uid_t) -1) && !uid_valid(uid)) goto error; if ((group != (gid_t) -1) && !gid_valid(gid)) goto error; ret = 0; if (user == (uid_t) -1 && group == (gid_t) -1) goto error; key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL, KEY_NEED_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; } key = key_ref_to_ptr(key_ref); /* make the changes with the locks held to prevent chown/chown races */ ret = -EACCES; down_write(&key->sem); { bool is_privileged_op = false; /* only the sysadmin can chown a key to some other UID */ if (user != (uid_t) -1 && !uid_eq(key->uid, uid)) is_privileged_op = true; /* only the sysadmin can set the key's GID to a group other * than one of those that the current process subscribes to */ if (group != (gid_t) -1 && !gid_eq(gid, key->gid) && !in_group_p(gid)) is_privileged_op = true; if (is_privileged_op && !capable(CAP_SYS_ADMIN)) goto error_put; } /* change the UID */ if (user != (uid_t) -1 && !uid_eq(uid, key->uid)) { ret = -ENOMEM; newowner = key_user_lookup(uid); if (!newowner) goto error_put; /* transfer the quota burden to the new user */ if (test_bit(KEY_FLAG_IN_QUOTA, &key->flags)) { unsigned maxkeys = uid_eq(uid, GLOBAL_ROOT_UID) ? key_quota_root_maxkeys : key_quota_maxkeys; unsigned maxbytes = uid_eq(uid, GLOBAL_ROOT_UID) ? key_quota_root_maxbytes : key_quota_maxbytes; spin_lock_irqsave(&newowner->lock, flags); if (newowner->qnkeys + 1 > maxkeys || newowner->qnbytes + key->quotalen > maxbytes || newowner->qnbytes + key->quotalen < newowner->qnbytes) goto quota_overrun; newowner->qnkeys++; newowner->qnbytes += key->quotalen; spin_unlock_irqrestore(&newowner->lock, flags); spin_lock_irqsave(&key->user->lock, flags); key->user->qnkeys--; key->user->qnbytes -= key->quotalen; spin_unlock_irqrestore(&key->user->lock, flags); } atomic_dec(&key->user->nkeys); atomic_inc(&newowner->nkeys); if (key->state != KEY_IS_UNINSTANTIATED) { atomic_dec(&key->user->nikeys); atomic_inc(&newowner->nikeys); } zapowner = key->user; key->user = newowner; key->uid = uid; } /* change the GID */ if (group != (gid_t) -1) key->gid = gid; notify_key(key, NOTIFY_KEY_SETATTR, 0); ret = 0; error_put: up_write(&key->sem); key_put(key); if (zapowner) key_user_put(zapowner); error: return ret; quota_overrun: spin_unlock_irqrestore(&newowner->lock, flags); zapowner = newowner; ret = -EDQUOT; goto error_put; } /* * Change the permission mask on a key. * * The key must grant the caller Setattr permission for this to work, though * the key need not be fully instantiated yet. If the caller does not have * sysadmin capability, it may only change the permission on keys that it owns. */ long keyctl_setperm_key(key_serial_t id, key_perm_t perm) { struct key *key; key_ref_t key_ref; long ret; ret = -EINVAL; if (perm & ~(KEY_POS_ALL | KEY_USR_ALL | KEY_GRP_ALL | KEY_OTH_ALL)) goto error; key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL, KEY_NEED_SETATTR); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error; } key = key_ref_to_ptr(key_ref); /* make the changes with the locks held to prevent chown/chmod races */ ret = -EACCES; down_write(&key->sem); /* if we're not the sysadmin, we can only change a key that we own */ if (uid_eq(key->uid, current_fsuid()) || capable(CAP_SYS_ADMIN)) { key->perm = perm; notify_key(key, NOTIFY_KEY_SETATTR, 0); ret = 0; } up_write(&key->sem); key_put(key); error: return ret; } /* * Get the destination keyring for instantiation and check that the caller has * Write permission on it. */ static long get_instantiation_keyring(key_serial_t ringid, struct request_key_auth *rka, struct key **_dest_keyring) { key_ref_t dkref; *_dest_keyring = NULL; /* just return a NULL pointer if we weren't asked to make a link */ if (ringid == 0) return 0; /* if a specific keyring is nominated by ID, then use that */ if (ringid > 0) { dkref = lookup_user_key(ringid, KEY_LOOKUP_CREATE, KEY_NEED_WRITE); if (IS_ERR(dkref)) return PTR_ERR(dkref); *_dest_keyring = key_ref_to_ptr(dkref); return 0; } if (ringid == KEY_SPEC_REQKEY_AUTH_KEY) return -EINVAL; /* otherwise specify the destination keyring recorded in the * authorisation key (any KEY_SPEC_*_KEYRING) */ if (ringid >= KEY_SPEC_REQUESTOR_KEYRING) { *_dest_keyring = key_get(rka->dest_keyring); return 0; } return -ENOKEY; } /* * Change the request_key authorisation key on the current process. */ static int keyctl_change_reqkey_auth(struct key *key) { struct cred *new; new = prepare_creds(); if (!new) return -ENOMEM; key_put(new->request_key_auth); new->request_key_auth = key_get(key); return commit_creds(new); } /* * Instantiate a key with the specified payload and link the key into the * destination keyring if one is given. * * The caller must have the appropriate instantiation permit set for this to * work (see keyctl_assume_authority). No other permissions are required. * * If successful, 0 will be returned. */ static long keyctl_instantiate_key_common(key_serial_t id, struct iov_iter *from, key_serial_t ringid) { const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; size_t plen = from ? iov_iter_count(from) : 0; void *payload; long ret; kenter("%d,,%zu,%d", id, plen, ringid); if (!plen) from = NULL; ret = -EINVAL; if (plen > 1024 * 1024 - 1) goto error; /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; instkey = cred->request_key_auth; if (!instkey) goto error; rka = instkey->payload.data[0]; if (rka->target_key->serial != id) goto error; /* pull the payload in if one was supplied */ payload = NULL; if (from) { ret = -ENOMEM; payload = kvmalloc(plen, GFP_KERNEL); if (!payload) goto error; ret = -EFAULT; if (!copy_from_iter_full(payload, plen, from)) goto error2; } /* find the destination keyring amongst those belonging to the * requesting task */ ret = get_instantiation_keyring(ringid, rka, &dest_keyring); if (ret < 0) goto error2; /* instantiate the key and link it into a keyring */ ret = key_instantiate_and_link(rka->target_key, payload, plen, dest_keyring, instkey); key_put(dest_keyring); /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) keyctl_change_reqkey_auth(NULL); error2: kvfree_sensitive(payload, plen); error: return ret; } /* * Instantiate a key with the specified payload and link the key into the * destination keyring if one is given. * * The caller must have the appropriate instantiation permit set for this to * work (see keyctl_assume_authority). No other permissions are required. * * If successful, 0 will be returned. */ long keyctl_instantiate_key(key_serial_t id, const void __user *_payload, size_t plen, key_serial_t ringid) { if (_payload && plen) { struct iov_iter from; int ret; ret = import_ubuf(ITER_SOURCE, (void __user *)_payload, plen, &from); if (unlikely(ret)) return ret; return keyctl_instantiate_key_common(id, &from, ringid); } return keyctl_instantiate_key_common(id, NULL, ringid); } /* * Instantiate a key with the specified multipart payload and link the key into * the destination keyring if one is given. * * The caller must have the appropriate instantiation permit set for this to * work (see keyctl_assume_authority). No other permissions are required. * * If successful, 0 will be returned. */ long keyctl_instantiate_key_iov(key_serial_t id, const struct iovec __user *_payload_iov, unsigned ioc, key_serial_t ringid) { struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; struct iov_iter from; long ret; if (!_payload_iov) ioc = 0; ret = import_iovec(ITER_SOURCE, _payload_iov, ioc, ARRAY_SIZE(iovstack), &iov, &from); if (ret < 0) return ret; ret = keyctl_instantiate_key_common(id, &from, ringid); kfree(iov); return ret; } /* * Negatively instantiate the key with the given timeout (in seconds) and link * the key into the destination keyring if one is given. * * The caller must have the appropriate instantiation permit set for this to * work (see keyctl_assume_authority). No other permissions are required. * * The key and any links to the key will be automatically garbage collected * after the timeout expires. * * Negative keys are used to rate limit repeated request_key() calls by causing * them to return -ENOKEY until the negative key expires. * * If successful, 0 will be returned. */ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) { return keyctl_reject_key(id, timeout, ENOKEY, ringid); } /* * Negatively instantiate the key with the given timeout (in seconds) and error * code and link the key into the destination keyring if one is given. * * The caller must have the appropriate instantiation permit set for this to * work (see keyctl_assume_authority). No other permissions are required. * * The key and any links to the key will be automatically garbage collected * after the timeout expires. * * Negative keys are used to rate limit repeated request_key() calls by causing * them to return the specified error code until the negative key expires. * * If successful, 0 will be returned. */ long keyctl_reject_key(key_serial_t id, unsigned timeout, unsigned error, key_serial_t ringid) { const struct cred *cred = current_cred(); struct request_key_auth *rka; struct key *instkey, *dest_keyring; long ret; kenter("%d,%u,%u,%d", id, timeout, error, ringid); /* must be a valid error code and mustn't be a kernel special */ if (error <= 0 || error >= MAX_ERRNO || error == ERESTARTSYS || error == ERESTARTNOINTR || error == ERESTARTNOHAND || error == ERESTART_RESTARTBLOCK) return -EINVAL; /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; instkey = cred->request_key_auth; if (!instkey) goto error; rka = instkey->payload.data[0]; if (rka->target_key->serial != id) goto error; /* find the destination keyring if present (which must also be * writable) */ ret = get_instantiation_keyring(ringid, rka, &dest_keyring); if (ret < 0) goto error; /* instantiate the key and link it into a keyring */ ret = key_reject_and_link(rka->target_key, timeout, error, dest_keyring, instkey); key_put(dest_keyring); /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) keyctl_change_reqkey_auth(NULL); error: return ret; } /* * Read or set the default keyring in which request_key() will cache keys and * return the old setting. * * If a thread or process keyring is specified then it will be created if it * doesn't yet exist. The old setting will be returned if successful. */ long keyctl_set_reqkey_keyring(int reqkey_defl) { struct cred *new; int ret, old_setting; old_setting = current_cred_xxx(jit_keyring); if (reqkey_defl == KEY_REQKEY_DEFL_NO_CHANGE) return old_setting; new = prepare_creds(); if (!new) return -ENOMEM; switch (reqkey_defl) { case KEY_REQKEY_DEFL_THREAD_KEYRING: ret = install_thread_keyring_to_cred(new); if (ret < 0) goto error; goto set; case KEY_REQKEY_DEFL_PROCESS_KEYRING: ret = install_process_keyring_to_cred(new); if (ret < 0) goto error; goto set; case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_SESSION_KEYRING: case KEY_REQKEY_DEFL_USER_KEYRING: case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: goto set; case KEY_REQKEY_DEFL_NO_CHANGE: case KEY_REQKEY_DEFL_GROUP_KEYRING: default: ret = -EINVAL; goto error; } set: new->jit_keyring = reqkey_defl; commit_creds(new); return old_setting; error: abort_creds(new); return ret; } /* * Set or clear the timeout on a key. * * Either the key must grant the caller Setattr permission or else the caller * must hold an instantiation authorisation token for the key. * * The timeout is either 0 to clear the timeout, or a number of seconds from * the current time. The key and any links to the key will be automatically * garbage collected after the timeout expires. * * Keys with KEY_FLAG_KEEP set should not be timed out. * * If successful, 0 is returned. */ long keyctl_set_timeout(key_serial_t id, unsigned timeout) { struct key *key, *instkey; key_ref_t key_ref; long ret; key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL, KEY_NEED_SETATTR); if (IS_ERR(key_ref)) { /* setting the timeout on a key under construction is permitted * if we have the authorisation token handy */ if (PTR_ERR(key_ref) == -EACCES) { instkey = key_get_instantiation_authkey(id); if (!IS_ERR(instkey)) { key_put(instkey); key_ref = lookup_user_key(id, KEY_LOOKUP_PARTIAL, KEY_AUTHTOKEN_OVERRIDE); if (!IS_ERR(key_ref)) goto okay; } } ret = PTR_ERR(key_ref); goto error; } okay: key = key_ref_to_ptr(key_ref); ret = 0; if (test_bit(KEY_FLAG_KEEP, &key->flags)) { ret = -EPERM; } else { key_set_timeout(key, timeout); notify_key(key, NOTIFY_KEY_SETATTR, 0); } key_put(key); error: return ret; } /* * Assume (or clear) the authority to instantiate the specified key. * * This sets the authoritative token currently in force for key instantiation. * This must be done for a key to be instantiated. It has the effect of making * available all the keys from the caller of the request_key() that created a * key to request_key() calls made by the caller of this function. * * The caller must have the instantiation key in their process keyrings with a * Search permission grant available to the caller. * * If the ID given is 0, then the setting will be cleared and 0 returned. * * If the ID given has a matching an authorisation key, then that key will be * set and its ID will be returned. The authorisation key can be read to get * the callout information passed to request_key(). */ long keyctl_assume_authority(key_serial_t id) { struct key *authkey; long ret; /* special key IDs aren't permitted */ ret = -EINVAL; if (id < 0) goto error; /* we divest ourselves of authority if given an ID of 0 */ if (id == 0) { ret = keyctl_change_reqkey_auth(NULL); goto error; } /* attempt to assume the authority temporarily granted to us whilst we * instantiate the specified key * - the authorisation key must be in the current task's keyrings * somewhere */ authkey = key_get_instantiation_authkey(id); if (IS_ERR(authkey)) { ret = PTR_ERR(authkey); goto error; } ret = keyctl_change_reqkey_auth(authkey); if (ret == 0) ret = authkey->serial; key_put(authkey); error: return ret; } /* * Get a key's the LSM security label. * * The key must grant the caller View permission for this to work. * * If there's a buffer, then up to buflen bytes of data will be placed into it. * * If successful, the amount of information available will be returned, * irrespective of how much was copied (including the terminal NUL). */ long keyctl_get_security(key_serial_t keyid, char __user *buffer, size_t buflen) { struct key *key, *instkey; key_ref_t key_ref; char *context; long ret; key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_NEED_VIEW); if (IS_ERR(key_ref)) { if (PTR_ERR(key_ref) != -EACCES) return PTR_ERR(key_ref); /* viewing a key under construction is also permitted if we * have the authorisation token handy */ instkey = key_get_instantiation_authkey(keyid); if (IS_ERR(instkey)) return PTR_ERR(instkey); key_put(instkey); key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, KEY_AUTHTOKEN_OVERRIDE); if (IS_ERR(key_ref)) return PTR_ERR(key_ref); } key = key_ref_to_ptr(key_ref); ret = security_key_getsecurity(key, &context); if (ret == 0) { /* if no information was returned, give userspace an empty * string */ ret = 1; if (buffer && buflen > 0 && copy_to_user(buffer, "", 1) != 0) ret = -EFAULT; } else if (ret > 0) { /* return as much data as there's room for */ if (buffer && buflen > 0) { if (buflen > ret) buflen = ret; if (copy_to_user(buffer, context, buflen) != 0) ret = -EFAULT; } kfree(context); } key_ref_put(key_ref); return ret; } /* * Attempt to install the calling process's session keyring on the process's * parent process. * * The keyring must exist and must grant the caller LINK permission, and the * parent process must be single-threaded and must have the same effective * ownership as this process and mustn't be SUID/SGID. * * The keyring will be emplaced on the parent when it next resumes userspace. * * If successful, 0 will be returned. */ long keyctl_session_to_parent(void) { struct task_struct *me, *parent; const struct cred *mycred, *pcred; struct callback_head *newwork, *oldwork; key_ref_t keyring_r; struct cred *cred; int ret; keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_NEED_LINK); if (IS_ERR(keyring_r)) return PTR_ERR(keyring_r); ret = -ENOMEM; /* our parent is going to need a new cred struct, a new tgcred struct * and new security data, so we allocate them here to prevent ENOMEM in * our parent */ cred = cred_alloc_blank(); if (!cred) goto error_keyring; newwork = &cred->rcu; cred->session_keyring = key_ref_to_ptr(keyring_r); keyring_r = NULL; init_task_work(newwork, key_change_session_keyring); me = current; rcu_read_lock(); write_lock_irq(&tasklist_lock); ret = -EPERM; oldwork = NULL; parent = rcu_dereference_protected(me->real_parent, lockdep_is_held(&tasklist_lock)); /* the parent mustn't be init and mustn't be a kernel thread */ if (parent->pid <= 1 || !parent->mm) goto unlock; /* the parent must be single threaded */ if (!thread_group_empty(parent)) goto unlock; /* the parent and the child must have different session keyrings or * there's no point */ mycred = current_cred(); pcred = __task_cred(parent); if (mycred == pcred || mycred->session_keyring == pcred->session_keyring) { ret = 0; goto unlock; } /* the parent must have the same effective ownership and mustn't be * SUID/SGID */ if (!uid_eq(pcred->uid, mycred->euid) || !uid_eq(pcred->euid, mycred->euid) || !uid_eq(pcred->suid, mycred->euid) || !gid_eq(pcred->gid, mycred->egid) || !gid_eq(pcred->egid, mycred->egid) || !gid_eq(pcred->sgid, mycred->egid)) goto unlock; /* the keyrings must have the same UID */ if ((pcred->session_keyring && !uid_eq(pcred->session_keyring->uid, mycred->euid)) || !uid_eq(mycred->session_keyring->uid, mycred->euid)) goto unlock; /* cancel an already pending keyring replacement */ oldwork = task_work_cancel_func(parent, key_change_session_keyring); /* the replacement session keyring is applied just prior to userspace * restarting */ ret = task_work_add(parent, newwork, TWA_RESUME); if (!ret) newwork = NULL; unlock: write_unlock_irq(&tasklist_lock); rcu_read_unlock(); if (oldwork) put_cred(container_of(oldwork, struct cred, rcu)); if (newwork) put_cred(cred); return ret; error_keyring: key_ref_put(keyring_r); return ret; } /* * Apply a restriction to a given keyring. * * The caller must have Setattr permission to change keyring restrictions. * * The requested type name may be a NULL pointer to reject all attempts * to link to the keyring. In this case, _restriction must also be NULL. * Otherwise, both _type and _restriction must be non-NULL. * * Returns 0 if successful. */ long keyctl_restrict_keyring(key_serial_t id, const char __user *_type, const char __user *_restriction) { key_ref_t key_ref; char type[32]; char *restriction = NULL; long ret; key_ref = lookup_user_key(id, 0, KEY_NEED_SETATTR); if (IS_ERR(key_ref)) return PTR_ERR(key_ref); ret = -EINVAL; if (_type) { if (!_restriction) goto error; ret = key_get_type_from_user(type, _type, sizeof(type)); if (ret < 0) goto error; restriction = strndup_user(_restriction, PAGE_SIZE); if (IS_ERR(restriction)) { ret = PTR_ERR(restriction); goto error; } } else { if (_restriction) goto error; } ret = keyring_restrict(key_ref, _type ? type : NULL, restriction); kfree(restriction); error: key_ref_put(key_ref); return ret; } #ifdef CONFIG_KEY_NOTIFICATIONS /* * Watch for changes to a key. * * The caller must have View permission to watch a key or keyring. */ long keyctl_watch_key(key_serial_t id, int watch_queue_fd, int watch_id) { struct watch_queue *wqueue; struct watch_list *wlist = NULL; struct watch *watch = NULL; struct key *key; key_ref_t key_ref; long ret; if (watch_id < -1 || watch_id > 0xff) return -EINVAL; key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_VIEW); if (IS_ERR(key_ref)) return PTR_ERR(key_ref); key = key_ref_to_ptr(key_ref); wqueue = get_watch_queue(watch_queue_fd); if (IS_ERR(wqueue)) { ret = PTR_ERR(wqueue); goto err_key; } if (watch_id >= 0) { ret = -ENOMEM; if (!key->watchers) { wlist = kzalloc(sizeof(*wlist), GFP_KERNEL); if (!wlist) goto err_wqueue; init_watch_list(wlist, NULL); } watch = kzalloc(sizeof(*watch), GFP_KERNEL); if (!watch) goto err_wlist; init_watch(watch, wqueue); watch->id = key->serial; watch->info_id = (u32)watch_id << WATCH_INFO_ID__SHIFT; ret = security_watch_key(key); if (ret < 0) goto err_watch; down_write(&key->sem); if (!key->watchers) { key->watchers = wlist; wlist = NULL; } ret = add_watch_to_object(watch, key->watchers); up_write(&key->sem); if (ret == 0) watch = NULL; } else { ret = -EBADSLT; if (key->watchers) { down_write(&key->sem); ret = remove_watch_from_object(key->watchers, wqueue, key_serial(key), false); up_write(&key->sem); } } err_watch: kfree(watch); err_wlist: kfree(wlist); err_wqueue: put_watch_queue(wqueue); err_key: key_put(key); return ret; } #endif /* CONFIG_KEY_NOTIFICATIONS */ /* * Get keyrings subsystem capabilities. */ long keyctl_capabilities(unsigned char __user *_buffer, size_t buflen) { size_t size = buflen; if (size > 0) { if (size > sizeof(keyrings_capabilities)) size = sizeof(keyrings_capabilities); if (copy_to_user(_buffer, keyrings_capabilities, size) != 0) return -EFAULT; if (size < buflen && clear_user(_buffer + size, buflen - size) != 0) return -EFAULT; } return sizeof(keyrings_capabilities); } /* * The key control system call */ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { switch (option) { case KEYCTL_GET_KEYRING_ID: return keyctl_get_keyring_ID((key_serial_t) arg2, (int) arg3); case KEYCTL_JOIN_SESSION_KEYRING: return keyctl_join_session_keyring((const char __user *) arg2); case KEYCTL_UPDATE: return keyctl_update_key((key_serial_t) arg2, (const void __user *) arg3, (size_t) arg4); case KEYCTL_REVOKE: return keyctl_revoke_key((key_serial_t) arg2); case KEYCTL_DESCRIBE: return keyctl_describe_key((key_serial_t) arg2, (char __user *) arg3, (unsigned) arg4); case KEYCTL_CLEAR: return keyctl_keyring_clear((key_serial_t) arg2); case KEYCTL_LINK: return keyctl_keyring_link((key_serial_t) arg2, (key_serial_t) arg3); case KEYCTL_UNLINK: return keyctl_keyring_unlink((key_serial_t) arg2, (key_serial_t) arg3); case KEYCTL_SEARCH: return keyctl_keyring_search((key_serial_t) arg2, (const char __user *) arg3, (const char __user *) arg4, (key_serial_t) arg5); case KEYCTL_READ: return keyctl_read_key((key_serial_t) arg2, (char __user *) arg3, (size_t) arg4); case KEYCTL_CHOWN: return keyctl_chown_key((key_serial_t) arg2, (uid_t) arg3, (gid_t) arg4); case KEYCTL_SETPERM: return keyctl_setperm_key((key_serial_t) arg2, (key_perm_t) arg3); case KEYCTL_INSTANTIATE: return keyctl_instantiate_key((key_serial_t) arg2, (const void __user *) arg3, (size_t) arg4, (key_serial_t) arg5); case KEYCTL_NEGATE: return keyctl_negate_key((key_serial_t) arg2, (unsigned) arg3, (key_serial_t) arg4); case KEYCTL_SET_REQKEY_KEYRING: return keyctl_set_reqkey_keyring(arg2); case KEYCTL_SET_TIMEOUT: return keyctl_set_timeout((key_serial_t) arg2, (unsigned) arg3); case KEYCTL_ASSUME_AUTHORITY: return keyctl_assume_authority((key_serial_t) arg2); case KEYCTL_GET_SECURITY: return keyctl_get_security((key_serial_t) arg2, (char __user *) arg3, (size_t) arg4); case KEYCTL_SESSION_TO_PARENT: return keyctl_session_to_parent(); case KEYCTL_REJECT: return keyctl_reject_key((key_serial_t) arg2, (unsigned) arg3, (unsigned) arg4, (key_serial_t) arg5); case KEYCTL_INSTANTIATE_IOV: return keyctl_instantiate_key_iov( (key_serial_t) arg2, (const struct iovec __user *) arg3, (unsigned) arg4, (key_serial_t) arg5); case KEYCTL_INVALIDATE: return keyctl_invalidate_key((key_serial_t) arg2); case KEYCTL_GET_PERSISTENT: return keyctl_get_persistent((uid_t)arg2, (key_serial_t)arg3); case KEYCTL_DH_COMPUTE: return keyctl_dh_compute((struct keyctl_dh_params __user *) arg2, (char __user *) arg3, (size_t) arg4, (struct keyctl_kdf_params __user *) arg5); case KEYCTL_RESTRICT_KEYRING: return keyctl_restrict_keyring((key_serial_t) arg2, (const char __user *) arg3, (const char __user *) arg4); case KEYCTL_PKEY_QUERY: if (arg3 != 0) return -EINVAL; return keyctl_pkey_query((key_serial_t)arg2, (const char __user *)arg4, (struct keyctl_pkey_query __user *)arg5); case KEYCTL_PKEY_ENCRYPT: case KEYCTL_PKEY_DECRYPT: case KEYCTL_PKEY_SIGN: return keyctl_pkey_e_d_s( option, (const struct keyctl_pkey_params __user *)arg2, (const char __user *)arg3, (const void __user *)arg4, (void __user *)arg5); case KEYCTL_PKEY_VERIFY: return keyctl_pkey_verify( (const struct keyctl_pkey_params __user *)arg2, (const char __user *)arg3, (const void __user *)arg4, (const void __user *)arg5); case KEYCTL_MOVE: return keyctl_keyring_move((key_serial_t)arg2, (key_serial_t)arg3, (key_serial_t)arg4, (unsigned int)arg5); case KEYCTL_CAPABILITIES: return keyctl_capabilities((unsigned char __user *)arg2, (size_t)arg3); case KEYCTL_WATCH_KEY: return keyctl_watch_key((key_serial_t)arg2, (int)arg3, (int)arg4); default: return -EOPNOTSUPP; } }
1 1 76 76 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 // SPDX-License-Identifier: GPL-2.0-only /* * This is the 1999 rewrite of IP Firewalling, aiming for kernel 2.3.x. * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org> */ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/slab.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("ip6tables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT)) static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV6, .priority = NF_IP6_PRI_FILTER, }; static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ static bool forward = true; module_param(forward, bool, 0000); static int ip6table_filter_table_init(struct net *net) { struct ip6t_replace *repl; int err; repl = ip6t_alloc_initial_table(&packet_filter); if (repl == NULL) return -ENOMEM; /* Entry 1 is the FORWARD hook */ ((struct ip6t_standard *)repl->entries)[1].target.verdict = forward ? -NF_ACCEPT - 1 : NF_DROP - 1; err = ip6t_register_table(net, &packet_filter, repl, filter_ops); kfree(repl); return err; } static int __net_init ip6table_filter_net_init(struct net *net) { if (!forward) return ip6table_filter_table_init(net); return 0; } static void __net_exit ip6table_filter_net_pre_exit(struct net *net) { ip6t_unregister_table_pre_exit(net, "filter"); } static void __net_exit ip6table_filter_net_exit(struct net *net) { ip6t_unregister_table_exit(net, "filter"); } static struct pernet_operations ip6table_filter_net_ops = { .init = ip6table_filter_net_init, .pre_exit = ip6table_filter_net_pre_exit, .exit = ip6table_filter_net_exit, }; static int __init ip6table_filter_init(void) { int ret = xt_register_template(&packet_filter, ip6table_filter_table_init); if (ret < 0) return ret; filter_ops = xt_hook_ops_alloc(&packet_filter, ip6t_do_table); if (IS_ERR(filter_ops)) { xt_unregister_template(&packet_filter); return PTR_ERR(filter_ops); } ret = register_pernet_subsys(&ip6table_filter_net_ops); if (ret < 0) { xt_unregister_template(&packet_filter); kfree(filter_ops); return ret; } return ret; } static void __exit ip6table_filter_fini(void) { unregister_pernet_subsys(&ip6table_filter_net_ops); xt_unregister_template(&packet_filter); kfree(filter_ops); } module_init(ip6table_filter_init); module_exit(ip6table_filter_fini);
1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/pgtable.h> #include <linux/string.h> #include <linux/bitops.h> #include <linux/smp.h> #include <linux/sched.h> #include <linux/sched/clock.h> #include <linux/semaphore.h> #include <linux/thread_info.h> #include <linux/init.h> #include <linux/uaccess.h> #include <linux/workqueue.h> #include <linux/delay.h> #include <linux/cpuhotplug.h> #include <asm/cpufeature.h> #include <asm/msr.h> #include <asm/bugs.h> #include <asm/cpu.h> #include <asm/intel-family.h> #include <asm/microcode.h> #include <asm/hwcap2.h> #include <asm/elf.h> #include <asm/cpu_device_id.h> #include <asm/cmdline.h> #include <asm/traps.h> #include <asm/resctrl.h> #include <asm/numa.h> #include <asm/thermal.h> #ifdef CONFIG_X86_64 #include <linux/topology.h> #endif #include "cpu.h" #ifdef CONFIG_X86_LOCAL_APIC #include <asm/mpspec.h> #include <asm/apic.h> #endif enum split_lock_detect_state { sld_off = 0, sld_warn, sld_fatal, sld_ratelimit, }; /* * Default to sld_off because most systems do not support split lock detection. * sld_state_setup() will switch this to sld_warn on systems that support * split lock/bus lock detect, unless there is a command line override. */ static enum split_lock_detect_state sld_state __ro_after_init = sld_off; static u64 msr_test_ctrl_cache __ro_after_init; /* * With a name like MSR_TEST_CTL it should go without saying, but don't touch * MSR_TEST_CTL unless the CPU is one of the whitelisted models. Writing it * on CPUs that do not support SLD can cause fireworks, even when writing '0'. */ static bool cpu_model_supports_sld __ro_after_init; /* * Processors which have self-snooping capability can handle conflicting * memory type across CPUs by snooping its own cache. However, there exists * CPU models in which having conflicting memory types still leads to * unpredictable behavior, machine check errors, or hangs. Clear this * feature to prevent its use on machines with known erratas. */ static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c) { switch (c->x86_vfm) { case INTEL_CORE_YONAH: case INTEL_CORE2_MEROM: case INTEL_CORE2_MEROM_L: case INTEL_CORE2_PENRYN: case INTEL_CORE2_DUNNINGTON: case INTEL_NEHALEM: case INTEL_NEHALEM_G: case INTEL_NEHALEM_EP: case INTEL_NEHALEM_EX: case INTEL_WESTMERE: case INTEL_WESTMERE_EP: case INTEL_SANDYBRIDGE: setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP); } } static bool ring3mwait_disabled __read_mostly; static int __init ring3mwait_disable(char *__unused) { ring3mwait_disabled = true; return 1; } __setup("ring3mwait=disable", ring3mwait_disable); static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) { /* * Ring 3 MONITOR/MWAIT feature cannot be detected without * cpu model and family comparison. */ if (c->x86 != 6) return; switch (c->x86_vfm) { case INTEL_XEON_PHI_KNL: case INTEL_XEON_PHI_KNM: break; default: return; } if (ring3mwait_disabled) return; set_cpu_cap(c, X86_FEATURE_RING3MWAIT); this_cpu_or(msr_misc_features_shadow, 1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT); if (c == &boot_cpu_data) ELF_HWCAP2 |= HWCAP2_RING3MWAIT; } /* * Early microcode releases for the Spectre v2 mitigation were broken. * Information taken from; * - https://newsroom.intel.com/wp-content/uploads/sites/11/2018/03/microcode-update-guidance.pdf * - https://kb.vmware.com/s/article/52345 * - Microcode revisions observed in the wild * - Release note from 20180108 microcode release */ struct sku_microcode { u32 vfm; u8 stepping; u32 microcode; }; static const struct sku_microcode spectre_bad_microcodes[] = { { INTEL_KABYLAKE, 0x0B, 0x80 }, { INTEL_KABYLAKE, 0x0A, 0x80 }, { INTEL_KABYLAKE, 0x09, 0x80 }, { INTEL_KABYLAKE_L, 0x0A, 0x80 }, { INTEL_KABYLAKE_L, 0x09, 0x80 }, { INTEL_SKYLAKE_X, 0x03, 0x0100013e }, { INTEL_SKYLAKE_X, 0x04, 0x0200003c }, { INTEL_BROADWELL, 0x04, 0x28 }, { INTEL_BROADWELL_G, 0x01, 0x1b }, { INTEL_BROADWELL_D, 0x02, 0x14 }, { INTEL_BROADWELL_D, 0x03, 0x07000011 }, { INTEL_BROADWELL_X, 0x01, 0x0b000025 }, { INTEL_HASWELL_L, 0x01, 0x21 }, { INTEL_HASWELL_G, 0x01, 0x18 }, { INTEL_HASWELL, 0x03, 0x23 }, { INTEL_HASWELL_X, 0x02, 0x3b }, { INTEL_HASWELL_X, 0x04, 0x10 }, { INTEL_IVYBRIDGE_X, 0x04, 0x42a }, /* Observed in the wild */ { INTEL_SANDYBRIDGE_X, 0x06, 0x61b }, { INTEL_SANDYBRIDGE_X, 0x07, 0x712 }, }; static bool bad_spectre_microcode(struct cpuinfo_x86 *c) { int i; /* * We know that the hypervisor lie to us on the microcode version so * we may as well hope that it is running the correct version. */ if (cpu_has(c, X86_FEATURE_HYPERVISOR)) return false; for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) { if (c->x86_vfm == spectre_bad_microcodes[i].vfm && c->x86_stepping == spectre_bad_microcodes[i].stepping) return (c->microcode <= spectre_bad_microcodes[i].microcode); } return false; } #define MSR_IA32_TME_ACTIVATE 0x982 /* Helpers to access TME_ACTIVATE MSR */ #define TME_ACTIVATE_LOCKED(x) (x & 0x1) #define TME_ACTIVATE_ENABLED(x) (x & 0x2) #define TME_ACTIVATE_KEYID_BITS(x) ((x >> 32) & 0xf) /* Bits 35:32 */ static void detect_tme_early(struct cpuinfo_x86 *c) { u64 tme_activate; int keyid_bits; rdmsrl(MSR_IA32_TME_ACTIVATE, tme_activate); if (!TME_ACTIVATE_LOCKED(tme_activate) || !TME_ACTIVATE_ENABLED(tme_activate)) { pr_info_once("x86/tme: not enabled by BIOS\n"); clear_cpu_cap(c, X86_FEATURE_TME); return; } pr_info_once("x86/tme: enabled by BIOS\n"); keyid_bits = TME_ACTIVATE_KEYID_BITS(tme_activate); if (!keyid_bits) return; /* * KeyID bits are set by BIOS and can be present regardless * of whether the kernel is using them. They effectively lower * the number of physical address bits. * * Update cpuinfo_x86::x86_phys_bits accordingly. */ c->x86_phys_bits -= keyid_bits; pr_info_once("x86/mktme: BIOS enabled: x86_phys_bits reduced by %d\n", keyid_bits); } void intel_unlock_cpuid_leafs(struct cpuinfo_x86 *c) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; if (c->x86 < 6 || (c->x86 == 6 && c->x86_model < 0xd)) return; /* * The BIOS can have limited CPUID to leaf 2, which breaks feature * enumeration. Unlock it and update the maximum leaf info. */ if (msr_clear_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) c->cpuid_level = cpuid_eax(0); } static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; if ((c->x86 == 0xf && c->x86_model >= 0x03) || (c->x86 == 0x6 && c->x86_model >= 0x0e)) set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); if (c->x86 >= 6 && !cpu_has(c, X86_FEATURE_IA64)) c->microcode = intel_get_microcode_revision(); /* Now if any of them are set, check the blacklist and clear the lot */ if ((cpu_has(c, X86_FEATURE_SPEC_CTRL) || cpu_has(c, X86_FEATURE_INTEL_STIBP) || cpu_has(c, X86_FEATURE_IBRS) || cpu_has(c, X86_FEATURE_IBPB) || cpu_has(c, X86_FEATURE_STIBP)) && bad_spectre_microcode(c)) { pr_warn("Intel Spectre v2 broken microcode detected; disabling Speculation Control\n"); setup_clear_cpu_cap(X86_FEATURE_IBRS); setup_clear_cpu_cap(X86_FEATURE_IBPB); setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); setup_clear_cpu_cap(X86_FEATURE_SSBD); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); } /* * Atom erratum AAE44/AAF40/AAG38/AAH41: * * A race condition between speculative fetches and invalidating * a large page. This is worked around in microcode, but we * need the microcode to have already been loaded... so if it is * not, recommend a BIOS update and disable large pages. */ if (c->x86_vfm == INTEL_ATOM_BONNELL && c->x86_stepping <= 2 && c->microcode < 0x20e) { pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n"); clear_cpu_cap(c, X86_FEATURE_PSE); } #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #else /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ if (c->x86 == 15 && c->x86_cache_alignment == 64) c->x86_cache_alignment = 128; #endif /* CPUID workaround for 0F33/0F34 CPU */ if (c->x86 == 0xF && c->x86_model == 0x3 && (c->x86_stepping == 0x3 || c->x86_stepping == 0x4)) c->x86_phys_bits = 36; /* * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate * with P/T states and does not stop in deep C-states. * * It is also reliable across cores and sockets. (but not across * cabinets - we turn it off in that case explicitly.) */ if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ switch (c->x86_vfm) { case INTEL_ATOM_SALTWELL_MID: case INTEL_ATOM_SALTWELL_TABLET: case INTEL_ATOM_SILVERMONT_MID: case INTEL_ATOM_AIRMONT_NP: set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); break; } /* * There is a known erratum on Pentium III and Core Solo * and Core Duo CPUs. * " Page with PAT set to WC while associated MTRR is UC * may consolidate to UC " * Because of this erratum, it is better to stick with * setting WC in MTRR rather than using PAT on these CPUs. * * Enable PAT WC only on P4, Core 2 or later CPUs. */ if (c->x86 == 6 && c->x86_model < 15) clear_cpu_cap(c, X86_FEATURE_PAT); /* * If fast string is not enabled in IA32_MISC_ENABLE for any reason, * clear the fast string and enhanced fast string CPU capabilities. */ if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); if (!(misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING)) { pr_info("Disabled fast string operations\n"); setup_clear_cpu_cap(X86_FEATURE_REP_GOOD); setup_clear_cpu_cap(X86_FEATURE_ERMS); } } /* * Intel Quark Core DevMan_001.pdf section 6.4.11 * "The operating system also is required to invalidate (i.e., flush) * the TLB when any changes are made to any of the page table entries. * The operating system must reload CR3 to cause the TLB to be flushed" * * As a result, boot_cpu_has(X86_FEATURE_PGE) in arch/x86/include/asm/tlbflush.h * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE * to be modified. */ if (c->x86_vfm == INTEL_QUARK_X1000) { pr_info("Disabling PGE capability bit\n"); setup_clear_cpu_cap(X86_FEATURE_PGE); } check_memory_type_self_snoop_errata(c); /* * Adjust the number of physical bits early because it affects the * valid bits of the MTRR mask registers. */ if (cpu_has(c, X86_FEATURE_TME)) detect_tme_early(c); } static void bsp_init_intel(struct cpuinfo_x86 *c) { resctrl_cpu_detect(c); } #ifdef CONFIG_X86_32 /* * Early probe support logic for ppro memory erratum #50 * * This is called before we do cpu ident work */ int ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 1 && boot_cpu_data.x86_stepping < 8) { pr_info("Pentium Pro with Errata#50 detected. Taking evasive action.\n"); return 1; } return 0; } static void intel_smp_check(struct cpuinfo_x86 *c) { /* calling is from identify_secondary_cpu() ? */ if (!c->cpu_index) return; /* * Mask B, Pentium, but not Pentium MMX */ if (c->x86 == 5 && c->x86_stepping >= 1 && c->x86_stepping <= 4 && c->x86_model <= 3) { /* * Remember we have B step Pentia with bugs */ WARN_ONCE(1, "WARNING: SMP operation may be unreliable" "with B stepping processors.\n"); } } static int forcepae; static int __init forcepae_setup(char *__unused) { forcepae = 1; return 1; } __setup("forcepae", forcepae_setup); static void intel_workarounds(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_F00F_BUG /* * All models of Pentium and Pentium with MMX technology CPUs * have the F0 0F bug, which lets nonprivileged users lock up the * system. Announce that the fault handler will be checking for it. * The Quark is also family 5, but does not have the same bug. */ clear_cpu_bug(c, X86_BUG_F00F); if (c->x86 == 5 && c->x86_model < 9) { static int f00f_workaround_enabled; set_cpu_bug(c, X86_BUG_F00F); if (!f00f_workaround_enabled) { pr_notice("Intel Pentium with F0 0F bug - workaround enabled.\n"); f00f_workaround_enabled = 1; } } #endif /* * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until * model 3 mask 3 */ if ((c->x86<<8 | c->x86_model<<4 | c->x86_stepping) < 0x633) clear_cpu_cap(c, X86_FEATURE_SEP); /* * PAE CPUID issue: many Pentium M report no PAE but may have a * functionally usable PAE implementation. * Forcefully enable PAE if kernel parameter "forcepae" is present. */ if (forcepae) { pr_warn("PAE forced!\n"); set_cpu_cap(c, X86_FEATURE_PAE); add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); } /* * P4 Xeon erratum 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_stepping == 1)) { if (msr_set_bit(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) > 0) { pr_info("CPU: C0 stepping P4 Xeon detected.\n"); pr_info("CPU: Disabling hardware prefetching (Erratum 037)\n"); } } /* * See if we have a good local APIC by checking for buggy Pentia, * i.e. all B steppings and the C2 stepping of P54C when using their * integrated APIC (see 11AP erratum in "Pentium Processor * Specification Update"). */ if (boot_cpu_has(X86_FEATURE_APIC) && (c->x86<<8 | c->x86_model<<4) == 0x520 && (c->x86_stepping < 0x6 || c->x86_stepping == 0xb)) set_cpu_bug(c, X86_BUG_11AP); #ifdef CONFIG_X86_INTEL_USERCOPY /* * Set up the preferred alignment for movsl bulk memory moves */ switch (c->x86) { case 4: /* 486: untested */ break; case 5: /* Old Pentia: untested */ break; case 6: /* PII/PIII only like movsl with 8-byte alignment */ movsl_mask.mask = 7; break; case 15: /* P4 is OK down to 8-byte alignment */ movsl_mask.mask = 7; break; } #endif intel_smp_check(c); } #else static void intel_workarounds(struct cpuinfo_x86 *c) { } #endif static void srat_detect_node(struct cpuinfo_x86 *c) { #ifdef CONFIG_NUMA unsigned node; int cpu = smp_processor_id(); /* Don't do the funky fallback heuristics the AMD version employs for now. */ node = numa_cpu_node(cpu); if (node == NUMA_NO_NODE || !node_online(node)) { /* reuse the value from init_cpu_to_node() */ node = cpu_to_node(cpu); } numa_set_node(cpu, node); #endif } static void init_cpuid_fault(struct cpuinfo_x86 *c) { u64 msr; if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) { if (msr & MSR_PLATFORM_INFO_CPUID_FAULT) set_cpu_cap(c, X86_FEATURE_CPUID_FAULT); } } static void init_intel_misc_features(struct cpuinfo_x86 *c) { u64 msr; if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr)) return; /* Clear all MISC features */ this_cpu_write(msr_misc_features_shadow, 0); /* Check features and update capabilities and shadow control bits */ init_cpuid_fault(c); probe_xeon_phi_r3mwait(c); msr = this_cpu_read(msr_misc_features_shadow); wrmsrl(MSR_MISC_FEATURES_ENABLES, msr); } static void split_lock_init(void); static void bus_lock_init(void); static void init_intel(struct cpuinfo_x86 *c) { early_init_intel(c); intel_workarounds(c); init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); /* Check for version and the number of counters */ if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); } if (cpu_has(c, X86_FEATURE_XMM2)) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (boot_cpu_has(X86_FEATURE_DS)) { unsigned int l1, l2; rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & MSR_IA32_MISC_ENABLE_BTS_UNAVAIL)) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL)) set_cpu_cap(c, X86_FEATURE_PEBS); } if (boot_cpu_has(X86_FEATURE_CLFLUSH) && (c->x86_vfm == INTEL_CORE2_DUNNINGTON || c->x86_vfm == INTEL_NEHALEM_EX || c->x86_vfm == INTEL_WESTMERE_EX)) set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR); if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT) set_cpu_bug(c, X86_BUG_MONITOR); #ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_REP_GOOD); #else /* * Names for the Pentium II/Celeron processors * detectable only by also checking the cache size. * Dixon is NOT a Celeron. */ if (c->x86 == 6) { unsigned int l2 = c->x86_cache_size; char *p = NULL; switch (c->x86_model) { case 5: if (l2 == 0) p = "Celeron (Covington)"; else if (l2 == 256) p = "Mobile Pentium II (Dixon)"; break; case 6: if (l2 == 128) p = "Celeron (Mendocino)"; else if (c->x86_stepping == 0 || c->x86_stepping == 5) p = "Celeron-A"; break; case 8: if (l2 == 128) p = "Celeron (Coppermine)"; break; } if (p) strcpy(c->x86_model_id, p); } if (c->x86 == 15) set_cpu_cap(c, X86_FEATURE_P4); if (c->x86 == 6) set_cpu_cap(c, X86_FEATURE_P3); #endif /* Work around errata */ srat_detect_node(c); init_ia32_feat_ctl(c); init_intel_misc_features(c); split_lock_init(); bus_lock_init(); intel_init_thermal(c); } #ifdef CONFIG_X86_32 static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) { /* * Intel PIII Tualatin. This comes in two flavours. * One has 256kb of cache, the other 512. We have no way * to determine which, so we use a boottime override * for the 512kb model, and assume 256 otherwise. */ if ((c->x86 == 6) && (c->x86_model == 11) && (size == 0)) size = 256; /* * Intel Quark SoC X1000 contains a 4-way set associative * 16K cache with a 16 byte cache line and 256 lines per tag */ if ((c->x86 == 5) && (c->x86_model == 9)) size = 16; return size; } #endif #define TLB_INST_4K 0x01 #define TLB_INST_4M 0x02 #define TLB_INST_2M_4M 0x03 #define TLB_INST_ALL 0x05 #define TLB_INST_1G 0x06 #define TLB_DATA_4K 0x11 #define TLB_DATA_4M 0x12 #define TLB_DATA_2M_4M 0x13 #define TLB_DATA_4K_4M 0x14 #define TLB_DATA_1G 0x16 #define TLB_DATA0_4K 0x21 #define TLB_DATA0_4M 0x22 #define TLB_DATA0_2M_4M 0x23 #define STLB_4K 0x41 #define STLB_4K_2M 0x42 static const struct _tlb_table intel_tlb_table[] = { { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, { 0x02, TLB_INST_4M, 2, " TLB_INST 4 MByte pages, full associative" }, { 0x03, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way set associative" }, { 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" }, { 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" }, { 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" }, { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" }, { 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, { 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, { 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" }, { 0x55, TLB_INST_2M_4M, 7, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, { 0x56, TLB_DATA0_4M, 16, " TLB_DATA0 4 MByte pages, 4-way set associative" }, { 0x57, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, 4-way associative" }, { 0x59, TLB_DATA0_4K, 16, " TLB_DATA0 4 KByte pages, fully associative" }, { 0x5a, TLB_DATA0_2M_4M, 32, " TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative" }, { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" }, { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" }, { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" }, { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" }, { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" }, { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" }, { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, { 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" }, { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, { 0x00, 0, 0 } }; static void intel_tlb_lookup(const unsigned char desc) { unsigned char k; if (desc == 0) return; /* look up this descriptor in the table */ for (k = 0; intel_tlb_table[k].descriptor != desc && intel_tlb_table[k].descriptor != 0; k++) ; if (intel_tlb_table[k].tlb_type == 0) return; switch (intel_tlb_table[k].tlb_type) { case STLB_4K: if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; break; case STLB_4K_2M: if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; break; case TLB_INST_ALL: if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; break; case TLB_INST_4K: if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; break; case TLB_INST_4M: if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; break; case TLB_INST_2M_4M: if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; break; case TLB_DATA_4K: case TLB_DATA0_4K: if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; break; case TLB_DATA_4M: case TLB_DATA0_4M: if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; break; case TLB_DATA_2M_4M: case TLB_DATA0_2M_4M: if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; break; case TLB_DATA_4K_4M: if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; break; case TLB_DATA_1G: if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; break; } } static void intel_detect_tlb(struct cpuinfo_x86 *c) { int i, j, n; unsigned int regs[4]; unsigned char *desc = (unsigned char *)regs; if (c->cpuid_level < 2) return; /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; for (i = 0 ; i < n ; i++) { cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); /* If bit 31 is set, this is an unknown format */ for (j = 0 ; j < 3 ; j++) if (regs[j] & (1 << 31)) regs[j] = 0; /* Byte 0 is level count, not a descriptor */ for (j = 1 ; j < 16 ; j++) intel_tlb_lookup(desc[j]); } } static const struct cpu_dev intel_cpu_dev = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, #ifdef CONFIG_X86_32 .legacy_models = { { .family = 4, .model_names = { [0] = "486 DX-25/33", [1] = "486 DX-50", [2] = "486 SX", [3] = "486 DX/2", [4] = "486 SL", [5] = "486 SX/2", [7] = "486 DX/2-WB", [8] = "486 DX/4", [9] = "486 DX/4-WB" } }, { .family = 5, .model_names = { [0] = "Pentium 60/66 A-step", [1] = "Pentium 60/66", [2] = "Pentium 75 - 200", [3] = "OverDrive PODP5V83", [4] = "Pentium MMX", [7] = "Mobile Pentium 75 - 200", [8] = "Mobile Pentium MMX", [9] = "Quark SoC X1000", } }, { .family = 6, .model_names = { [0] = "Pentium Pro A-step", [1] = "Pentium Pro", [3] = "Pentium II (Klamath)", [4] = "Pentium II (Deschutes)", [5] = "Pentium II (Deschutes)", [6] = "Mobile Pentium II", [7] = "Pentium III (Katmai)", [8] = "Pentium III (Coppermine)", [10] = "Pentium III (Cascades)", [11] = "Pentium III (Tualatin)", } }, { .family = 15, .model_names = { [0] = "Pentium 4 (Unknown)", [1] = "Pentium 4 (Willamette)", [2] = "Pentium 4 (Northwood)", [4] = "Pentium 4 (Foster)", [5] = "Pentium 4 (Foster)", } }, }, .legacy_cache_size = intel_size_cache, #endif .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, .c_bsp_init = bsp_init_intel, .c_init = init_intel, .c_x86_vendor = X86_VENDOR_INTEL, }; cpu_dev_register(intel_cpu_dev); #undef pr_fmt #define pr_fmt(fmt) "x86/split lock detection: " fmt static const struct { const char *option; enum split_lock_detect_state state; } sld_options[] __initconst = { { "off", sld_off }, { "warn", sld_warn }, { "fatal", sld_fatal }, { "ratelimit:", sld_ratelimit }, }; static struct ratelimit_state bld_ratelimit; static unsigned int sysctl_sld_mitigate = 1; static DEFINE_SEMAPHORE(buslock_sem, 1); #ifdef CONFIG_PROC_SYSCTL static struct ctl_table sld_sysctls[] = { { .procname = "split_lock_mitigate", .data = &sysctl_sld_mitigate, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_douintvec_minmax, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, }; static int __init sld_mitigate_sysctl_init(void) { register_sysctl_init("kernel", sld_sysctls); return 0; } late_initcall(sld_mitigate_sysctl_init); #endif static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt), ratelimit; if (strncmp(arg, opt, len)) return false; /* * Min ratelimit is 1 bus lock/sec. * Max ratelimit is 1000 bus locks/sec. */ if (sscanf(arg, "ratelimit:%d", &ratelimit) == 1 && ratelimit > 0 && ratelimit <= 1000) { ratelimit_state_init(&bld_ratelimit, HZ, ratelimit); ratelimit_set_flags(&bld_ratelimit, RATELIMIT_MSG_ON_RELEASE); return true; } return len == arglen; } static bool split_lock_verify_msr(bool on) { u64 ctrl, tmp; if (rdmsrl_safe(MSR_TEST_CTRL, &ctrl)) return false; if (on) ctrl |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT; else ctrl &= ~MSR_TEST_CTRL_SPLIT_LOCK_DETECT; if (wrmsrl_safe(MSR_TEST_CTRL, ctrl)) return false; rdmsrl(MSR_TEST_CTRL, tmp); return ctrl == tmp; } static void __init sld_state_setup(void) { enum split_lock_detect_state state = sld_warn; char arg[20]; int i, ret; if (!boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && !boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) return; ret = cmdline_find_option(boot_command_line, "split_lock_detect", arg, sizeof(arg)); if (ret >= 0) { for (i = 0; i < ARRAY_SIZE(sld_options); i++) { if (match_option(arg, ret, sld_options[i].option)) { state = sld_options[i].state; break; } } } sld_state = state; } static void __init __split_lock_setup(void) { if (!split_lock_verify_msr(false)) { pr_info("MSR access failed: Disabled\n"); return; } rdmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); if (!split_lock_verify_msr(true)) { pr_info("MSR access failed: Disabled\n"); return; } /* Restore the MSR to its cached value. */ wrmsrl(MSR_TEST_CTRL, msr_test_ctrl_cache); setup_force_cpu_cap(X86_FEATURE_SPLIT_LOCK_DETECT); } /* * MSR_TEST_CTRL is per core, but we treat it like a per CPU MSR. Locking * is not implemented as one thread could undo the setting of the other * thread immediately after dropping the lock anyway. */ static void sld_update_msr(bool on) { u64 test_ctrl_val = msr_test_ctrl_cache; if (on) test_ctrl_val |= MSR_TEST_CTRL_SPLIT_LOCK_DETECT; wrmsrl(MSR_TEST_CTRL, test_ctrl_val); } static void split_lock_init(void) { /* * #DB for bus lock handles ratelimit and #AC for split lock is * disabled. */ if (sld_state == sld_ratelimit) { split_lock_verify_msr(false); return; } if (cpu_model_supports_sld) split_lock_verify_msr(sld_state != sld_off); } static void __split_lock_reenable_unlock(struct work_struct *work) { sld_update_msr(true); up(&buslock_sem); } static DECLARE_DELAYED_WORK(sl_reenable_unlock, __split_lock_reenable_unlock); static void __split_lock_reenable(struct work_struct *work) { sld_update_msr(true); } static DECLARE_DELAYED_WORK(sl_reenable, __split_lock_reenable); /* * If a CPU goes offline with pending delayed work to re-enable split lock * detection then the delayed work will be executed on some other CPU. That * handles releasing the buslock_sem, but because it executes on a * different CPU probably won't re-enable split lock detection. This is a * problem on HT systems since the sibling CPU on the same core may then be * left running with split lock detection disabled. * * Unconditionally re-enable detection here. */ static int splitlock_cpu_offline(unsigned int cpu) { sld_update_msr(true); return 0; } static void split_lock_warn(unsigned long ip) { struct delayed_work *work; int cpu; if (!current->reported_split_lock) pr_warn_ratelimited("#AC: %s/%d took a split_lock trap at address: 0x%lx\n", current->comm, current->pid, ip); current->reported_split_lock = 1; if (sysctl_sld_mitigate) { /* * misery factor #1: * sleep 10ms before trying to execute split lock. */ if (msleep_interruptible(10) > 0) return; /* * Misery factor #2: * only allow one buslocked disabled core at a time. */ if (down_interruptible(&buslock_sem) == -EINTR) return; work = &sl_reenable_unlock; } else { work = &sl_reenable; } cpu = get_cpu(); schedule_delayed_work_on(cpu, work, 2); /* Disable split lock detection on this CPU to make progress */ sld_update_msr(false); put_cpu(); } bool handle_guest_split_lock(unsigned long ip) { if (sld_state == sld_warn) { split_lock_warn(ip); return true; } pr_warn_once("#AC: %s/%d %s split_lock trap at address: 0x%lx\n", current->comm, current->pid, sld_state == sld_fatal ? "fatal" : "bogus", ip); current->thread.error_code = 0; current->thread.trap_nr = X86_TRAP_AC; force_sig_fault(SIGBUS, BUS_ADRALN, NULL); return false; } EXPORT_SYMBOL_GPL(handle_guest_split_lock); static void bus_lock_init(void) { u64 val; if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) return; rdmsrl(MSR_IA32_DEBUGCTLMSR, val); if ((boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) && (sld_state == sld_warn || sld_state == sld_fatal)) || sld_state == sld_off) { /* * Warn and fatal are handled by #AC for split lock if #AC for * split lock is supported. */ val &= ~DEBUGCTLMSR_BUS_LOCK_DETECT; } else { val |= DEBUGCTLMSR_BUS_LOCK_DETECT; } wrmsrl(MSR_IA32_DEBUGCTLMSR, val); } bool handle_user_split_lock(struct pt_regs *regs, long error_code) { if ((regs->flags & X86_EFLAGS_AC) || sld_state == sld_fatal) return false; split_lock_warn(regs->ip); return true; } void handle_bus_lock(struct pt_regs *regs) { switch (sld_state) { case sld_off: break; case sld_ratelimit: /* Enforce no more than bld_ratelimit bus locks/sec. */ while (!__ratelimit(&bld_ratelimit)) msleep(20); /* Warn on the bus lock. */ fallthrough; case sld_warn: pr_warn_ratelimited("#DB: %s/%d took a bus_lock trap at address: 0x%lx\n", current->comm, current->pid, regs->ip); break; case sld_fatal: force_sig_fault(SIGBUS, BUS_ADRALN, NULL); break; } } /* * CPU models that are known to have the per-core split-lock detection * feature even though they do not enumerate IA32_CORE_CAPABILITIES. */ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = { X86_MATCH_VFM(INTEL_ICELAKE_X, 0), X86_MATCH_VFM(INTEL_ICELAKE_L, 0), X86_MATCH_VFM(INTEL_ICELAKE_D, 0), {} }; static void __init split_lock_setup(struct cpuinfo_x86 *c) { const struct x86_cpu_id *m; u64 ia32_core_caps; if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) return; /* Check for CPUs that have support but do not enumerate it: */ m = x86_match_cpu(split_lock_cpu_ids); if (m) goto supported; if (!cpu_has(c, X86_FEATURE_CORE_CAPABILITIES)) return; /* * Not all bits in MSR_IA32_CORE_CAPS are architectural, but * MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT is. All CPUs that set * it have split lock detection. */ rdmsrl(MSR_IA32_CORE_CAPS, ia32_core_caps); if (ia32_core_caps & MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT) goto supported; /* CPU is not in the model list and does not have the MSR bit: */ return; supported: cpu_model_supports_sld = true; __split_lock_setup(); } static void sld_state_show(void) { if (!boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT) && !boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) return; switch (sld_state) { case sld_off: pr_info("disabled\n"); break; case sld_warn: if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { pr_info("#AC: crashing the kernel on kernel split_locks and warning on user-space split_locks\n"); if (cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/splitlock", NULL, splitlock_cpu_offline) < 0) pr_warn("No splitlock CPU offline handler\n"); } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { pr_info("#DB: warning on user-space bus_locks\n"); } break; case sld_fatal: if (boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT)) { pr_info("#AC: crashing the kernel on kernel split_locks and sending SIGBUS on user-space split_locks\n"); } else if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) { pr_info("#DB: sending SIGBUS on user-space bus_locks%s\n", boot_cpu_has(X86_FEATURE_SPLIT_LOCK_DETECT) ? " from non-WB" : ""); } break; case sld_ratelimit: if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT)) pr_info("#DB: setting system wide bus lock rate limit to %u/sec\n", bld_ratelimit.burst); break; } } void __init sld_setup(struct cpuinfo_x86 *c) { split_lock_setup(c); sld_state_setup(); sld_state_show(); } #define X86_HYBRID_CPU_TYPE_ID_SHIFT 24 /** * get_this_hybrid_cpu_type() - Get the type of this hybrid CPU * * Returns the CPU type [31:24] (i.e., Atom or Core) of a CPU in * a hybrid processor. If the processor is not hybrid, returns 0. */ u8 get_this_hybrid_cpu_type(void) { if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) return 0; return cpuid_eax(0x0000001a) >> X86_HYBRID_CPU_TYPE_ID_SHIFT; }
4 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 // SPDX-License-Identifier: GPL-2.0 /* Copyright(c) 2016-20 Intel Corporation. */ #include <linux/file.h> #include <linux/freezer.h> #include <linux/highmem.h> #include <linux/kthread.h> #include <linux/miscdevice.h> #include <linux/node.h> #include <linux/pagemap.h> #include <linux/ratelimit.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/sysfs.h> #include <linux/vmalloc.h> #include <asm/sgx.h> #include "driver.h" #include "encl.h" #include "encls.h" struct sgx_epc_section sgx_epc_sections[SGX_MAX_EPC_SECTIONS]; static int sgx_nr_epc_sections; static struct task_struct *ksgxd_tsk; static DECLARE_WAIT_QUEUE_HEAD(ksgxd_waitq); static DEFINE_XARRAY(sgx_epc_address_space); /* * These variables are part of the state of the reclaimer, and must be accessed * with sgx_reclaimer_lock acquired. */ static LIST_HEAD(sgx_active_page_list); static DEFINE_SPINLOCK(sgx_reclaimer_lock); static atomic_long_t sgx_nr_free_pages = ATOMIC_LONG_INIT(0); /* Nodes with one or more EPC sections. */ static nodemask_t sgx_numa_mask; /* * Array with one list_head for each possible NUMA node. Each * list contains all the sgx_epc_section's which are on that * node. */ static struct sgx_numa_node *sgx_numa_nodes; static LIST_HEAD(sgx_dirty_page_list); /* * Reset post-kexec EPC pages to the uninitialized state. The pages are removed * from the input list, and made available for the page allocator. SECS pages * prepending their children in the input list are left intact. * * Return 0 when sanitization was successful or kthread was stopped, and the * number of unsanitized pages otherwise. */ static unsigned long __sgx_sanitize_pages(struct list_head *dirty_page_list) { unsigned long left_dirty = 0; struct sgx_epc_page *page; LIST_HEAD(dirty); int ret; /* dirty_page_list is thread-local, no need for a lock: */ while (!list_empty(dirty_page_list)) { if (kthread_should_stop()) return 0; page = list_first_entry(dirty_page_list, struct sgx_epc_page, list); /* * Checking page->poison without holding the node->lock * is racy, but losing the race (i.e. poison is set just * after the check) just means __eremove() will be uselessly * called for a page that sgx_free_epc_page() will put onto * the node->sgx_poison_page_list later. */ if (page->poison) { struct sgx_epc_section *section = &sgx_epc_sections[page->section]; struct sgx_numa_node *node = section->node; spin_lock(&node->lock); list_move(&page->list, &node->sgx_poison_page_list); spin_unlock(&node->lock); continue; } ret = __eremove(sgx_get_epc_virt_addr(page)); if (!ret) { /* * page is now sanitized. Make it available via the SGX * page allocator: */ list_del(&page->list); sgx_free_epc_page(page); } else { /* The page is not yet clean - move to the dirty list. */ list_move_tail(&page->list, &dirty); left_dirty++; } cond_resched(); } list_splice(&dirty, dirty_page_list); return left_dirty; } static bool sgx_reclaimer_age(struct sgx_epc_page *epc_page) { struct sgx_encl_page *page = epc_page->owner; struct sgx_encl *encl = page->encl; struct sgx_encl_mm *encl_mm; bool ret = true; int idx; idx = srcu_read_lock(&encl->srcu); list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) { if (!mmget_not_zero(encl_mm->mm)) continue; mmap_read_lock(encl_mm->mm); ret = !sgx_encl_test_and_clear_young(encl_mm->mm, page); mmap_read_unlock(encl_mm->mm); mmput_async(encl_mm->mm); if (!ret) break; } srcu_read_unlock(&encl->srcu, idx); if (!ret) return false; return true; } static void sgx_reclaimer_block(struct sgx_epc_page *epc_page) { struct sgx_encl_page *page = epc_page->owner; unsigned long addr = page->desc & PAGE_MASK; struct sgx_encl *encl = page->encl; int ret; sgx_zap_enclave_ptes(encl, addr); mutex_lock(&encl->lock); ret = __eblock(sgx_get_epc_virt_addr(epc_page)); if (encls_failed(ret)) ENCLS_WARN(ret, "EBLOCK"); mutex_unlock(&encl->lock); } static int __sgx_encl_ewb(struct sgx_epc_page *epc_page, void *va_slot, struct sgx_backing *backing) { struct sgx_pageinfo pginfo; int ret; pginfo.addr = 0; pginfo.secs = 0; pginfo.contents = (unsigned long)kmap_local_page(backing->contents); pginfo.metadata = (unsigned long)kmap_local_page(backing->pcmd) + backing->pcmd_offset; ret = __ewb(&pginfo, sgx_get_epc_virt_addr(epc_page), va_slot); set_page_dirty(backing->pcmd); set_page_dirty(backing->contents); kunmap_local((void *)(unsigned long)(pginfo.metadata - backing->pcmd_offset)); kunmap_local((void *)(unsigned long)pginfo.contents); return ret; } void sgx_ipi_cb(void *info) { } /* * Swap page to the regular memory transformed to the blocked state by using * EBLOCK, which means that it can no longer be referenced (no new TLB entries). * * The first trial just tries to write the page assuming that some other thread * has reset the count for threads inside the enclave by using ETRACK, and * previous thread count has been zeroed out. The second trial calls ETRACK * before EWB. If that fails we kick all the HW threads out, and then do EWB, * which should be guaranteed the succeed. */ static void sgx_encl_ewb(struct sgx_epc_page *epc_page, struct sgx_backing *backing) { struct sgx_encl_page *encl_page = epc_page->owner; struct sgx_encl *encl = encl_page->encl; struct sgx_va_page *va_page; unsigned int va_offset; void *va_slot; int ret; encl_page->desc &= ~SGX_ENCL_PAGE_BEING_RECLAIMED; va_page = list_first_entry(&encl->va_pages, struct sgx_va_page, list); va_offset = sgx_alloc_va_slot(va_page); va_slot = sgx_get_epc_virt_addr(va_page->epc_page) + va_offset; if (sgx_va_page_full(va_page)) list_move_tail(&va_page->list, &encl->va_pages); ret = __sgx_encl_ewb(epc_page, va_slot, backing); if (ret == SGX_NOT_TRACKED) { ret = __etrack(sgx_get_epc_virt_addr(encl->secs.epc_page)); if (ret) { if (encls_failed(ret)) ENCLS_WARN(ret, "ETRACK"); } ret = __sgx_encl_ewb(epc_page, va_slot, backing); if (ret == SGX_NOT_TRACKED) { /* * Slow path, send IPIs to kick cpus out of the * enclave. Note, it's imperative that the cpu * mask is generated *after* ETRACK, else we'll * miss cpus that entered the enclave between * generating the mask and incrementing epoch. */ on_each_cpu_mask(sgx_encl_cpumask(encl), sgx_ipi_cb, NULL, 1); ret = __sgx_encl_ewb(epc_page, va_slot, backing); } } if (ret) { if (encls_failed(ret)) ENCLS_WARN(ret, "EWB"); sgx_free_va_slot(va_page, va_offset); } else { encl_page->desc |= va_offset; encl_page->va_page = va_page; } } static void sgx_reclaimer_write(struct sgx_epc_page *epc_page, struct sgx_backing *backing) { struct sgx_encl_page *encl_page = epc_page->owner; struct sgx_encl *encl = encl_page->encl; struct sgx_backing secs_backing; int ret; mutex_lock(&encl->lock); sgx_encl_ewb(epc_page, backing); encl_page->epc_page = NULL; encl->secs_child_cnt--; sgx_encl_put_backing(backing); if (!encl->secs_child_cnt && test_bit(SGX_ENCL_INITIALIZED, &encl->flags)) { ret = sgx_encl_alloc_backing(encl, PFN_DOWN(encl->size), &secs_backing); if (ret) goto out; sgx_encl_ewb(encl->secs.epc_page, &secs_backing); sgx_encl_free_epc_page(encl->secs.epc_page); encl->secs.epc_page = NULL; sgx_encl_put_backing(&secs_backing); } out: mutex_unlock(&encl->lock); } /* * Take a fixed number of pages from the head of the active page pool and * reclaim them to the enclave's private shmem files. Skip the pages, which have * been accessed since the last scan. Move those pages to the tail of active * page pool so that the pages get scanned in LRU like fashion. * * Batch process a chunk of pages (at the moment 16) in order to degrade amount * of IPI's and ETRACK's potentially required. sgx_encl_ewb() does degrade a bit * among the HW threads with three stage EWB pipeline (EWB, ETRACK + EWB and IPI * + EWB) but not sufficiently. Reclaiming one page at a time would also be * problematic as it would increase the lock contention too much, which would * halt forward progress. */ static void sgx_reclaim_pages(void) { struct sgx_epc_page *chunk[SGX_NR_TO_SCAN]; struct sgx_backing backing[SGX_NR_TO_SCAN]; struct sgx_encl_page *encl_page; struct sgx_epc_page *epc_page; pgoff_t page_index; int cnt = 0; int ret; int i; spin_lock(&sgx_reclaimer_lock); for (i = 0; i < SGX_NR_TO_SCAN; i++) { if (list_empty(&sgx_active_page_list)) break; epc_page = list_first_entry(&sgx_active_page_list, struct sgx_epc_page, list); list_del_init(&epc_page->list); encl_page = epc_page->owner; if (kref_get_unless_zero(&encl_page->encl->refcount) != 0) chunk[cnt++] = epc_page; else /* The owner is freeing the page. No need to add the * page back to the list of reclaimable pages. */ epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; } spin_unlock(&sgx_reclaimer_lock); for (i = 0; i < cnt; i++) { epc_page = chunk[i]; encl_page = epc_page->owner; if (!sgx_reclaimer_age(epc_page)) goto skip; page_index = PFN_DOWN(encl_page->desc - encl_page->encl->base); mutex_lock(&encl_page->encl->lock); ret = sgx_encl_alloc_backing(encl_page->encl, page_index, &backing[i]); if (ret) { mutex_unlock(&encl_page->encl->lock); goto skip; } encl_page->desc |= SGX_ENCL_PAGE_BEING_RECLAIMED; mutex_unlock(&encl_page->encl->lock); continue; skip: spin_lock(&sgx_reclaimer_lock); list_add_tail(&epc_page->list, &sgx_active_page_list); spin_unlock(&sgx_reclaimer_lock); kref_put(&encl_page->encl->refcount, sgx_encl_release); chunk[i] = NULL; } for (i = 0; i < cnt; i++) { epc_page = chunk[i]; if (epc_page) sgx_reclaimer_block(epc_page); } for (i = 0; i < cnt; i++) { epc_page = chunk[i]; if (!epc_page) continue; encl_page = epc_page->owner; sgx_reclaimer_write(epc_page, &backing[i]); kref_put(&encl_page->encl->refcount, sgx_encl_release); epc_page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; sgx_free_epc_page(epc_page); } } static bool sgx_should_reclaim(unsigned long watermark) { return atomic_long_read(&sgx_nr_free_pages) < watermark && !list_empty(&sgx_active_page_list); } /* * sgx_reclaim_direct() should be called (without enclave's mutex held) * in locations where SGX memory resources might be low and might be * needed in order to make forward progress. */ void sgx_reclaim_direct(void) { if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) sgx_reclaim_pages(); } static int ksgxd(void *p) { set_freezable(); /* * Sanitize pages in order to recover from kexec(). The 2nd pass is * required for SECS pages, whose child pages blocked EREMOVE. */ __sgx_sanitize_pages(&sgx_dirty_page_list); WARN_ON(__sgx_sanitize_pages(&sgx_dirty_page_list)); while (!kthread_should_stop()) { if (try_to_freeze()) continue; wait_event_freezable(ksgxd_waitq, kthread_should_stop() || sgx_should_reclaim(SGX_NR_HIGH_PAGES)); if (sgx_should_reclaim(SGX_NR_HIGH_PAGES)) sgx_reclaim_pages(); cond_resched(); } return 0; } static bool __init sgx_page_reclaimer_init(void) { struct task_struct *tsk; tsk = kthread_run(ksgxd, NULL, "ksgxd"); if (IS_ERR(tsk)) return false; ksgxd_tsk = tsk; return true; } bool current_is_ksgxd(void) { return current == ksgxd_tsk; } static struct sgx_epc_page *__sgx_alloc_epc_page_from_node(int nid) { struct sgx_numa_node *node = &sgx_numa_nodes[nid]; struct sgx_epc_page *page = NULL; spin_lock(&node->lock); if (list_empty(&node->free_page_list)) { spin_unlock(&node->lock); return NULL; } page = list_first_entry(&node->free_page_list, struct sgx_epc_page, list); list_del_init(&page->list); page->flags = 0; spin_unlock(&node->lock); atomic_long_dec(&sgx_nr_free_pages); return page; } /** * __sgx_alloc_epc_page() - Allocate an EPC page * * Iterate through NUMA nodes and reserve ia free EPC page to the caller. Start * from the NUMA node, where the caller is executing. * * Return: * - an EPC page: A borrowed EPC pages were available. * - NULL: Out of EPC pages. */ struct sgx_epc_page *__sgx_alloc_epc_page(void) { struct sgx_epc_page *page; int nid_of_current = numa_node_id(); int nid = nid_of_current; if (node_isset(nid_of_current, sgx_numa_mask)) { page = __sgx_alloc_epc_page_from_node(nid_of_current); if (page) return page; } /* Fall back to the non-local NUMA nodes: */ while (true) { nid = next_node_in(nid, sgx_numa_mask); if (nid == nid_of_current) break; page = __sgx_alloc_epc_page_from_node(nid); if (page) return page; } return ERR_PTR(-ENOMEM); } /** * sgx_mark_page_reclaimable() - Mark a page as reclaimable * @page: EPC page * * Mark a page as reclaimable and add it to the active page list. Pages * are automatically removed from the active list when freed. */ void sgx_mark_page_reclaimable(struct sgx_epc_page *page) { spin_lock(&sgx_reclaimer_lock); page->flags |= SGX_EPC_PAGE_RECLAIMER_TRACKED; list_add_tail(&page->list, &sgx_active_page_list); spin_unlock(&sgx_reclaimer_lock); } /** * sgx_unmark_page_reclaimable() - Remove a page from the reclaim list * @page: EPC page * * Clear the reclaimable flag and remove the page from the active page list. * * Return: * 0 on success, * -EBUSY if the page is in the process of being reclaimed */ int sgx_unmark_page_reclaimable(struct sgx_epc_page *page) { spin_lock(&sgx_reclaimer_lock); if (page->flags & SGX_EPC_PAGE_RECLAIMER_TRACKED) { /* The page is being reclaimed. */ if (list_empty(&page->list)) { spin_unlock(&sgx_reclaimer_lock); return -EBUSY; } list_del(&page->list); page->flags &= ~SGX_EPC_PAGE_RECLAIMER_TRACKED; } spin_unlock(&sgx_reclaimer_lock); return 0; } /** * sgx_alloc_epc_page() - Allocate an EPC page * @owner: the owner of the EPC page * @reclaim: reclaim pages if necessary * * Iterate through EPC sections and borrow a free EPC page to the caller. When a * page is no longer needed it must be released with sgx_free_epc_page(). If * @reclaim is set to true, directly reclaim pages when we are out of pages. No * mm's can be locked when @reclaim is set to true. * * Finally, wake up ksgxd when the number of pages goes below the watermark * before returning back to the caller. * * Return: * an EPC page, * -errno on error */ struct sgx_epc_page *sgx_alloc_epc_page(void *owner, bool reclaim) { struct sgx_epc_page *page; for ( ; ; ) { page = __sgx_alloc_epc_page(); if (!IS_ERR(page)) { page->owner = owner; break; } if (list_empty(&sgx_active_page_list)) return ERR_PTR(-ENOMEM); if (!reclaim) { page = ERR_PTR(-EBUSY); break; } if (signal_pending(current)) { page = ERR_PTR(-ERESTARTSYS); break; } sgx_reclaim_pages(); cond_resched(); } if (sgx_should_reclaim(SGX_NR_LOW_PAGES)) wake_up(&ksgxd_waitq); return page; } /** * sgx_free_epc_page() - Free an EPC page * @page: an EPC page * * Put the EPC page back to the list of free pages. It's the caller's * responsibility to make sure that the page is in uninitialized state. In other * words, do EREMOVE, EWB or whatever operation is necessary before calling * this function. */ void sgx_free_epc_page(struct sgx_epc_page *page) { struct sgx_epc_section *section = &sgx_epc_sections[page->section]; struct sgx_numa_node *node = section->node; spin_lock(&node->lock); page->owner = NULL; if (page->poison) list_add(&page->list, &node->sgx_poison_page_list); else list_add_tail(&page->list, &node->free_page_list); page->flags = SGX_EPC_PAGE_IS_FREE; spin_unlock(&node->lock); atomic_long_inc(&sgx_nr_free_pages); } static bool __init sgx_setup_epc_section(u64 phys_addr, u64 size, unsigned long index, struct sgx_epc_section *section) { unsigned long nr_pages = size >> PAGE_SHIFT; unsigned long i; section->virt_addr = memremap(phys_addr, size, MEMREMAP_WB); if (!section->virt_addr) return false; section->pages = vmalloc(nr_pages * sizeof(struct sgx_epc_page)); if (!section->pages) { memunmap(section->virt_addr); return false; } section->phys_addr = phys_addr; xa_store_range(&sgx_epc_address_space, section->phys_addr, phys_addr + size - 1, section, GFP_KERNEL); for (i = 0; i < nr_pages; i++) { section->pages[i].section = index; section->pages[i].flags = 0; section->pages[i].owner = NULL; section->pages[i].poison = 0; list_add_tail(&section->pages[i].list, &sgx_dirty_page_list); } return true; } bool arch_is_platform_page(u64 paddr) { return !!xa_load(&sgx_epc_address_space, paddr); } EXPORT_SYMBOL_GPL(arch_is_platform_page); static struct sgx_epc_page *sgx_paddr_to_page(u64 paddr) { struct sgx_epc_section *section; section = xa_load(&sgx_epc_address_space, paddr); if (!section) return NULL; return &section->pages[PFN_DOWN(paddr - section->phys_addr)]; } /* * Called in process context to handle a hardware reported * error in an SGX EPC page. * If the MF_ACTION_REQUIRED bit is set in flags, then the * context is the task that consumed the poison data. Otherwise * this is called from a kernel thread unrelated to the page. */ int arch_memory_failure(unsigned long pfn, int flags) { struct sgx_epc_page *page = sgx_paddr_to_page(pfn << PAGE_SHIFT); struct sgx_epc_section *section; struct sgx_numa_node *node; /* * mm/memory-failure.c calls this routine for all errors * where there isn't a "struct page" for the address. But that * includes other address ranges besides SGX. */ if (!page) return -ENXIO; /* * If poison was consumed synchronously. Send a SIGBUS to * the task. Hardware has already exited the SGX enclave and * will not allow re-entry to an enclave that has a memory * error. The signal may help the task understand why the * enclave is broken. */ if (flags & MF_ACTION_REQUIRED) force_sig(SIGBUS); section = &sgx_epc_sections[page->section]; node = section->node; spin_lock(&node->lock); /* Already poisoned? Nothing more to do */ if (page->poison) goto out; page->poison = 1; /* * If the page is on a free list, move it to the per-node * poison page list. */ if (page->flags & SGX_EPC_PAGE_IS_FREE) { list_move(&page->list, &node->sgx_poison_page_list); goto out; } /* * TBD: Add additional plumbing to enable pre-emptive * action for asynchronous poison notification. Until * then just hope that the poison: * a) is not accessed - sgx_free_epc_page() will deal with it * when the user gives it back * b) results in a recoverable machine check rather than * a fatal one */ out: spin_unlock(&node->lock); return 0; } /** * A section metric is concatenated in a way that @low bits 12-31 define the * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the * metric. */ static inline u64 __init sgx_calc_section_metric(u64 low, u64 high) { return (low & GENMASK_ULL(31, 12)) + ((high & GENMASK_ULL(19, 0)) << 32); } #ifdef CONFIG_NUMA static ssize_t sgx_total_bytes_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%lu\n", sgx_numa_nodes[dev->id].size); } static DEVICE_ATTR_RO(sgx_total_bytes); static umode_t arch_node_attr_is_visible(struct kobject *kobj, struct attribute *attr, int idx) { /* Make all x86/ attributes invisible when SGX is not initialized: */ if (nodes_empty(sgx_numa_mask)) return 0; return attr->mode; } static struct attribute *arch_node_dev_attrs[] = { &dev_attr_sgx_total_bytes.attr, NULL, }; const struct attribute_group arch_node_dev_group = { .name = "x86", .attrs = arch_node_dev_attrs, .is_visible = arch_node_attr_is_visible, }; static void __init arch_update_sysfs_visibility(int nid) { struct node *node = node_devices[nid]; int ret; ret = sysfs_update_group(&node->dev.kobj, &arch_node_dev_group); if (ret) pr_err("sysfs update failed (%d), files may be invisible", ret); } #else /* !CONFIG_NUMA */ static void __init arch_update_sysfs_visibility(int nid) {} #endif static bool __init sgx_page_cache_init(void) { u32 eax, ebx, ecx, edx, type; u64 pa, size; int nid; int i; sgx_numa_nodes = kmalloc_array(num_possible_nodes(), sizeof(*sgx_numa_nodes), GFP_KERNEL); if (!sgx_numa_nodes) return false; for (i = 0; i < ARRAY_SIZE(sgx_epc_sections); i++) { cpuid_count(SGX_CPUID, i + SGX_CPUID_EPC, &eax, &ebx, &ecx, &edx); type = eax & SGX_CPUID_EPC_MASK; if (type == SGX_CPUID_EPC_INVALID) break; if (type != SGX_CPUID_EPC_SECTION) { pr_err_once("Unknown EPC section type: %u\n", type); break; } pa = sgx_calc_section_metric(eax, ebx); size = sgx_calc_section_metric(ecx, edx); pr_info("EPC section 0x%llx-0x%llx\n", pa, pa + size - 1); if (!sgx_setup_epc_section(pa, size, i, &sgx_epc_sections[i])) { pr_err("No free memory for an EPC section\n"); break; } nid = numa_map_to_online_node(phys_to_target_node(pa)); if (nid == NUMA_NO_NODE) { /* The physical address is already printed above. */ pr_warn(FW_BUG "Unable to map EPC section to online node. Fallback to the NUMA node 0.\n"); nid = 0; } if (!node_isset(nid, sgx_numa_mask)) { spin_lock_init(&sgx_numa_nodes[nid].lock); INIT_LIST_HEAD(&sgx_numa_nodes[nid].free_page_list); INIT_LIST_HEAD(&sgx_numa_nodes[nid].sgx_poison_page_list); node_set(nid, sgx_numa_mask); sgx_numa_nodes[nid].size = 0; /* Make SGX-specific node sysfs files visible: */ arch_update_sysfs_visibility(nid); } sgx_epc_sections[i].node = &sgx_numa_nodes[nid]; sgx_numa_nodes[nid].size += size; sgx_nr_epc_sections++; } if (!sgx_nr_epc_sections) { pr_err("There are zero EPC sections.\n"); return false; } return true; } /* * Update the SGX_LEPUBKEYHASH MSRs to the values specified by caller. * Bare-metal driver requires to update them to hash of enclave's signer * before EINIT. KVM needs to update them to guest's virtual MSR values * before doing EINIT from guest. */ void sgx_update_lepubkeyhash(u64 *lepubkeyhash) { int i; WARN_ON_ONCE(preemptible()); for (i = 0; i < 4; i++) wrmsrl(MSR_IA32_SGXLEPUBKEYHASH0 + i, lepubkeyhash[i]); } const struct file_operations sgx_provision_fops = { .owner = THIS_MODULE, }; static struct miscdevice sgx_dev_provision = { .minor = MISC_DYNAMIC_MINOR, .name = "sgx_provision", .nodename = "sgx_provision", .fops = &sgx_provision_fops, }; /** * sgx_set_attribute() - Update allowed attributes given file descriptor * @allowed_attributes: Pointer to allowed enclave attributes * @attribute_fd: File descriptor for specific attribute * * Append enclave attribute indicated by file descriptor to allowed * attributes. Currently only SGX_ATTR_PROVISIONKEY indicated by * /dev/sgx_provision is supported. * * Return: * -0: SGX_ATTR_PROVISIONKEY is appended to allowed_attributes * -EINVAL: Invalid, or not supported file descriptor */ int sgx_set_attribute(unsigned long *allowed_attributes, unsigned int attribute_fd) { struct fd f = fdget(attribute_fd); if (!f.file) return -EINVAL; if (f.file->f_op != &sgx_provision_fops) { fdput(f); return -EINVAL; } *allowed_attributes |= SGX_ATTR_PROVISIONKEY; fdput(f); return 0; } EXPORT_SYMBOL_GPL(sgx_set_attribute); static int __init sgx_init(void) { int ret; int i; if (!cpu_feature_enabled(X86_FEATURE_SGX)) return -ENODEV; if (!sgx_page_cache_init()) return -ENOMEM; if (!sgx_page_reclaimer_init()) { ret = -ENOMEM; goto err_page_cache; } ret = misc_register(&sgx_dev_provision); if (ret) goto err_kthread; /* * Always try to initialize the native *and* KVM drivers. * The KVM driver is less picky than the native one and * can function if the native one is not supported on the * current system or fails to initialize. * * Error out only if both fail to initialize. */ ret = sgx_drv_init(); if (sgx_vepc_init() && ret) goto err_provision; return 0; err_provision: misc_deregister(&sgx_dev_provision); err_kthread: kthread_stop(ksgxd_tsk); err_page_cache: for (i = 0; i < sgx_nr_epc_sections; i++) { vfree(sgx_epc_sections[i].pages); memunmap(sgx_epc_sections[i].virt_addr); } return ret; } device_initcall(sgx_init);
1 7 20 2 2 31 4 21 8 20 1 26 4 20 1 1 4 1 2 1 1 4 1 4 4 4 4 4 6 1 5 6 4 19 6 13 11 2 4 1 6 11 2 9 10 4 6 9 1 2 7 28 1 10 8 10 2 7 1 1 3 2 1 7 7 7 2 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010 IBM Corporation * Copyright (C) 2010 Politecnico di Torino, Italy * TORSEC group -- https://security.polito.it * * Authors: * Mimi Zohar <zohar@us.ibm.com> * Roberto Sassu <roberto.sassu@polito.it> * * See Documentation/security/keys/trusted-encrypted.rst */ #include <linux/uaccess.h> #include <linux/module.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/parser.h> #include <linux/string.h> #include <linux/err.h> #include <keys/user-type.h> #include <keys/trusted-type.h> #include <keys/encrypted-type.h> #include <linux/key-type.h> #include <linux/random.h> #include <linux/rcupdate.h> #include <linux/scatterlist.h> #include <linux/ctype.h> #include <crypto/aes.h> #include <crypto/hash.h> #include <crypto/sha2.h> #include <crypto/skcipher.h> #include <crypto/utils.h> #include "encrypted.h" #include "ecryptfs_format.h" static const char KEY_TRUSTED_PREFIX[] = "trusted:"; static const char KEY_USER_PREFIX[] = "user:"; static const char hash_alg[] = "sha256"; static const char hmac_alg[] = "hmac(sha256)"; static const char blkcipher_alg[] = "cbc(aes)"; static const char key_format_default[] = "default"; static const char key_format_ecryptfs[] = "ecryptfs"; static const char key_format_enc32[] = "enc32"; static unsigned int ivsize; static int blksize; #define KEY_TRUSTED_PREFIX_LEN (sizeof (KEY_TRUSTED_PREFIX) - 1) #define KEY_USER_PREFIX_LEN (sizeof (KEY_USER_PREFIX) - 1) #define KEY_ECRYPTFS_DESC_LEN 16 #define HASH_SIZE SHA256_DIGEST_SIZE #define MAX_DATA_SIZE 4096 #define MIN_DATA_SIZE 20 #define KEY_ENC32_PAYLOAD_LEN 32 static struct crypto_shash *hash_tfm; enum { Opt_new, Opt_load, Opt_update, Opt_err }; enum { Opt_default, Opt_ecryptfs, Opt_enc32, Opt_error }; static const match_table_t key_format_tokens = { {Opt_default, "default"}, {Opt_ecryptfs, "ecryptfs"}, {Opt_enc32, "enc32"}, {Opt_error, NULL} }; static const match_table_t key_tokens = { {Opt_new, "new"}, {Opt_load, "load"}, {Opt_update, "update"}, {Opt_err, NULL} }; static bool user_decrypted_data = IS_ENABLED(CONFIG_USER_DECRYPTED_DATA); module_param(user_decrypted_data, bool, 0); MODULE_PARM_DESC(user_decrypted_data, "Allow instantiation of encrypted keys using provided decrypted data"); static int aes_get_sizes(void) { struct crypto_skcipher *tfm; tfm = crypto_alloc_skcipher(blkcipher_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { pr_err("encrypted_key: failed to alloc_cipher (%ld)\n", PTR_ERR(tfm)); return PTR_ERR(tfm); } ivsize = crypto_skcipher_ivsize(tfm); blksize = crypto_skcipher_blocksize(tfm); crypto_free_skcipher(tfm); return 0; } /* * valid_ecryptfs_desc - verify the description of a new/loaded encrypted key * * The description of a encrypted key with format 'ecryptfs' must contain * exactly 16 hexadecimal characters. * */ static int valid_ecryptfs_desc(const char *ecryptfs_desc) { int i; if (strlen(ecryptfs_desc) != KEY_ECRYPTFS_DESC_LEN) { pr_err("encrypted_key: key description must be %d hexadecimal " "characters long\n", KEY_ECRYPTFS_DESC_LEN); return -EINVAL; } for (i = 0; i < KEY_ECRYPTFS_DESC_LEN; i++) { if (!isxdigit(ecryptfs_desc[i])) { pr_err("encrypted_key: key description must contain " "only hexadecimal characters\n"); return -EINVAL; } } return 0; } /* * valid_master_desc - verify the 'key-type:desc' of a new/updated master-key * * key-type:= "trusted:" | "user:" * desc:= master-key description * * Verify that 'key-type' is valid and that 'desc' exists. On key update, * only the master key description is permitted to change, not the key-type. * The key-type remains constant. * * On success returns 0, otherwise -EINVAL. */ static int valid_master_desc(const char *new_desc, const char *orig_desc) { int prefix_len; if (!strncmp(new_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) prefix_len = KEY_TRUSTED_PREFIX_LEN; else if (!strncmp(new_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) prefix_len = KEY_USER_PREFIX_LEN; else return -EINVAL; if (!new_desc[prefix_len]) return -EINVAL; if (orig_desc && strncmp(new_desc, orig_desc, prefix_len)) return -EINVAL; return 0; } /* * datablob_parse - parse the keyctl data * * datablob format: * new [<format>] <master-key name> <decrypted data length> [<decrypted data>] * load [<format>] <master-key name> <decrypted data length> * <encrypted iv + data> * update <new-master-key name> * * Tokenizes a copy of the keyctl data, returning a pointer to each token, * which is null terminated. * * On success returns 0, otherwise -EINVAL. */ static int datablob_parse(char *datablob, const char **format, char **master_desc, char **decrypted_datalen, char **hex_encoded_iv, char **decrypted_data) { substring_t args[MAX_OPT_ARGS]; int ret = -EINVAL; int key_cmd; int key_format; char *p, *keyword; keyword = strsep(&datablob, " \t"); if (!keyword) { pr_info("encrypted_key: insufficient parameters specified\n"); return ret; } key_cmd = match_token(keyword, key_tokens, args); /* Get optional format: default | ecryptfs */ p = strsep(&datablob, " \t"); if (!p) { pr_err("encrypted_key: insufficient parameters specified\n"); return ret; } key_format = match_token(p, key_format_tokens, args); switch (key_format) { case Opt_ecryptfs: case Opt_enc32: case Opt_default: *format = p; *master_desc = strsep(&datablob, " \t"); break; case Opt_error: *master_desc = p; break; } if (!*master_desc) { pr_info("encrypted_key: master key parameter is missing\n"); goto out; } if (valid_master_desc(*master_desc, NULL) < 0) { pr_info("encrypted_key: master key parameter \'%s\' " "is invalid\n", *master_desc); goto out; } if (decrypted_datalen) { *decrypted_datalen = strsep(&datablob, " \t"); if (!*decrypted_datalen) { pr_info("encrypted_key: keylen parameter is missing\n"); goto out; } } switch (key_cmd) { case Opt_new: if (!decrypted_datalen) { pr_info("encrypted_key: keyword \'%s\' not allowed " "when called from .update method\n", keyword); break; } *decrypted_data = strsep(&datablob, " \t"); ret = 0; break; case Opt_load: if (!decrypted_datalen) { pr_info("encrypted_key: keyword \'%s\' not allowed " "when called from .update method\n", keyword); break; } *hex_encoded_iv = strsep(&datablob, " \t"); if (!*hex_encoded_iv) { pr_info("encrypted_key: hex blob is missing\n"); break; } ret = 0; break; case Opt_update: if (decrypted_datalen) { pr_info("encrypted_key: keyword \'%s\' not allowed " "when called from .instantiate method\n", keyword); break; } ret = 0; break; case Opt_err: pr_info("encrypted_key: keyword \'%s\' not recognized\n", keyword); break; } out: return ret; } /* * datablob_format - format as an ascii string, before copying to userspace */ static char *datablob_format(struct encrypted_key_payload *epayload, size_t asciiblob_len) { char *ascii_buf, *bufp; u8 *iv = epayload->iv; int len; int i; ascii_buf = kmalloc(asciiblob_len + 1, GFP_KERNEL); if (!ascii_buf) goto out; ascii_buf[asciiblob_len] = '\0'; /* copy datablob master_desc and datalen strings */ len = sprintf(ascii_buf, "%s %s %s ", epayload->format, epayload->master_desc, epayload->datalen); /* convert the hex encoded iv, encrypted-data and HMAC to ascii */ bufp = &ascii_buf[len]; for (i = 0; i < (asciiblob_len - len) / 2; i++) bufp = hex_byte_pack(bufp, iv[i]); out: return ascii_buf; } /* * request_user_key - request the user key * * Use a user provided key to encrypt/decrypt an encrypted-key. */ static struct key *request_user_key(const char *master_desc, const u8 **master_key, size_t *master_keylen) { const struct user_key_payload *upayload; struct key *ukey; ukey = request_key(&key_type_user, master_desc, NULL); if (IS_ERR(ukey)) goto error; down_read(&ukey->sem); upayload = user_key_payload_locked(ukey); if (!upayload) { /* key was revoked before we acquired its semaphore */ up_read(&ukey->sem); key_put(ukey); ukey = ERR_PTR(-EKEYREVOKED); goto error; } *master_key = upayload->data; *master_keylen = upayload->datalen; error: return ukey; } static int calc_hmac(u8 *digest, const u8 *key, unsigned int keylen, const u8 *buf, unsigned int buflen) { struct crypto_shash *tfm; int err; tfm = crypto_alloc_shash(hmac_alg, 0, 0); if (IS_ERR(tfm)) { pr_err("encrypted_key: can't alloc %s transform: %ld\n", hmac_alg, PTR_ERR(tfm)); return PTR_ERR(tfm); } err = crypto_shash_setkey(tfm, key, keylen); if (!err) err = crypto_shash_tfm_digest(tfm, buf, buflen, digest); crypto_free_shash(tfm); return err; } enum derived_key_type { ENC_KEY, AUTH_KEY }; /* Derive authentication/encryption key from trusted key */ static int get_derived_key(u8 *derived_key, enum derived_key_type key_type, const u8 *master_key, size_t master_keylen) { u8 *derived_buf; unsigned int derived_buf_len; int ret; derived_buf_len = strlen("AUTH_KEY") + 1 + master_keylen; if (derived_buf_len < HASH_SIZE) derived_buf_len = HASH_SIZE; derived_buf = kzalloc(derived_buf_len, GFP_KERNEL); if (!derived_buf) return -ENOMEM; if (key_type) strcpy(derived_buf, "AUTH_KEY"); else strcpy(derived_buf, "ENC_KEY"); memcpy(derived_buf + strlen(derived_buf) + 1, master_key, master_keylen); ret = crypto_shash_tfm_digest(hash_tfm, derived_buf, derived_buf_len, derived_key); kfree_sensitive(derived_buf); return ret; } static struct skcipher_request *init_skcipher_req(const u8 *key, unsigned int key_len) { struct skcipher_request *req; struct crypto_skcipher *tfm; int ret; tfm = crypto_alloc_skcipher(blkcipher_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { pr_err("encrypted_key: failed to load %s transform (%ld)\n", blkcipher_alg, PTR_ERR(tfm)); return ERR_CAST(tfm); } ret = crypto_skcipher_setkey(tfm, key, key_len); if (ret < 0) { pr_err("encrypted_key: failed to setkey (%d)\n", ret); crypto_free_skcipher(tfm); return ERR_PTR(ret); } req = skcipher_request_alloc(tfm, GFP_KERNEL); if (!req) { pr_err("encrypted_key: failed to allocate request for %s\n", blkcipher_alg); crypto_free_skcipher(tfm); return ERR_PTR(-ENOMEM); } skcipher_request_set_callback(req, 0, NULL, NULL); return req; } static struct key *request_master_key(struct encrypted_key_payload *epayload, const u8 **master_key, size_t *master_keylen) { struct key *mkey = ERR_PTR(-EINVAL); if (!strncmp(epayload->master_desc, KEY_TRUSTED_PREFIX, KEY_TRUSTED_PREFIX_LEN)) { mkey = request_trusted_key(epayload->master_desc + KEY_TRUSTED_PREFIX_LEN, master_key, master_keylen); } else if (!strncmp(epayload->master_desc, KEY_USER_PREFIX, KEY_USER_PREFIX_LEN)) { mkey = request_user_key(epayload->master_desc + KEY_USER_PREFIX_LEN, master_key, master_keylen); } else goto out; if (IS_ERR(mkey)) { int ret = PTR_ERR(mkey); if (ret == -ENOTSUPP) pr_info("encrypted_key: key %s not supported", epayload->master_desc); else pr_info("encrypted_key: key %s not found", epayload->master_desc); goto out; } dump_master_key(*master_key, *master_keylen); out: return mkey; } /* Before returning data to userspace, encrypt decrypted data. */ static int derived_key_encrypt(struct encrypted_key_payload *epayload, const u8 *derived_key, unsigned int derived_keylen) { struct scatterlist sg_in[2]; struct scatterlist sg_out[1]; struct crypto_skcipher *tfm; struct skcipher_request *req; unsigned int encrypted_datalen; u8 iv[AES_BLOCK_SIZE]; int ret; encrypted_datalen = roundup(epayload->decrypted_datalen, blksize); req = init_skcipher_req(derived_key, derived_keylen); ret = PTR_ERR(req); if (IS_ERR(req)) goto out; dump_decrypted_data(epayload); sg_init_table(sg_in, 2); sg_set_buf(&sg_in[0], epayload->decrypted_data, epayload->decrypted_datalen); sg_set_page(&sg_in[1], ZERO_PAGE(0), AES_BLOCK_SIZE, 0); sg_init_table(sg_out, 1); sg_set_buf(sg_out, epayload->encrypted_data, encrypted_datalen); memcpy(iv, epayload->iv, sizeof(iv)); skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv); ret = crypto_skcipher_encrypt(req); tfm = crypto_skcipher_reqtfm(req); skcipher_request_free(req); crypto_free_skcipher(tfm); if (ret < 0) pr_err("encrypted_key: failed to encrypt (%d)\n", ret); else dump_encrypted_data(epayload, encrypted_datalen); out: return ret; } static int datablob_hmac_append(struct encrypted_key_payload *epayload, const u8 *master_key, size_t master_keylen) { u8 derived_key[HASH_SIZE]; u8 *digest; int ret; ret = get_derived_key(derived_key, AUTH_KEY, master_key, master_keylen); if (ret < 0) goto out; digest = epayload->format + epayload->datablob_len; ret = calc_hmac(digest, derived_key, sizeof derived_key, epayload->format, epayload->datablob_len); if (!ret) dump_hmac(NULL, digest, HASH_SIZE); out: memzero_explicit(derived_key, sizeof(derived_key)); return ret; } /* verify HMAC before decrypting encrypted key */ static int datablob_hmac_verify(struct encrypted_key_payload *epayload, const u8 *format, const u8 *master_key, size_t master_keylen) { u8 derived_key[HASH_SIZE]; u8 digest[HASH_SIZE]; int ret; char *p; unsigned short len; ret = get_derived_key(derived_key, AUTH_KEY, master_key, master_keylen); if (ret < 0) goto out; len = epayload->datablob_len; if (!format) { p = epayload->master_desc; len -= strlen(epayload->format) + 1; } else p = epayload->format; ret = calc_hmac(digest, derived_key, sizeof derived_key, p, len); if (ret < 0) goto out; ret = crypto_memneq(digest, epayload->format + epayload->datablob_len, sizeof(digest)); if (ret) { ret = -EINVAL; dump_hmac("datablob", epayload->format + epayload->datablob_len, HASH_SIZE); dump_hmac("calc", digest, HASH_SIZE); } out: memzero_explicit(derived_key, sizeof(derived_key)); return ret; } static int derived_key_decrypt(struct encrypted_key_payload *epayload, const u8 *derived_key, unsigned int derived_keylen) { struct scatterlist sg_in[1]; struct scatterlist sg_out[2]; struct crypto_skcipher *tfm; struct skcipher_request *req; unsigned int encrypted_datalen; u8 iv[AES_BLOCK_SIZE]; u8 *pad; int ret; /* Throwaway buffer to hold the unused zero padding at the end */ pad = kmalloc(AES_BLOCK_SIZE, GFP_KERNEL); if (!pad) return -ENOMEM; encrypted_datalen = roundup(epayload->decrypted_datalen, blksize); req = init_skcipher_req(derived_key, derived_keylen); ret = PTR_ERR(req); if (IS_ERR(req)) goto out; dump_encrypted_data(epayload, encrypted_datalen); sg_init_table(sg_in, 1); sg_init_table(sg_out, 2); sg_set_buf(sg_in, epayload->encrypted_data, encrypted_datalen); sg_set_buf(&sg_out[0], epayload->decrypted_data, epayload->decrypted_datalen); sg_set_buf(&sg_out[1], pad, AES_BLOCK_SIZE); memcpy(iv, epayload->iv, sizeof(iv)); skcipher_request_set_crypt(req, sg_in, sg_out, encrypted_datalen, iv); ret = crypto_skcipher_decrypt(req); tfm = crypto_skcipher_reqtfm(req); skcipher_request_free(req); crypto_free_skcipher(tfm); if (ret < 0) goto out; dump_decrypted_data(epayload); out: kfree(pad); return ret; } /* Allocate memory for decrypted key and datablob. */ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key, const char *format, const char *master_desc, const char *datalen, const char *decrypted_data) { struct encrypted_key_payload *epayload = NULL; unsigned short datablob_len; unsigned short decrypted_datalen; unsigned short payload_datalen; unsigned int encrypted_datalen; unsigned int format_len; long dlen; int i; int ret; ret = kstrtol(datalen, 10, &dlen); if (ret < 0 || dlen < MIN_DATA_SIZE || dlen > MAX_DATA_SIZE) return ERR_PTR(-EINVAL); format_len = (!format) ? strlen(key_format_default) : strlen(format); decrypted_datalen = dlen; payload_datalen = decrypted_datalen; if (decrypted_data) { if (!user_decrypted_data) { pr_err("encrypted key: instantiation of keys using provided decrypted data is disabled since CONFIG_USER_DECRYPTED_DATA is set to false\n"); return ERR_PTR(-EINVAL); } if (strlen(decrypted_data) != decrypted_datalen * 2) { pr_err("encrypted key: decrypted data provided does not match decrypted data length provided\n"); return ERR_PTR(-EINVAL); } for (i = 0; i < strlen(decrypted_data); i++) { if (!isxdigit(decrypted_data[i])) { pr_err("encrypted key: decrypted data provided must contain only hexadecimal characters\n"); return ERR_PTR(-EINVAL); } } } if (format) { if (!strcmp(format, key_format_ecryptfs)) { if (dlen != ECRYPTFS_MAX_KEY_BYTES) { pr_err("encrypted_key: keylen for the ecryptfs format must be equal to %d bytes\n", ECRYPTFS_MAX_KEY_BYTES); return ERR_PTR(-EINVAL); } decrypted_datalen = ECRYPTFS_MAX_KEY_BYTES; payload_datalen = sizeof(struct ecryptfs_auth_tok); } else if (!strcmp(format, key_format_enc32)) { if (decrypted_datalen != KEY_ENC32_PAYLOAD_LEN) { pr_err("encrypted_key: enc32 key payload incorrect length: %d\n", decrypted_datalen); return ERR_PTR(-EINVAL); } } } encrypted_datalen = roundup(decrypted_datalen, blksize); datablob_len = format_len + 1 + strlen(master_desc) + 1 + strlen(datalen) + 1 + ivsize + 1 + encrypted_datalen; ret = key_payload_reserve(key, payload_datalen + datablob_len + HASH_SIZE + 1); if (ret < 0) return ERR_PTR(ret); epayload = kzalloc(sizeof(*epayload) + payload_datalen + datablob_len + HASH_SIZE + 1, GFP_KERNEL); if (!epayload) return ERR_PTR(-ENOMEM); epayload->payload_datalen = payload_datalen; epayload->decrypted_datalen = decrypted_datalen; epayload->datablob_len = datablob_len; return epayload; } static int encrypted_key_decrypt(struct encrypted_key_payload *epayload, const char *format, const char *hex_encoded_iv) { struct key *mkey; u8 derived_key[HASH_SIZE]; const u8 *master_key; u8 *hmac; const char *hex_encoded_data; unsigned int encrypted_datalen; size_t master_keylen; size_t asciilen; int ret; encrypted_datalen = roundup(epayload->decrypted_datalen, blksize); asciilen = (ivsize + 1 + encrypted_datalen + HASH_SIZE) * 2; if (strlen(hex_encoded_iv) != asciilen) return -EINVAL; hex_encoded_data = hex_encoded_iv + (2 * ivsize) + 2; ret = hex2bin(epayload->iv, hex_encoded_iv, ivsize); if (ret < 0) return -EINVAL; ret = hex2bin(epayload->encrypted_data, hex_encoded_data, encrypted_datalen); if (ret < 0) return -EINVAL; hmac = epayload->format + epayload->datablob_len; ret = hex2bin(hmac, hex_encoded_data + (encrypted_datalen * 2), HASH_SIZE); if (ret < 0) return -EINVAL; mkey = request_master_key(epayload, &master_key, &master_keylen); if (IS_ERR(mkey)) return PTR_ERR(mkey); ret = datablob_hmac_verify(epayload, format, master_key, master_keylen); if (ret < 0) { pr_err("encrypted_key: bad hmac (%d)\n", ret); goto out; } ret = get_derived_key(derived_key, ENC_KEY, master_key, master_keylen); if (ret < 0) goto out; ret = derived_key_decrypt(epayload, derived_key, sizeof derived_key); if (ret < 0) pr_err("encrypted_key: failed to decrypt key (%d)\n", ret); out: up_read(&mkey->sem); key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); return ret; } static void __ekey_init(struct encrypted_key_payload *epayload, const char *format, const char *master_desc, const char *datalen) { unsigned int format_len; format_len = (!format) ? strlen(key_format_default) : strlen(format); epayload->format = epayload->payload_data + epayload->payload_datalen; epayload->master_desc = epayload->format + format_len + 1; epayload->datalen = epayload->master_desc + strlen(master_desc) + 1; epayload->iv = epayload->datalen + strlen(datalen) + 1; epayload->encrypted_data = epayload->iv + ivsize + 1; epayload->decrypted_data = epayload->payload_data; if (!format) memcpy(epayload->format, key_format_default, format_len); else { if (!strcmp(format, key_format_ecryptfs)) epayload->decrypted_data = ecryptfs_get_auth_tok_key((struct ecryptfs_auth_tok *)epayload->payload_data); memcpy(epayload->format, format, format_len); } memcpy(epayload->master_desc, master_desc, strlen(master_desc)); memcpy(epayload->datalen, datalen, strlen(datalen)); } /* * encrypted_init - initialize an encrypted key * * For a new key, use either a random number or user-provided decrypted data in * case it is provided. A random number is used for the iv in both cases. For * an old key, decrypt the hex encoded data. */ static int encrypted_init(struct encrypted_key_payload *epayload, const char *key_desc, const char *format, const char *master_desc, const char *datalen, const char *hex_encoded_iv, const char *decrypted_data) { int ret = 0; if (format && !strcmp(format, key_format_ecryptfs)) { ret = valid_ecryptfs_desc(key_desc); if (ret < 0) return ret; ecryptfs_fill_auth_tok((struct ecryptfs_auth_tok *)epayload->payload_data, key_desc); } __ekey_init(epayload, format, master_desc, datalen); if (hex_encoded_iv) { ret = encrypted_key_decrypt(epayload, format, hex_encoded_iv); } else if (decrypted_data) { get_random_bytes(epayload->iv, ivsize); ret = hex2bin(epayload->decrypted_data, decrypted_data, epayload->decrypted_datalen); } else { get_random_bytes(epayload->iv, ivsize); get_random_bytes(epayload->decrypted_data, epayload->decrypted_datalen); } return ret; } /* * encrypted_instantiate - instantiate an encrypted key * * Instantiates the key: * - by decrypting an existing encrypted datablob, or * - by creating a new encrypted key based on a kernel random number, or * - using provided decrypted data. * * On success, return 0. Otherwise return errno. */ static int encrypted_instantiate(struct key *key, struct key_preparsed_payload *prep) { struct encrypted_key_payload *epayload = NULL; char *datablob = NULL; const char *format = NULL; char *master_desc = NULL; char *decrypted_datalen = NULL; char *hex_encoded_iv = NULL; char *decrypted_data = NULL; size_t datalen = prep->datalen; int ret; if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; datablob = kmalloc(datalen + 1, GFP_KERNEL); if (!datablob) return -ENOMEM; datablob[datalen] = 0; memcpy(datablob, prep->data, datalen); ret = datablob_parse(datablob, &format, &master_desc, &decrypted_datalen, &hex_encoded_iv, &decrypted_data); if (ret < 0) goto out; epayload = encrypted_key_alloc(key, format, master_desc, decrypted_datalen, decrypted_data); if (IS_ERR(epayload)) { ret = PTR_ERR(epayload); goto out; } ret = encrypted_init(epayload, key->description, format, master_desc, decrypted_datalen, hex_encoded_iv, decrypted_data); if (ret < 0) { kfree_sensitive(epayload); goto out; } rcu_assign_keypointer(key, epayload); out: kfree_sensitive(datablob); return ret; } static void encrypted_rcu_free(struct rcu_head *rcu) { struct encrypted_key_payload *epayload; epayload = container_of(rcu, struct encrypted_key_payload, rcu); kfree_sensitive(epayload); } /* * encrypted_update - update the master key description * * Change the master key description for an existing encrypted key. * The next read will return an encrypted datablob using the new * master key description. * * On success, return 0. Otherwise return errno. */ static int encrypted_update(struct key *key, struct key_preparsed_payload *prep) { struct encrypted_key_payload *epayload = key->payload.data[0]; struct encrypted_key_payload *new_epayload; char *buf; char *new_master_desc = NULL; const char *format = NULL; size_t datalen = prep->datalen; int ret = 0; if (key_is_negative(key)) return -ENOKEY; if (datalen <= 0 || datalen > 32767 || !prep->data) return -EINVAL; buf = kmalloc(datalen + 1, GFP_KERNEL); if (!buf) return -ENOMEM; buf[datalen] = 0; memcpy(buf, prep->data, datalen); ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL, NULL); if (ret < 0) goto out; ret = valid_master_desc(new_master_desc, epayload->master_desc); if (ret < 0) goto out; new_epayload = encrypted_key_alloc(key, epayload->format, new_master_desc, epayload->datalen, NULL); if (IS_ERR(new_epayload)) { ret = PTR_ERR(new_epayload); goto out; } __ekey_init(new_epayload, epayload->format, new_master_desc, epayload->datalen); memcpy(new_epayload->iv, epayload->iv, ivsize); memcpy(new_epayload->payload_data, epayload->payload_data, epayload->payload_datalen); rcu_assign_keypointer(key, new_epayload); call_rcu(&epayload->rcu, encrypted_rcu_free); out: kfree_sensitive(buf); return ret; } /* * encrypted_read - format and copy out the encrypted data * * The resulting datablob format is: * <master-key name> <decrypted data length> <encrypted iv> <encrypted data> * * On success, return to userspace the encrypted key datablob size. */ static long encrypted_read(const struct key *key, char *buffer, size_t buflen) { struct encrypted_key_payload *epayload; struct key *mkey; const u8 *master_key; size_t master_keylen; char derived_key[HASH_SIZE]; char *ascii_buf; size_t asciiblob_len; int ret; epayload = dereference_key_locked(key); /* returns the hex encoded iv, encrypted-data, and hmac as ascii */ asciiblob_len = epayload->datablob_len + ivsize + 1 + roundup(epayload->decrypted_datalen, blksize) + (HASH_SIZE * 2); if (!buffer || buflen < asciiblob_len) return asciiblob_len; mkey = request_master_key(epayload, &master_key, &master_keylen); if (IS_ERR(mkey)) return PTR_ERR(mkey); ret = get_derived_key(derived_key, ENC_KEY, master_key, master_keylen); if (ret < 0) goto out; ret = derived_key_encrypt(epayload, derived_key, sizeof derived_key); if (ret < 0) goto out; ret = datablob_hmac_append(epayload, master_key, master_keylen); if (ret < 0) goto out; ascii_buf = datablob_format(epayload, asciiblob_len); if (!ascii_buf) { ret = -ENOMEM; goto out; } up_read(&mkey->sem); key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); memcpy(buffer, ascii_buf, asciiblob_len); kfree_sensitive(ascii_buf); return asciiblob_len; out: up_read(&mkey->sem); key_put(mkey); memzero_explicit(derived_key, sizeof(derived_key)); return ret; } /* * encrypted_destroy - clear and free the key's payload */ static void encrypted_destroy(struct key *key) { kfree_sensitive(key->payload.data[0]); } struct key_type key_type_encrypted = { .name = "encrypted", .instantiate = encrypted_instantiate, .update = encrypted_update, .destroy = encrypted_destroy, .describe = user_describe, .read = encrypted_read, }; EXPORT_SYMBOL_GPL(key_type_encrypted); static int __init init_encrypted(void) { int ret; hash_tfm = crypto_alloc_shash(hash_alg, 0, 0); if (IS_ERR(hash_tfm)) { pr_err("encrypted_key: can't allocate %s transform: %ld\n", hash_alg, PTR_ERR(hash_tfm)); return PTR_ERR(hash_tfm); } ret = aes_get_sizes(); if (ret < 0) goto out; ret = register_key_type(&key_type_encrypted); if (ret < 0) goto out; return 0; out: crypto_free_shash(hash_tfm); return ret; } static void __exit cleanup_encrypted(void) { crypto_free_shash(hash_tfm); unregister_key_type(&key_type_encrypted); } late_initcall(init_encrypted); module_exit(cleanup_encrypted); MODULE_DESCRIPTION("Encrypted key type"); MODULE_LICENSE("GPL");
18 13 38 38 7 4 3 1 1 2 7 1 1 1 4 42 42 13 4 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 // SPDX-License-Identifier: GPL-2.0-or-later /* * Virtual NCI device simulation driver * * Copyright (C) 2020 Samsung Electrnoics * Bongsu Jeon <bongsu.jeon@samsung.com> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/wait.h> #include <net/nfc/nci_core.h> #define IOCTL_GET_NCIDEV_IDX 0 #define VIRTUAL_NFC_PROTOCOLS (NFC_PROTO_JEWEL_MASK | \ NFC_PROTO_MIFARE_MASK | \ NFC_PROTO_FELICA_MASK | \ NFC_PROTO_ISO14443_MASK | \ NFC_PROTO_ISO14443_B_MASK | \ NFC_PROTO_ISO15693_MASK) struct virtual_nci_dev { struct nci_dev *ndev; struct mutex mtx; struct sk_buff *send_buff; struct wait_queue_head wq; bool running; }; static int virtual_nci_open(struct nci_dev *ndev) { struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); vdev->running = true; return 0; } static int virtual_nci_close(struct nci_dev *ndev) { struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); mutex_lock(&vdev->mtx); kfree_skb(vdev->send_buff); vdev->send_buff = NULL; vdev->running = false; mutex_unlock(&vdev->mtx); return 0; } static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) { struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); mutex_lock(&vdev->mtx); if (vdev->send_buff || !vdev->running) { mutex_unlock(&vdev->mtx); kfree_skb(skb); return -1; } vdev->send_buff = skb_copy(skb, GFP_KERNEL); if (!vdev->send_buff) { mutex_unlock(&vdev->mtx); kfree_skb(skb); return -1; } mutex_unlock(&vdev->mtx); wake_up_interruptible(&vdev->wq); consume_skb(skb); return 0; } static const struct nci_ops virtual_nci_ops = { .open = virtual_nci_open, .close = virtual_nci_close, .send = virtual_nci_send }; static ssize_t virtual_ncidev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct virtual_nci_dev *vdev = file->private_data; size_t actual_len; mutex_lock(&vdev->mtx); while (!vdev->send_buff) { mutex_unlock(&vdev->mtx); if (wait_event_interruptible(vdev->wq, vdev->send_buff)) return -EFAULT; mutex_lock(&vdev->mtx); } actual_len = min_t(size_t, count, vdev->send_buff->len); if (copy_to_user(buf, vdev->send_buff->data, actual_len)) { mutex_unlock(&vdev->mtx); return -EFAULT; } skb_pull(vdev->send_buff, actual_len); if (vdev->send_buff->len == 0) { consume_skb(vdev->send_buff); vdev->send_buff = NULL; } mutex_unlock(&vdev->mtx); return actual_len; } static ssize_t virtual_ncidev_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct virtual_nci_dev *vdev = file->private_data; struct sk_buff *skb; skb = alloc_skb(count, GFP_KERNEL); if (!skb) return -ENOMEM; if (copy_from_user(skb_put(skb, count), buf, count)) { kfree_skb(skb); return -EFAULT; } if (strnlen(skb->data, count) != count) { kfree_skb(skb); return -EINVAL; } nci_recv_frame(vdev->ndev, skb); return count; } static int virtual_ncidev_open(struct inode *inode, struct file *file) { int ret = 0; struct virtual_nci_dev *vdev; vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) return -ENOMEM; vdev->ndev = nci_allocate_device(&virtual_nci_ops, VIRTUAL_NFC_PROTOCOLS, 0, 0); if (!vdev->ndev) { kfree(vdev); return -ENOMEM; } mutex_init(&vdev->mtx); init_waitqueue_head(&vdev->wq); file->private_data = vdev; nci_set_drvdata(vdev->ndev, vdev); ret = nci_register_device(vdev->ndev); if (ret < 0) { nci_free_device(vdev->ndev); mutex_destroy(&vdev->mtx); kfree(vdev); return ret; } return 0; } static int virtual_ncidev_close(struct inode *inode, struct file *file) { struct virtual_nci_dev *vdev = file->private_data; nci_unregister_device(vdev->ndev); nci_free_device(vdev->ndev); mutex_destroy(&vdev->mtx); kfree(vdev); return 0; } static long virtual_ncidev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct virtual_nci_dev *vdev = file->private_data; const struct nfc_dev *nfc_dev = vdev->ndev->nfc_dev; void __user *p = (void __user *)arg; if (cmd != IOCTL_GET_NCIDEV_IDX) return -ENOTTY; if (copy_to_user(p, &nfc_dev->idx, sizeof(nfc_dev->idx))) return -EFAULT; return 0; } static const struct file_operations virtual_ncidev_fops = { .owner = THIS_MODULE, .read = virtual_ncidev_read, .write = virtual_ncidev_write, .open = virtual_ncidev_open, .release = virtual_ncidev_close, .unlocked_ioctl = virtual_ncidev_ioctl }; static struct miscdevice miscdev = { .minor = MISC_DYNAMIC_MINOR, .name = "virtual_nci", .fops = &virtual_ncidev_fops, .mode = 0600, }; module_misc_device(miscdev); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Virtual NCI device simulation driver"); MODULE_AUTHOR("Bongsu Jeon <bongsu.jeon@samsung.com>");
130 130 129 1 130 1 2 127 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (c) 2016, Intel Corporation * Authors: Salvatore Benedetto <salvatore.benedetto@intel.com> */ #include <linux/kernel.h> #include <linux/export.h> #include <linux/err.h> #include <linux/string.h> #include <crypto/dh.h> #include <crypto/kpp.h> #define DH_KPP_SECRET_MIN_SIZE (sizeof(struct kpp_secret) + 3 * sizeof(int)) static inline u8 *dh_pack_data(u8 *dst, u8 *end, const void *src, size_t size) { if (!dst || size > end - dst) return NULL; memcpy(dst, src, size); return dst + size; } static inline const u8 *dh_unpack_data(void *dst, const void *src, size_t size) { memcpy(dst, src, size); return src + size; } static inline unsigned int dh_data_size(const struct dh *p) { return p->key_size + p->p_size + p->g_size; } unsigned int crypto_dh_key_len(const struct dh *p) { return DH_KPP_SECRET_MIN_SIZE + dh_data_size(p); } EXPORT_SYMBOL_GPL(crypto_dh_key_len); int crypto_dh_encode_key(char *buf, unsigned int len, const struct dh *params) { u8 *ptr = buf; u8 * const end = ptr + len; struct kpp_secret secret = { .type = CRYPTO_KPP_SECRET_TYPE_DH, .len = len }; if (unlikely(!len)) return -EINVAL; ptr = dh_pack_data(ptr, end, &secret, sizeof(secret)); ptr = dh_pack_data(ptr, end, &params->key_size, sizeof(params->key_size)); ptr = dh_pack_data(ptr, end, &params->p_size, sizeof(params->p_size)); ptr = dh_pack_data(ptr, end, &params->g_size, sizeof(params->g_size)); ptr = dh_pack_data(ptr, end, params->key, params->key_size); ptr = dh_pack_data(ptr, end, params->p, params->p_size); ptr = dh_pack_data(ptr, end, params->g, params->g_size); if (ptr != end) return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(crypto_dh_encode_key); int __crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params) { const u8 *ptr = buf; struct kpp_secret secret; if (unlikely(!buf || len < DH_KPP_SECRET_MIN_SIZE)) return -EINVAL; ptr = dh_unpack_data(&secret, ptr, sizeof(secret)); if (secret.type != CRYPTO_KPP_SECRET_TYPE_DH) return -EINVAL; ptr = dh_unpack_data(&params->key_size, ptr, sizeof(params->key_size)); ptr = dh_unpack_data(&params->p_size, ptr, sizeof(params->p_size)); ptr = dh_unpack_data(&params->g_size, ptr, sizeof(params->g_size)); if (secret.len != crypto_dh_key_len(params)) return -EINVAL; /* Don't allocate memory. Set pointers to data within * the given buffer */ params->key = (void *)ptr; params->p = (void *)(ptr + params->key_size); params->g = (void *)(ptr + params->key_size + params->p_size); return 0; } int crypto_dh_decode_key(const char *buf, unsigned int len, struct dh *params) { int err; err = __crypto_dh_decode_key(buf, len, params); if (err) return err; /* * Don't permit the buffer for 'key' or 'g' to be larger than 'p', since * some drivers assume otherwise. */ if (params->key_size > params->p_size || params->g_size > params->p_size) return -EINVAL; /* * Don't permit 'p' to be 0. It's not a prime number, and it's subject * to corner cases such as 'mod 0' being undefined or * crypto_kpp_maxsize() returning 0. */ if (memchr_inv(params->p, 0, params->p_size) == NULL) return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(crypto_dh_decode_key);
2 249 251 251 40 22 22 9 54 13 9 9 112 1 7 13 85 49 7 37 23 23 31 54 20 91 1757 1289 1 28 475 22 674 21 666 665 664 1 356 139 183 295 666 5 5 3 1 1 1 4 4 4 1 252 252 251 251 251 251 252 252 252 1 251 251 251 251 251 4 246 251 1 1 1 1 243 243 243 243 220 25 243 9 1 8 8 1 8 244 25 220 244 6 63 10 10 1 1 8 2 2 3 3 2 2 1 1 1 1 1 1 2 2 1 11 11 11 1 10 9 3 2 4 1 1 4 3 2 1 1 1 1 1 13 12 13 77 78 6 72 12 60 2 59 60 57 5 56 14 9 50 6 4 56 57 3 60 45 56 4 58 58 27 27 27 27 27 10 25 39 19 49 2 9 4 2 4 44 9 13 13 48 14 60 60 30 30 5 4 16 5 5 6 134 134 1 133 135 134 134 134 4 1 10 10 1 78 30 2000 1818 112 18 446 446 33 473 473 365 138 461 41 425 77 77 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 // SPDX-License-Identifier: GPL-2.0-or-later /* * Neighbour Discovery for IPv6 * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * Mike Shaver <shaver@ingenia.com> */ /* * Changes: * * Alexey I. Froloff : RFC6106 (DNSSL) support * Pierre Ynard : export userland ND options * through netlink (RDNSS support) * Lars Fenneberg : fixed MTU setting on receipt * of an RA. * Janos Farkas : kmalloc failure checks * Alexey Kuznetsov : state machine reworked * and moved to net/core. * Pekka Savola : RFC2461 validation * YOSHIFUJI Hideaki @USAGI : Verify ND options properly */ #define pr_fmt(fmt) "ICMPv6: " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> #include <linux/sched.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/route.h> #include <linux/init.h> #include <linux/rcupdate.h> #include <linux/slab.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <linux/if_addr.h> #include <linux/if_ether.h> #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/jhash.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> #include <net/protocol.h> #include <net/ndisc.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/icmp.h> #include <net/netlink.h> #include <linux/rtnetlink.h> #include <net/flow.h> #include <net/ip6_checksum.h> #include <net/inet_common.h> #include <linux/proc_fs.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey); static bool ndisc_allow_add(const struct net_device *dev, struct netlink_ext_ack *extack); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); static int pndisc_constructor(struct pneigh_entry *n); static void pndisc_destructor(struct pneigh_entry *n); static void pndisc_redo(struct sk_buff *skb); static int ndisc_is_multicast(const void *pkey); static const struct neigh_ops ndisc_generic_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, .output = neigh_resolve_output, .connected_output = neigh_connected_output, }; static const struct neigh_ops ndisc_hh_ops = { .family = AF_INET6, .solicit = ndisc_solicit, .error_report = ndisc_error_report, .output = neigh_resolve_output, .connected_output = neigh_resolve_output, }; static const struct neigh_ops ndisc_direct_ops = { .family = AF_INET6, .output = neigh_direct_output, .connected_output = neigh_direct_output, }; struct neigh_table nd_tbl = { .family = AF_INET6, .key_len = sizeof(struct in6_addr), .protocol = cpu_to_be16(ETH_P_IPV6), .hash = ndisc_hash, .key_eq = ndisc_key_eq, .constructor = ndisc_constructor, .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, .proxy_redo = pndisc_redo, .is_multicast = ndisc_is_multicast, .allow_add = ndisc_allow_add, .id = "ndisc_cache", .parms = { .tbl = &nd_tbl, .reachable_time = ND_REACHABLE_TIME, .data = { [NEIGH_VAR_MCAST_PROBES] = 3, [NEIGH_VAR_UCAST_PROBES] = 3, [NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER, [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, [NEIGH_VAR_INTERVAL_PROBE_TIME_MS] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 64, [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, }, }, .gc_interval = 30 * HZ, .gc_thresh1 = 128, .gc_thresh2 = 512, .gc_thresh3 = 1024, }; EXPORT_SYMBOL_GPL(nd_tbl); void __ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, int data_len, int pad) { int space = __ndisc_opt_addr_space(data_len, pad); u8 *opt = skb_put(skb, space); opt[0] = type; opt[1] = space>>3; memset(opt + 2, 0, pad); opt += pad; space -= pad; memcpy(opt+2, data, data_len); data_len += 2; opt += data_len; space -= data_len; if (space > 0) memset(opt, 0, space); } EXPORT_SYMBOL_GPL(__ndisc_fill_addr_option); static inline void ndisc_fill_addr_option(struct sk_buff *skb, int type, const void *data, u8 icmp6_type) { __ndisc_fill_addr_option(skb, type, data, skb->dev->addr_len, ndisc_addr_option_pad(skb->dev->type)); ndisc_ops_fill_addr_option(skb->dev, skb, icmp6_type); } static inline void ndisc_fill_redirect_addr_option(struct sk_buff *skb, void *ha, const u8 *ops_data) { ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, ha, NDISC_REDIRECT); ndisc_ops_fill_redirect_addr_option(skb->dev, skb, ops_data); } static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, struct nd_opt_hdr *end) { int type; if (!cur || !end || cur >= end) return NULL; type = cur->nd_opt_type; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while (cur < end && cur->nd_opt_type != type); return cur <= end && cur->nd_opt_type == type ? cur : NULL; } static inline int ndisc_is_useropt(const struct net_device *dev, struct nd_opt_hdr *opt) { return opt->nd_opt_type == ND_OPT_PREFIX_INFO || opt->nd_opt_type == ND_OPT_RDNSS || opt->nd_opt_type == ND_OPT_DNSSL || opt->nd_opt_type == ND_OPT_CAPTIVE_PORTAL || opt->nd_opt_type == ND_OPT_PREF64 || ndisc_ops_is_useropt(dev, opt->nd_opt_type); } static struct nd_opt_hdr *ndisc_next_useropt(const struct net_device *dev, struct nd_opt_hdr *cur, struct nd_opt_hdr *end) { if (!cur || !end || cur >= end) return NULL; do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while (cur < end && !ndisc_is_useropt(dev, cur)); return cur <= end && ndisc_is_useropt(dev, cur) ? cur : NULL; } struct ndisc_options *ndisc_parse_options(const struct net_device *dev, u8 *opt, int opt_len, struct ndisc_options *ndopts) { struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt; if (!nd_opt || opt_len < 0 || !ndopts) return NULL; memset(ndopts, 0, sizeof(*ndopts)); while (opt_len) { int l; if (opt_len < sizeof(struct nd_opt_hdr)) return NULL; l = nd_opt->nd_opt_len << 3; if (opt_len < l || l == 0) return NULL; if (ndisc_ops_parse_options(dev, nd_opt, ndopts)) goto next_opt; switch (nd_opt->nd_opt_type) { case ND_OPT_SOURCE_LL_ADDR: case ND_OPT_TARGET_LL_ADDR: case ND_OPT_MTU: case ND_OPT_NONCE: case ND_OPT_REDIRECT_HDR: if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { ND_PRINTK(2, warn, "%s: duplicated ND6 option found: type=%d\n", __func__, nd_opt->nd_opt_type); } else { ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; } break; case ND_OPT_PREFIX_INFO: ndopts->nd_opts_pi_end = nd_opt; if (!ndopts->nd_opt_array[nd_opt->nd_opt_type]) ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt; break; #ifdef CONFIG_IPV6_ROUTE_INFO case ND_OPT_ROUTE_INFO: ndopts->nd_opts_ri_end = nd_opt; if (!ndopts->nd_opts_ri) ndopts->nd_opts_ri = nd_opt; break; #endif default: if (ndisc_is_useropt(dev, nd_opt)) { ndopts->nd_useropts_end = nd_opt; if (!ndopts->nd_useropts) ndopts->nd_useropts = nd_opt; } else { /* * Unknown options must be silently ignored, * to accommodate future extension to the * protocol. */ ND_PRINTK(2, notice, "%s: ignored unsupported option; type=%d, len=%d\n", __func__, nd_opt->nd_opt_type, nd_opt->nd_opt_len); } } next_opt: opt_len -= l; nd_opt = ((void *)nd_opt) + l; } return ndopts; } int ndisc_mc_map(const struct in6_addr *addr, char *buf, struct net_device *dev, int dir) { switch (dev->type) { case ARPHRD_ETHER: case ARPHRD_IEEE802: /* Not sure. Check it later. --ANK */ case ARPHRD_FDDI: ipv6_eth_mc_map(addr, buf); return 0; case ARPHRD_ARCNET: ipv6_arcnet_mc_map(addr, buf); return 0; case ARPHRD_INFINIBAND: ipv6_ib_mc_map(addr, dev->broadcast, buf); return 0; case ARPHRD_IPGRE: return ipv6_ipgre_mc_map(addr, dev->broadcast, buf); default: if (dir) { memcpy(buf, dev->broadcast, dev->addr_len); return 0; } } return -EINVAL; } EXPORT_SYMBOL(ndisc_mc_map); static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd) { return ndisc_hashfn(pkey, dev, hash_rnd); } static bool ndisc_key_eq(const struct neighbour *n, const void *pkey) { return neigh_key_eq128(n, pkey); } static int ndisc_constructor(struct neighbour *neigh) { struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key; struct net_device *dev = neigh->dev; struct inet6_dev *in6_dev; struct neigh_parms *parms; bool is_multicast = ipv6_addr_is_multicast(addr); in6_dev = in6_dev_get(dev); if (!in6_dev) { return -EINVAL; } parms = in6_dev->nd_parms; __neigh_parms_put(neigh->parms); neigh->parms = neigh_parms_clone(parms); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; if (!dev->header_ops) { neigh->nud_state = NUD_NOARP; neigh->ops = &ndisc_direct_ops; neigh->output = neigh_direct_output; } else { if (is_multicast) { neigh->nud_state = NUD_NOARP; ndisc_mc_map(addr, neigh->ha, dev, 1); } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) { neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->dev_addr, dev->addr_len); if (dev->flags&IFF_LOOPBACK) neigh->type = RTN_LOCAL; } else if (dev->flags&IFF_POINTOPOINT) { neigh->nud_state = NUD_NOARP; memcpy(neigh->ha, dev->broadcast, dev->addr_len); } if (dev->header_ops->cache) neigh->ops = &ndisc_hh_ops; else neigh->ops = &ndisc_generic_ops; if (neigh->nud_state&NUD_VALID) neigh->output = neigh->ops->connected_output; else neigh->output = neigh->ops->output; } in6_dev_put(in6_dev); return 0; } static int pndisc_constructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; if (!dev || !__in6_dev_get(dev)) return -EINVAL; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); return 0; } static void pndisc_destructor(struct pneigh_entry *n) { struct in6_addr *addr = (struct in6_addr *)&n->key; struct in6_addr maddr; struct net_device *dev = n->dev; if (!dev || !__in6_dev_get(dev)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_dec(dev, &maddr); } /* called with rtnl held */ static bool ndisc_allow_add(const struct net_device *dev, struct netlink_ext_ack *extack) { struct inet6_dev *idev = __in6_dev_get(dev); if (!idev || idev->cnf.disable_ipv6) { NL_SET_ERR_MSG(extack, "IPv6 is disabled on this device"); return false; } return true; } static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, int len) { int hlen = LL_RESERVED_SPACE(dev); int tlen = dev->needed_tailroom; struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); if (!skb) { ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", __func__); return NULL; } skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); skb_reset_transport_header(skb); /* Manually assign socket ownership as we avoid calling * sock_alloc_send_pskb() to bypass wmem buffer limits */ skb_set_owner_w(skb, sk); return skb; } static void ip6_nd_hdr(struct sk_buff *skb, const struct in6_addr *saddr, const struct in6_addr *daddr, int hop_limit, int len) { struct ipv6hdr *hdr; struct inet6_dev *idev; unsigned tclass; rcu_read_lock(); idev = __in6_dev_get(skb->dev); tclass = idev ? READ_ONCE(idev->cnf.ndisc_tclass) : 0; rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); ip6_flow_hdr(hdr, tclass, 0); hdr->payload_len = htons(len); hdr->nexthdr = IPPROTO_ICMPV6; hdr->hop_limit = hop_limit; hdr->saddr = *saddr; hdr->daddr = *daddr; } void ndisc_send_skb(struct sk_buff *skb, const struct in6_addr *daddr, const struct in6_addr *saddr) { struct dst_entry *dst = skb_dst(skb); struct net *net = dev_net(skb->dev); struct sock *sk = net->ipv6.ndisc_sk; struct inet6_dev *idev; int err; struct icmp6hdr *icmp6h = icmp6_hdr(skb); u8 type; type = icmp6h->icmp6_type; if (!dst) { struct flowi6 fl6; int oif = skb->dev->ifindex; icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); return; } skb_dst_set(skb, dst); } icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, csum_partial(icmp6h, skb->len, 0)); ip6_nd_hdr(skb, saddr, daddr, READ_ONCE(inet6_sk(sk)->hop_limit), skb->len); rcu_read_lock(); idev = __in6_dev_get(dst->dev); IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, dst->dev, dst_output); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } rcu_read_unlock(); } EXPORT_SYMBOL(ndisc_send_skb); void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, const struct in6_addr *solicited_addr, bool router, bool solicited, bool override, bool inc_opt) { struct sk_buff *skb; struct in6_addr tmpaddr; struct inet6_ifaddr *ifp; const struct in6_addr *src_addr; struct nd_msg *msg; int optlen = 0; /* for anycast or proxy, solicited_addr != src_addr */ ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1); if (ifp) { src_addr = solicited_addr; if (ifp->flags & IFA_F_OPTIMISTIC) override = false; inc_opt |= READ_ONCE(ifp->idev->cnf.force_tllao); in6_ifa_put(ifp); } else { if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr, inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs, &tmpaddr)) return; src_addr = &tmpaddr; } if (!dev->addr_len) inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_ADVERTISEMENT); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return; msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { .icmph = { .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT, .icmp6_router = router, .icmp6_solicited = solicited, .icmp6_override = override, }, .target = *solicited_addr, }; if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, NDISC_NEIGHBOUR_ADVERTISEMENT); ndisc_send_skb(skb, daddr, src_addr); } static void ndisc_send_unsol_na(struct net_device *dev) { struct inet6_dev *idev; struct inet6_ifaddr *ifa; idev = in6_dev_get(dev); if (!idev) return; read_lock_bh(&idev->lock); list_for_each_entry(ifa, &idev->addr_list, if_list) { /* skip tentative addresses until dad completes */ if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_OPTIMISTIC)) continue; ndisc_send_na(dev, &in6addr_linklocal_allnodes, &ifa->addr, /*router=*/ !!idev->cnf.forwarding, /*solicited=*/ false, /*override=*/ true, /*inc_opt=*/ true); } read_unlock_bh(&idev->lock); in6_dev_put(idev); } struct sk_buff *ndisc_ns_create(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *saddr, u64 nonce) { int inc_opt = dev->addr_len; struct sk_buff *skb; struct nd_msg *msg; int optlen = 0; if (!saddr) return NULL; if (ipv6_addr_any(saddr)) inc_opt = false; if (inc_opt) optlen += ndisc_opt_addr_space(dev, NDISC_NEIGHBOUR_SOLICITATION); if (nonce != 0) optlen += 8; skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return NULL; msg = skb_put(skb, sizeof(*msg)); *msg = (struct nd_msg) { .icmph = { .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION, }, .target = *solicit, }; if (inc_opt) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, NDISC_NEIGHBOUR_SOLICITATION); if (nonce != 0) { u8 *opt = skb_put(skb, 8); opt[0] = ND_OPT_NONCE; opt[1] = 8 >> 3; memcpy(opt + 2, &nonce, 6); } return skb; } EXPORT_SYMBOL(ndisc_ns_create); void ndisc_send_ns(struct net_device *dev, const struct in6_addr *solicit, const struct in6_addr *daddr, const struct in6_addr *saddr, u64 nonce) { struct in6_addr addr_buf; struct sk_buff *skb; if (!saddr) { if (ipv6_get_lladdr(dev, &addr_buf, (IFA_F_TENTATIVE | IFA_F_OPTIMISTIC))) return; saddr = &addr_buf; } skb = ndisc_ns_create(dev, solicit, saddr, nonce); if (skb) ndisc_send_skb(skb, daddr, saddr); } void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr) { struct sk_buff *skb; struct rs_msg *msg; int send_sllao = dev->addr_len; int optlen = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* * According to section 2.2 of RFC 4429, we must not * send router solicitations with a sllao from * optimistic addresses, but we may send the solicitation * if we don't include the sllao. So here we check * if our address is optimistic, and if so, we * suppress the inclusion of the sllao. */ if (send_sllao) { struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr, dev, 1); if (ifp) { if (ifp->flags & IFA_F_OPTIMISTIC) { send_sllao = 0; } in6_ifa_put(ifp); } else { send_sllao = 0; } } #endif if (send_sllao) optlen += ndisc_opt_addr_space(dev, NDISC_ROUTER_SOLICITATION); skb = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!skb) return; msg = skb_put(skb, sizeof(*msg)); *msg = (struct rs_msg) { .icmph = { .icmp6_type = NDISC_ROUTER_SOLICITATION, }, }; if (send_sllao) ndisc_fill_addr_option(skb, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, NDISC_ROUTER_SOLICITATION); ndisc_send_skb(skb, daddr, saddr); } static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb) { /* * "The sender MUST return an ICMP * destination unreachable" */ dst_link_failure(skb); kfree_skb(skb); } /* Called with locked neigh: either read or both */ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) { struct in6_addr *saddr = NULL; struct in6_addr mcaddr; struct net_device *dev = neigh->dev; struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, false, 1, IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { if (!(READ_ONCE(neigh->nud_state) & NUD_VALID)) { ND_PRINTK(1, dbg, "%s: trying to ucast probe in NUD_INVALID: %pI6\n", __func__, target); } ndisc_send_ns(dev, target, target, saddr, 0); } else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) { neigh_app_ns(neigh); } else { addrconf_addr_solict_mult(target, &mcaddr); ndisc_send_ns(dev, target, &mcaddr, saddr, 0); } } static int pndisc_is_router(const void *pkey, struct net_device *dev) { struct pneigh_entry *n; int ret = -1; read_lock_bh(&nd_tbl.lock); n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev); if (n) ret = !!(n->flags & NTF_ROUTER); read_unlock_bh(&nd_tbl.lock); return ret; } void ndisc_update(const struct net_device *dev, struct neighbour *neigh, const u8 *lladdr, u8 new, u32 flags, u8 icmp6_type, struct ndisc_options *ndopts) { neigh_update(neigh, lladdr, new, flags, 0); /* report ndisc ops about neighbour update */ ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts); } static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; struct inet6_ifaddr *ifp; struct inet6_dev *idev = NULL; struct neighbour *neigh; int dad = ipv6_addr_any(saddr); int is_router = -1; SKB_DR(reason); u64 nonce = 0; bool inc; if (skb->len < sizeof(struct nd_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NS: multicast target address\n"); return reason; } /* * RFC2461 7.1.1: * DAD has to be destined for solicited node multicast address. */ if (dad && !ipv6_addr_is_solict_mult(daddr)) { ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n"); return reason; } if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev); if (!lladdr) { ND_PRINTK(2, warn, "NS: invalid link-layer address length\n"); return reason; } /* RFC2461 7.1.1: * If the IP source address is the unspecified address, * there MUST NOT be source link-layer address option * in the message. */ if (dad) { ND_PRINTK(2, warn, "NS: bad DAD packet (link-layer address option)\n"); return reason; } } if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1) memcpy(&nonce, (u8 *)(ndopts.nd_opts_nonce + 1), 6); inc = ipv6_addr_is_multicast(daddr); ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { have_ifp: if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) { if (dad) { if (nonce != 0 && ifp->dad_nonce == nonce) { u8 *np = (u8 *)&nonce; /* Matching nonce if looped back */ ND_PRINTK(2, notice, "%s: IPv6 DAD loopback for address %pI6c nonce %pM ignored\n", ifp->idev->dev->name, &ifp->addr, np); goto out; } /* * We are colliding with another node * who is doing DAD * so fail our DAD process */ addrconf_dad_failure(skb, ifp); return reason; } else { /* * This is not a dad solicitation. * If we are an optimistic node, * we should respond. * Otherwise, we should ignore it. */ if (!(ifp->flags & IFA_F_OPTIMISTIC)) goto out; } } idev = ifp->idev; } else { struct net *net = dev_net(dev); /* perhaps an address on the master device */ if (netif_is_l3_slave(dev)) { struct net_device *mdev; mdev = netdev_master_upper_dev_get_rcu(dev); if (mdev) { ifp = ipv6_get_ifaddr(net, &msg->target, mdev, 1); if (ifp) goto have_ifp; } } idev = in6_dev_get(dev); if (!idev) { /* XXX: count this drop? */ return reason; } if (ipv6_chk_acast_addr(net, dev, &msg->target) || (READ_ONCE(idev->cnf.forwarding) && (READ_ONCE(net->ipv6.devconf_all->proxy_ndp) || READ_ONCE(idev->cnf.proxy_ndp)) && (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc && NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) { /* * for anycast or proxy, * sender should delay its response * by a random time between 0 and * MAX_ANYCAST_DELAY_TIME seconds. * (RFC2461) -- yoshfuji */ struct sk_buff *n = skb_clone(skb, GFP_ATOMIC); if (n) pneigh_enqueue(&nd_tbl, idev->nd_parms, n); goto out; } } else { SKB_DR_SET(reason, IPV6_NDISC_NS_OTHERHOST); goto out; } } if (is_router < 0) is_router = READ_ONCE(idev->cnf.forwarding); if (dad) { ndisc_send_na(dev, &in6addr_linklocal_allnodes, &msg->target, !!is_router, false, (ifp != NULL), true); goto out; } if (inc) NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast); else NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast); /* * update / create cache entry * for the source address */ neigh = __neigh_lookup(&nd_tbl, saddr, dev, !inc || lladdr || !dev->addr_len); if (neigh) ndisc_update(dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE, NDISC_NEIGHBOUR_SOLICITATION, &ndopts); if (neigh || !dev->header_ops) { ndisc_send_na(dev, saddr, &msg->target, !!is_router, true, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); reason = SKB_CONSUMED; } out: if (ifp) in6_ifa_put(ifp); else in6_dev_put(idev); return reason; } static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr) { struct inet6_dev *idev = __in6_dev_get(dev); switch (READ_ONCE(idev->cnf.accept_untracked_na)) { case 0: /* Don't accept untracked na (absent in neighbor cache) */ return 0; case 1: /* Create new entries from na if currently untracked */ return 1; case 2: /* Create new entries from untracked na only if saddr is in the * same subnet as an address configured on the interface that * received the na */ return !!ipv6_chk_prefix(saddr, dev); default: return 0; } } static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct nd_msg, opt)); struct ndisc_options ndopts; struct net_device *dev = skb->dev; struct inet6_dev *idev = __in6_dev_get(dev); struct inet6_ifaddr *ifp; struct neighbour *neigh; SKB_DR(reason); u8 new_state; if (skb->len < sizeof(struct nd_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK(2, warn, "NA: target address is multicast\n"); return reason; } if (ipv6_addr_is_multicast(daddr) && msg->icmph.icmp6_solicited) { ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n"); return reason; } /* For some 802.11 wireless deployments (and possibly other networks), * there will be a NA proxy and unsolicitd packets are attacks * and thus should not be accepted. * drop_unsolicited_na takes precedence over accept_untracked_na */ if (!msg->icmph.icmp6_solicited && idev && READ_ONCE(idev->cnf.drop_unsolicited_na)) return reason; if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_tgt_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev); if (!lladdr) { ND_PRINTK(2, warn, "NA: invalid link-layer address length\n"); return reason; } } ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1); if (ifp) { if (skb->pkt_type != PACKET_LOOPBACK && (ifp->flags & IFA_F_TENTATIVE)) { addrconf_dad_failure(skb, ifp); return reason; } /* What should we make now? The advertisement is invalid, but ndisc specs say nothing about it. It could be misconfiguration, or an smart proxy agent tries to help us :-) We should not print the error if NA has been received from loopback - it is just our own unsolicited advertisement. */ if (skb->pkt_type != PACKET_LOOPBACK) ND_PRINTK(1, warn, "NA: %pM advertised our address %pI6c on %s!\n", eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name); in6_ifa_put(ifp); return reason; } neigh = neigh_lookup(&nd_tbl, &msg->target, dev); /* RFC 9131 updates original Neighbour Discovery RFC 4861. * NAs with Target LL Address option without a corresponding * entry in the neighbour cache can now create a STALE neighbour * cache entry on routers. * * entry accept fwding solicited behaviour * ------- ------ ------ --------- ---------------------- * present X X 0 Set state to STALE * present X X 1 Set state to REACHABLE * absent 0 X X Do nothing * absent 1 0 X Do nothing * absent 1 1 X Add a new STALE entry * * Note that we don't do a (daddr == all-routers-mcast) check. */ new_state = msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE; if (!neigh && lladdr && idev && READ_ONCE(idev->cnf.forwarding)) { if (accept_untracked_na(dev, saddr)) { neigh = neigh_create(&nd_tbl, &msg->target, dev); new_state = NUD_STALE; } } if (neigh && !IS_ERR(neigh)) { u8 old_flags = neigh->flags; struct net *net = dev_net(dev); if (READ_ONCE(neigh->nud_state) & NUD_FAILED) goto out; /* * Don't update the neighbor cache entry on a proxy NA from * ourselves because either the proxied node is off link or it * has already sent a NA to us. */ if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && READ_ONCE(net->ipv6.devconf_all->forwarding) && READ_ONCE(net->ipv6.devconf_all->proxy_ndp) && pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) { /* XXX: idev->cnf.proxy_ndp */ goto out; } ndisc_update(dev, neigh, lladdr, new_state, NEIGH_UPDATE_F_WEAK_OVERRIDE| (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0), NDISC_NEIGHBOUR_ADVERTISEMENT, &ndopts); if ((old_flags & ~neigh->flags) & NTF_ROUTER) { /* * Change: router to host */ rt6_clean_tohost(dev_net(dev), saddr); } reason = SKB_CONSUMED; out: neigh_release(neigh); } return reason; } static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb) { struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb); unsigned long ndoptlen = skb->len - sizeof(*rs_msg); struct neighbour *neigh; struct inet6_dev *idev; const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; struct ndisc_options ndopts; u8 *lladdr = NULL; SKB_DR(reason); if (skb->len < sizeof(*rs_msg)) return SKB_DROP_REASON_PKT_TOO_SMALL; idev = __in6_dev_get(skb->dev); if (!idev) { ND_PRINTK(1, err, "RS: can't find in6 device\n"); return reason; } /* Don't accept RS if we're not in router mode */ if (!READ_ONCE(idev->cnf.forwarding)) goto out; /* * Don't update NCE if src = ::; * this implies that the source node has no ip address assigned yet. */ if (ipv6_addr_any(saddr)) goto out; /* Parse ND options */ if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) goto out; } neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1); if (neigh) { ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE_ISROUTER, NDISC_ROUTER_SOLICITATION, &ndopts); neigh_release(neigh); reason = SKB_CONSUMED; } out: return reason; } static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) { struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra); struct sk_buff *skb; struct nlmsghdr *nlh; struct nduseroptmsg *ndmsg; struct net *net = dev_net(ra->dev); int err; int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg) + (opt->nd_opt_len << 3)); size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr)); skb = nlmsg_new(msg_size, GFP_ATOMIC); if (!skb) { err = -ENOBUFS; goto errout; } nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0); if (!nlh) { goto nla_put_failure; } ndmsg = nlmsg_data(nlh); ndmsg->nduseropt_family = AF_INET6; ndmsg->nduseropt_ifindex = ra->dev->ifindex; ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type; ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code; ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3; memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr)) goto nla_put_failure; nlmsg_end(skb, nlh); rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL, GFP_ATOMIC); return; nla_put_failure: nlmsg_free(skb); err = -EMSGSIZE; errout: rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err); } static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb) { struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb); bool send_ifinfo_notify = false; struct neighbour *neigh = NULL; struct ndisc_options ndopts; struct fib6_info *rt = NULL; struct inet6_dev *in6_dev; struct fib6_table *table; u32 defrtr_usr_metric; unsigned int pref = 0; __u32 old_if_flags; struct net *net; SKB_DR(reason); int lifetime; int optlen; __u8 *opt = (__u8 *)(ra_msg + 1); optlen = (skb_tail_pointer(skb) - skb_transport_header(skb)) - sizeof(struct ra_msg); ND_PRINTK(2, info, "RA: %s, dev: %s\n", __func__, skb->dev->name); if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "RA: source address is not link-local\n"); return reason; } if (optlen < 0) return SKB_DROP_REASON_PKT_TOO_SMALL; #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) { ND_PRINTK(2, warn, "RA: from host or unauthorized router\n"); return reason; } #endif in6_dev = __in6_dev_get(skb->dev); if (!in6_dev) { ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", skb->dev->name); return reason; } if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ipv6_accept_ra(in6_dev)) { ND_PRINTK(2, info, "RA: %s, did not accept ra for dev: %s\n", __func__, skb->dev->name); goto skip_linkparms; } #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific parameters from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { ND_PRINTK(2, info, "RA: %s, nodetype is NODEFAULT, dev: %s\n", __func__, skb->dev->name); goto skip_linkparms; } #endif if (in6_dev->if_flags & IF_RS_SENT) { /* * flag that an RA was received after an RS was sent * out on this interface. */ in6_dev->if_flags |= IF_RA_RCVD; } /* * Remember the managed/otherconf flags from most recently * received RA message (RFC 2462) -- yoshfuji */ old_if_flags = in6_dev->if_flags; in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED | IF_RA_OTHERCONF)) | (ra_msg->icmph.icmp6_addrconf_managed ? IF_RA_MANAGED : 0) | (ra_msg->icmph.icmp6_addrconf_other ? IF_RA_OTHERCONF : 0); if (old_if_flags != in6_dev->if_flags) send_ifinfo_notify = true; if (!READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) { ND_PRINTK(2, info, "RA: %s, defrtr is false for dev: %s\n", __func__, skb->dev->name); goto skip_defrtr; } lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime); if (lifetime != 0 && lifetime < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) { ND_PRINTK(2, info, "RA: router lifetime (%ds) is too short: %s\n", lifetime, skb->dev->name); goto skip_defrtr; } /* Do not accept RA with source-addr found on local machine unless * accept_ra_from_local is set to true. */ net = dev_net(in6_dev->dev); if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(net, &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: default router ignored\n", skb->dev->name); goto skip_defrtr; } #ifdef CONFIG_IPV6_ROUTER_PREF pref = ra_msg->icmph.icmp6_router_pref; /* 10b is handled as if it were 00b (medium) */ if (pref == ICMPV6_ROUTER_PREF_INVALID || !READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref)) pref = ICMPV6_ROUTER_PREF_MEDIUM; #endif /* routes added from RAs do not use nexthop objects */ rt = rt6_get_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev); if (rt) { neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); fib6_info_release(rt); return reason; } } /* Set default route metric as specified by user */ defrtr_usr_metric = in6_dev->cnf.ra_defrtr_metric; /* delete the route if lifetime is 0 or if metric needs change */ if (rt && (lifetime == 0 || rt->fib6_metric != defrtr_usr_metric)) { ip6_del_rt(net, rt, false); rt = NULL; } ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, metric: %d, for dev: %s\n", rt, lifetime, defrtr_usr_metric, skb->dev->name); if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); if (neigh) neigh_release(neigh); rt = rt6_add_dflt_router(net, &ipv6_hdr(skb)->saddr, skb->dev, pref, defrtr_usr_metric, lifetime); if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", __func__); return reason; } neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6, rt->fib6_nh->fib_nh_dev, NULL, &ipv6_hdr(skb)->saddr); if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); fib6_info_release(rt); return reason; } neigh->flags |= NTF_ROUTER; } else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) { struct nl_info nlinfo = { .nl_net = net, }; rt->fib6_flags = (rt->fib6_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); inet6_rt_notify(RTM_NEWROUTE, rt, &nlinfo, NLM_F_REPLACE); } if (rt) { table = rt->fib6_table; spin_lock_bh(&table->tb6_lock); fib6_set_expires(rt, jiffies + (HZ * lifetime)); fib6_add_gc_list(rt); spin_unlock_bh(&table->tb6_lock); } if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) < 256 && ra_msg->icmph.icmp6_hop_limit) { if (READ_ONCE(in6_dev->cnf.accept_ra_min_hop_limit) <= ra_msg->icmph.icmp6_hop_limit) { WRITE_ONCE(in6_dev->cnf.hop_limit, ra_msg->icmph.icmp6_hop_limit); fib6_metric_set(rt, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); } else { ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than minimum\n"); } } skip_defrtr: /* * Update Reachable Time and Retrans Timer */ if (in6_dev->nd_parms) { unsigned long rtime = ntohl(ra_msg->retrans_timer); if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) { rtime = (rtime*HZ)/1000; if (rtime < HZ/100) rtime = HZ/100; NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime); in6_dev->tstamp = jiffies; send_ifinfo_notify = true; } rtime = ntohl(ra_msg->reachable_time); if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) { rtime = (rtime*HZ)/1000; if (rtime < HZ/10) rtime = HZ/10; if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) { NEIGH_VAR_SET(in6_dev->nd_parms, BASE_REACHABLE_TIME, rtime); NEIGH_VAR_SET(in6_dev->nd_parms, GC_STALETIME, 3 * rtime); in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime); in6_dev->tstamp = jiffies; send_ifinfo_notify = true; } } } skip_linkparms: /* * Process options. */ if (!neigh) neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr, skb->dev, 1); if (neigh) { u8 *lladdr = NULL; if (ndopts.nd_opts_src_lladdr) { lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, skb->dev); if (!lladdr) { ND_PRINTK(2, warn, "RA: invalid link-layer address length\n"); goto out; } } ndisc_update(skb->dev, neigh, lladdr, NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE| NEIGH_UPDATE_F_OVERRIDE_ISROUTER| NEIGH_UPDATE_F_ISROUTER, NDISC_ROUTER_ADVERTISEMENT, &ndopts); reason = SKB_CONSUMED; } if (!ipv6_accept_ra(in6_dev)) { ND_PRINTK(2, info, "RA: %s, accept_ra is false for dev: %s\n", __func__, skb->dev->name); goto out; } #ifdef CONFIG_IPV6_ROUTE_INFO if (!READ_ONCE(in6_dev->cnf.accept_ra_from_local) && ipv6_chk_addr(dev_net(in6_dev->dev), &ipv6_hdr(skb)->saddr, in6_dev->dev, 0)) { ND_PRINTK(2, info, "RA from local address detected on dev: %s: router info ignored.\n", skb->dev->name); goto skip_routeinfo; } if (READ_ONCE(in6_dev->cnf.accept_ra_rtr_pref) && ndopts.nd_opts_ri) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_ri; p; p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) { struct route_info *ri = (struct route_info *)p; #ifdef CONFIG_IPV6_NDISC_NODETYPE if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT && ri->prefix_len == 0) continue; #endif if (ri->prefix_len == 0 && !READ_ONCE(in6_dev->cnf.accept_ra_defrtr)) continue; if (ri->lifetime != 0 && ntohl(ri->lifetime) < READ_ONCE(in6_dev->cnf.accept_ra_min_lft)) continue; if (ri->prefix_len < READ_ONCE(in6_dev->cnf.accept_ra_rt_info_min_plen)) continue; if (ri->prefix_len > READ_ONCE(in6_dev->cnf.accept_ra_rt_info_max_plen)) continue; rt6_route_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, &ipv6_hdr(skb)->saddr); } } skip_routeinfo: #endif #ifdef CONFIG_IPV6_NDISC_NODETYPE /* skip link-specific ndopts from interior routers */ if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT) { ND_PRINTK(2, info, "RA: %s, nodetype is NODEFAULT (interior routes), dev: %s\n", __func__, skb->dev->name); goto out; } #endif if (READ_ONCE(in6_dev->cnf.accept_ra_pinfo) && ndopts.nd_opts_pi) { struct nd_opt_hdr *p; for (p = ndopts.nd_opts_pi; p; p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) { addrconf_prefix_rcv(skb->dev, (u8 *)p, (p->nd_opt_len) << 3, ndopts.nd_opts_src_lladdr != NULL); } } if (ndopts.nd_opts_mtu && READ_ONCE(in6_dev->cnf.accept_ra_mtu)) { __be32 n; u32 mtu; memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu)); mtu = ntohl(n); if (in6_dev->ra_mtu != mtu) { in6_dev->ra_mtu = mtu; send_ifinfo_notify = true; } if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) { ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu); } else if (READ_ONCE(in6_dev->cnf.mtu6) != mtu) { WRITE_ONCE(in6_dev->cnf.mtu6, mtu); fib6_metric_set(rt, RTAX_MTU, mtu); rt6_mtu_change(skb->dev, mtu); } } if (ndopts.nd_useropts) { struct nd_opt_hdr *p; for (p = ndopts.nd_useropts; p; p = ndisc_next_useropt(skb->dev, p, ndopts.nd_useropts_end)) { ndisc_ra_useropt(skb, p); } } if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) { ND_PRINTK(2, warn, "RA: invalid RA options\n"); } out: /* Send a notify if RA changed managed/otherconf flags or * timer settings or ra_mtu value */ if (send_ifinfo_notify) inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); fib6_info_release(rt); if (neigh) neigh_release(neigh); return reason; } static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb) { struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb); u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + offsetof(struct rd_msg, opt)); struct ndisc_options ndopts; SKB_DR(reason); u8 *hdr; #ifdef CONFIG_IPV6_NDISC_NODETYPE switch (skb->ndisc_nodetype) { case NDISC_NODETYPE_HOST: case NDISC_NODETYPE_NODEFAULT: ND_PRINTK(2, warn, "Redirect: from host or unauthorized router\n"); return reason; } #endif if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "Redirect: source address is not link-local\n"); return reason; } if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts)) return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS; if (!ndopts.nd_opts_rh) { ip6_redirect_no_header(skb, dev_net(skb->dev), skb->dev->ifindex); return reason; } hdr = (u8 *)ndopts.nd_opts_rh; hdr += 8; if (!pskb_pull(skb, hdr - skb_transport_header(skb))) return SKB_DROP_REASON_PKT_TOO_SMALL; return icmpv6_notify(skb, NDISC_REDIRECT, 0, 0); } static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb, struct sk_buff *orig_skb, int rd_len) { u8 *opt = skb_put(skb, rd_len); memset(opt, 0, 8); *(opt++) = ND_OPT_REDIRECT_HDR; *(opt++) = (rd_len >> 3); opt += 6; skb_copy_bits(orig_skb, skb_network_offset(orig_skb), opt, rd_len - 8); } void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) { struct net_device *dev = skb->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.ndisc_sk; int optlen = 0; struct inet_peer *peer; struct sk_buff *buff; struct rd_msg *msg; struct in6_addr saddr_buf; struct rt6_info *rt; struct dst_entry *dst; struct flowi6 fl6; int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL, ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; bool ret; if (netif_is_l3_master(skb->dev)) { dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); if (!dev) return; } if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { ND_PRINTK(2, warn, "Redirect: no link-local address on %s\n", dev->name); return; } if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) && ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) { ND_PRINTK(2, warn, "Redirect: target address is not link-local unicast\n"); return; } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); return; } dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); if (IS_ERR(dst)) return; rt = dst_rt6_info(dst); if (rt->rt6i_flags & RTF_GATEWAY) { ND_PRINTK(2, warn, "Redirect: destination is not a neighbour\n"); goto release; } peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1); ret = inet_peer_xrlim_allow(peer, 1*HZ); if (peer) inet_putpeer(peer); if (!ret) goto release; if (dev->addr_len) { struct neighbour *neigh = dst_neigh_lookup(skb_dst(skb), target); if (!neigh) { ND_PRINTK(2, warn, "Redirect: no neigh for target address\n"); goto release; } read_lock_bh(&neigh->lock); if (neigh->nud_state & NUD_VALID) { memcpy(ha_buf, neigh->ha, dev->addr_len); read_unlock_bh(&neigh->lock); ha = ha_buf; optlen += ndisc_redirect_opt_addr_space(dev, neigh, ops_data_buf, &ops_data); } else read_unlock_bh(&neigh->lock); neigh_release(neigh); } rd_len = min_t(unsigned int, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(*msg) - optlen, skb->len + 8); rd_len &= ~0x7; optlen += rd_len; buff = ndisc_alloc_skb(dev, sizeof(*msg) + optlen); if (!buff) goto release; msg = skb_put(buff, sizeof(*msg)); *msg = (struct rd_msg) { .icmph = { .icmp6_type = NDISC_REDIRECT, }, .target = *target, .dest = ipv6_hdr(skb)->daddr, }; /* * include target_address option */ if (ha) ndisc_fill_redirect_addr_option(buff, ha, ops_data); /* * build redirect option and copy skb over to the new packet. */ if (rd_len) ndisc_fill_redirect_hdr_option(buff, skb, rd_len); skb_dst_set(buff, dst); ndisc_send_skb(buff, &ipv6_hdr(skb)->saddr, &saddr_buf); return; release: dst_release(dst); } static void pndisc_redo(struct sk_buff *skb) { enum skb_drop_reason reason = ndisc_recv_ns(skb); kfree_skb_reason(skb, reason); } static int ndisc_is_multicast(const void *pkey) { return ipv6_addr_is_multicast((struct in6_addr *)pkey); } static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb) { struct inet6_dev *idev = __in6_dev_get(skb->dev); if (!idev) return true; if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED && READ_ONCE(idev->cnf.suppress_frag_ndisc)) { net_warn_ratelimited("Received fragmented ndisc packet. Carefully consider disabling suppress_frag_ndisc.\n"); return true; } return false; } enum skb_drop_reason ndisc_rcv(struct sk_buff *skb) { struct nd_msg *msg; SKB_DR(reason); if (ndisc_suppress_frag_ndisc(skb)) return SKB_DROP_REASON_IPV6_NDISC_FRAG; if (skb_linearize(skb)) return SKB_DROP_REASON_NOMEM; msg = (struct nd_msg *)skb_transport_header(skb); __skb_push(skb, skb->data - skb_transport_header(skb)); if (ipv6_hdr(skb)->hop_limit != 255) { ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n", ipv6_hdr(skb)->hop_limit); return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT; } if (msg->icmph.icmp6_code != 0) { ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n", msg->icmph.icmp6_code); return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE; } switch (msg->icmph.icmp6_type) { case NDISC_NEIGHBOUR_SOLICITATION: memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); reason = ndisc_recv_ns(skb); break; case NDISC_NEIGHBOUR_ADVERTISEMENT: reason = ndisc_recv_na(skb); break; case NDISC_ROUTER_SOLICITATION: reason = ndisc_recv_rs(skb); break; case NDISC_ROUTER_ADVERTISEMENT: reason = ndisc_router_discovery(skb); break; case NDISC_REDIRECT: reason = ndisc_redirect_rcv(skb); break; } return reason; } static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; bool evict_nocarrier; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(0, net, false); fallthrough; case NETDEV_UP: idev = in6_dev_get(dev); if (!idev) break; if (READ_ONCE(idev->cnf.ndisc_notify) || READ_ONCE(net->ipv6.devconf_all->ndisc_notify)) ndisc_send_unsol_na(dev); in6_dev_put(idev); break; case NETDEV_CHANGE: idev = in6_dev_get(dev); if (!idev) evict_nocarrier = true; else { evict_nocarrier = READ_ONCE(idev->cnf.ndisc_evict_nocarrier) && READ_ONCE(net->ipv6.devconf_all->ndisc_evict_nocarrier); in6_dev_put(idev); } change_info = ptr; if (change_info->flags_changed & IFF_NOARP) neigh_changeaddr(&nd_tbl, dev); if (evict_nocarrier && !netif_carrier_ok(dev)) neigh_carrier_down(&nd_tbl, dev); break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); fib6_run_gc(0, net, false); break; case NETDEV_NOTIFY_PEERS: ndisc_send_unsol_na(dev); break; default: break; } return NOTIFY_DONE; } static struct notifier_block ndisc_netdev_notifier = { .notifier_call = ndisc_netdev_event, .priority = ADDRCONF_NOTIFY_PRIORITY - 5, }; #ifdef CONFIG_SYSCTL static void ndisc_warn_deprecated_sysctl(const struct ctl_table *ctl, const char *func, const char *dev_name) { static char warncomm[TASK_COMM_LEN]; static int warned; if (strcmp(warncomm, current->comm) && warned < 5) { strcpy(warncomm, current->comm); pr_warn("process `%s' is using deprecated sysctl (%s) net.ipv6.neigh.%s.%s - use net.ipv6.neigh.%s.%s_ms instead\n", warncomm, func, dev_name, ctl->procname, dev_name, ctl->procname); warned++; } } int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; int ret; if ((strcmp(ctl->procname, "retrans_time") == 0) || (strcmp(ctl->procname, "base_reachable_time") == 0)) ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default"); if (strcmp(ctl->procname, "retrans_time") == 0) ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos); else if (strcmp(ctl->procname, "base_reachable_time") == 0) ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) || (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); else ret = -1; if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)) idev->nd_parms->reachable_time = neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)); WRITE_ONCE(idev->tstamp, jiffies); inet6_ifinfo_notify(RTM_NEWLINK, idev); in6_dev_put(idev); } return ret; } #endif static int __net_init ndisc_net_init(struct net *net) { struct ipv6_pinfo *np; struct sock *sk; int err; err = inet_ctl_sock_create(&sk, PF_INET6, SOCK_RAW, IPPROTO_ICMPV6, net); if (err < 0) { ND_PRINTK(0, err, "NDISC: Failed to initialize the control socket (err %d)\n", err); return err; } net->ipv6.ndisc_sk = sk; np = inet6_sk(sk); np->hop_limit = 255; /* Do not loopback ndisc messages */ inet6_clear_bit(MC6_LOOP, sk); return 0; } static void __net_exit ndisc_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.ndisc_sk); } static struct pernet_operations ndisc_net_ops = { .init = ndisc_net_init, .exit = ndisc_net_exit, }; int __init ndisc_init(void) { int err; err = register_pernet_subsys(&ndisc_net_ops); if (err) return err; /* * Initialize the neighbour table */ neigh_table_init(NEIGH_ND_TABLE, &nd_tbl); #ifdef CONFIG_SYSCTL err = neigh_sysctl_register(NULL, &nd_tbl.parms, ndisc_ifinfo_sysctl_change); if (err) goto out_unregister_pernet; out: #endif return err; #ifdef CONFIG_SYSCTL out_unregister_pernet: unregister_pernet_subsys(&ndisc_net_ops); goto out; #endif } int __init ndisc_late_init(void) { return register_netdevice_notifier(&ndisc_netdev_notifier); } void ndisc_late_cleanup(void) { unregister_netdevice_notifier(&ndisc_netdev_notifier); } void ndisc_cleanup(void) { #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(&nd_tbl.parms); #endif neigh_table_clear(NEIGH_ND_TABLE, &nd_tbl); unregister_pernet_subsys(&ndisc_net_ops); }
2 2 2 2 2 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 // SPDX-License-Identifier: GPL-2.0-or-later /* * IPVS: Weighted Round-Robin Scheduling module * * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> * * Changes: * Wensong Zhang : changed the ip_vs_wrr_schedule to return dest * Wensong Zhang : changed some comestics things for debugging * Wensong Zhang : changed for the d-linked destination list * Wensong Zhang : added the ip_vs_wrr_update_svc * Julian Anastasov : fixed the bug of returning destination * with weight 0 when all weights are zero */ #define KMSG_COMPONENT "IPVS" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt #include <linux/module.h> #include <linux/kernel.h> #include <linux/slab.h> #include <linux/net.h> #include <linux/gcd.h> #include <net/ip_vs.h> /* The WRR algorithm depends on some caclulations: * - mw: maximum weight * - di: weight step, greatest common divisor from all weights * - cw: current required weight * As result, all weights are in the [di..mw] range with a step=di. * * First, we start with cw = mw and select dests with weight >= cw. * Then cw is reduced with di and all dests are checked again. * Last pass should be with cw = di. We have mw/di passes in total: * * pass 1: cw = max weight * pass 2: cw = max weight - di * pass 3: cw = max weight - 2 * di * ... * last pass: cw = di * * Weights are supposed to be >= di but we run in parallel with * weight changes, it is possible some dest weight to be reduced * below di, bad if it is the only available dest. * * So, we modify how mw is calculated, now it is reduced with (di - 1), * so that last cw is 1 to catch such dests with weight below di: * pass 1: cw = max weight - (di - 1) * pass 2: cw = max weight - di - (di - 1) * pass 3: cw = max weight - 2 * di - (di - 1) * ... * last pass: cw = 1 * */ /* * current destination pointer for weighted round-robin scheduling */ struct ip_vs_wrr_mark { struct ip_vs_dest *cl; /* current dest or head */ int cw; /* current weight */ int mw; /* maximum weight */ int di; /* decreasing interval */ struct rcu_head rcu_head; }; static int ip_vs_wrr_gcd_weight(struct ip_vs_service *svc) { struct ip_vs_dest *dest; int weight; int g = 0; list_for_each_entry(dest, &svc->destinations, n_list) { weight = atomic_read(&dest->weight); if (weight > 0) { if (g > 0) g = gcd(weight, g); else g = weight; } } return g ? g : 1; } /* * Get the maximum weight of the service destinations. */ static int ip_vs_wrr_max_weight(struct ip_vs_service *svc) { struct ip_vs_dest *dest; int new_weight, weight = 0; list_for_each_entry(dest, &svc->destinations, n_list) { new_weight = atomic_read(&dest->weight); if (new_weight > weight) weight = new_weight; } return weight; } static int ip_vs_wrr_init_svc(struct ip_vs_service *svc) { struct ip_vs_wrr_mark *mark; /* * Allocate the mark variable for WRR scheduling */ mark = kmalloc(sizeof(struct ip_vs_wrr_mark), GFP_KERNEL); if (mark == NULL) return -ENOMEM; mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list); mark->di = ip_vs_wrr_gcd_weight(svc); mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); mark->cw = mark->mw; svc->sched_data = mark; return 0; } static void ip_vs_wrr_done_svc(struct ip_vs_service *svc) { struct ip_vs_wrr_mark *mark = svc->sched_data; /* * Release the mark variable */ kfree_rcu(mark, rcu_head); } static int ip_vs_wrr_dest_changed(struct ip_vs_service *svc, struct ip_vs_dest *dest) { struct ip_vs_wrr_mark *mark = svc->sched_data; spin_lock_bh(&svc->sched_lock); mark->cl = list_entry(&svc->destinations, struct ip_vs_dest, n_list); mark->di = ip_vs_wrr_gcd_weight(svc); mark->mw = ip_vs_wrr_max_weight(svc) - (mark->di - 1); if (mark->cw > mark->mw || !mark->cw) mark->cw = mark->mw; else if (mark->di > 1) mark->cw = (mark->cw / mark->di) * mark->di + 1; spin_unlock_bh(&svc->sched_lock); return 0; } /* * Weighted Round-Robin Scheduling */ static struct ip_vs_dest * ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb, struct ip_vs_iphdr *iph) { struct ip_vs_dest *dest, *last, *stop = NULL; struct ip_vs_wrr_mark *mark = svc->sched_data; bool last_pass = false, restarted = false; IP_VS_DBG(6, "%s(): Scheduling...\n", __func__); spin_lock_bh(&svc->sched_lock); dest = mark->cl; /* No available dests? */ if (mark->mw == 0) goto err_noavail; last = dest; /* Stop only after all dests were checked for weight >= 1 (last pass) */ while (1) { list_for_each_entry_continue_rcu(dest, &svc->destinations, n_list) { if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) && atomic_read(&dest->weight) >= mark->cw) goto found; if (dest == stop) goto err_over; } mark->cw -= mark->di; if (mark->cw <= 0) { mark->cw = mark->mw; /* Stop if we tried last pass from first dest: * 1. last_pass: we started checks when cw > di but * then all dests were checked for w >= 1 * 2. last was head: the first and only traversal * was for weight >= 1, for all dests. */ if (last_pass || &last->n_list == &svc->destinations) goto err_over; restarted = true; } last_pass = mark->cw <= mark->di; if (last_pass && restarted && &last->n_list != &svc->destinations) { /* First traversal was for w >= 1 but only * for dests after 'last', now do the same * for all dests up to 'last'. */ stop = last; } } found: IP_VS_DBG_BUF(6, "WRR: server %s:%u " "activeconns %d refcnt %d weight %d\n", IP_VS_DBG_ADDR(dest->af, &dest->addr), ntohs(dest->port), atomic_read(&dest->activeconns), refcount_read(&dest->refcnt), atomic_read(&dest->weight)); mark->cl = dest; out: spin_unlock_bh(&svc->sched_lock); return dest; err_noavail: mark->cl = dest; dest = NULL; ip_vs_scheduler_err(svc, "no destination available"); goto out; err_over: mark->cl = dest; dest = NULL; ip_vs_scheduler_err(svc, "no destination available: " "all destinations are overloaded"); goto out; } static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .name = "wrr", .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, .add_dest = ip_vs_wrr_dest_changed, .del_dest = ip_vs_wrr_dest_changed, .upd_dest = ip_vs_wrr_dest_changed, .schedule = ip_vs_wrr_schedule, }; static int __init ip_vs_wrr_init(void) { return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ; } static void __exit ip_vs_wrr_cleanup(void) { unregister_ip_vs_scheduler(&ip_vs_wrr_scheduler); synchronize_rcu(); } module_init(ip_vs_wrr_init); module_exit(ip_vs_wrr_cleanup); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("ipvs weighted round-robin scheduler");
37 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 // SPDX-License-Identifier: GPL-2.0-or-later /* Sysfs attributes of bond slaves * * Copyright (c) 2014 Scott Feldman <sfeldma@cumulusnetworks.com> */ #include <linux/capability.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <net/bonding.h> struct slave_attribute { struct attribute attr; ssize_t (*show)(struct slave *, char *); }; #define SLAVE_ATTR_RO(_name) \ const struct slave_attribute slave_attr_##_name = __ATTR_RO(_name) static ssize_t state_show(struct slave *slave, char *buf) { switch (bond_slave_state(slave)) { case BOND_STATE_ACTIVE: return sysfs_emit(buf, "active\n"); case BOND_STATE_BACKUP: return sysfs_emit(buf, "backup\n"); default: return sysfs_emit(buf, "UNKNOWN\n"); } } static SLAVE_ATTR_RO(state); static ssize_t mii_status_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%s\n", bond_slave_link_status(slave->link)); } static SLAVE_ATTR_RO(mii_status); static ssize_t link_failure_count_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%d\n", slave->link_failure_count); } static SLAVE_ATTR_RO(link_failure_count); static ssize_t perm_hwaddr_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%*phC\n", slave->dev->addr_len, slave->perm_hwaddr); } static SLAVE_ATTR_RO(perm_hwaddr); static ssize_t queue_id_show(struct slave *slave, char *buf) { return sysfs_emit(buf, "%d\n", READ_ONCE(slave->queue_id)); } static SLAVE_ATTR_RO(queue_id); static ssize_t ad_aggregator_id_show(struct slave *slave, char *buf) { const struct aggregator *agg; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { agg = SLAVE_AD_INFO(slave)->port.aggregator; if (agg) return sysfs_emit(buf, "%d\n", agg->aggregator_identifier); } return sysfs_emit(buf, "N/A\n"); } static SLAVE_ATTR_RO(ad_aggregator_id); static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf) { const struct port *ad_port; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { ad_port = &SLAVE_AD_INFO(slave)->port; if (ad_port->aggregator) return sysfs_emit(buf, "%u\n", ad_port->actor_oper_port_state); } return sysfs_emit(buf, "N/A\n"); } static SLAVE_ATTR_RO(ad_actor_oper_port_state); static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf) { const struct port *ad_port; if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) { ad_port = &SLAVE_AD_INFO(slave)->port; if (ad_port->aggregator) return sysfs_emit(buf, "%u\n", ad_port->partner_oper.port_state); } return sysfs_emit(buf, "N/A\n"); } static SLAVE_ATTR_RO(ad_partner_oper_port_state); static const struct attribute *slave_attrs[] = { &slave_attr_state.attr, &slave_attr_mii_status.attr, &slave_attr_link_failure_count.attr, &slave_attr_perm_hwaddr.attr, &slave_attr_queue_id.attr, &slave_attr_ad_aggregator_id.attr, &slave_attr_ad_actor_oper_port_state.attr, &slave_attr_ad_partner_oper_port_state.attr, NULL }; #define to_slave_attr(_at) container_of(_at, struct slave_attribute, attr) static ssize_t slave_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct slave_attribute *slave_attr = to_slave_attr(attr); struct slave *slave = to_slave(kobj); return slave_attr->show(slave, buf); } const struct sysfs_ops slave_sysfs_ops = { .show = slave_show, }; int bond_sysfs_slave_add(struct slave *slave) { return sysfs_create_files(&slave->kobj, slave_attrs); } void bond_sysfs_slave_del(struct slave *slave) { sysfs_remove_files(&slave->kobj, slave_attrs); }
26 2 24 21 21 21 21 21 21 21 21 21 10 11 1 1 1 1 3 3 3 4 4 3 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 /* * llc_sap.c - driver routines for SAP component. * * Copyright (c) 1997 by Procom Technology, Inc. * 2001-2003 by Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program can be redistributed or modified under the terms of the * GNU General Public License as published by the Free Software Foundation. * This program is distributed without any warranty or implied warranty * of merchantability or fitness for a particular purpose. * * See the GNU General Public License for more details. */ #include <net/llc.h> #include <net/llc_if.h> #include <net/llc_conn.h> #include <net/llc_pdu.h> #include <net/llc_sap.h> #include <net/llc_s_ac.h> #include <net/llc_s_ev.h> #include <net/llc_s_st.h> #include <net/sock.h> #include <net/tcp_states.h> #include <linux/llc.h> #include <linux/slab.h> static int llc_mac_header_len(unsigned short devtype) { switch (devtype) { case ARPHRD_ETHER: case ARPHRD_LOOPBACK: return sizeof(struct ethhdr); } return 0; } /** * llc_alloc_frame - allocates sk_buff for frame * @sk: socket to allocate frame to * @dev: network device this skb will be sent over * @type: pdu type to allocate * @data_size: data size to allocate * * Allocates an sk_buff for frame and initializes sk_buff fields. * Returns allocated skb or %NULL when out of memory. */ struct sk_buff *llc_alloc_frame(struct sock *sk, struct net_device *dev, u8 type, u32 data_size) { int hlen = type == LLC_PDU_TYPE_U ? 3 : 4; struct sk_buff *skb; hlen += llc_mac_header_len(dev->type); skb = alloc_skb(hlen + data_size, GFP_ATOMIC); if (skb) { skb_reset_mac_header(skb); skb_reserve(skb, hlen); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb->protocol = htons(ETH_P_802_2); skb->dev = dev; if (sk != NULL) skb_set_owner_w(skb, sk); } return skb; } void llc_save_primitive(struct sock *sk, struct sk_buff *skb, u8 prim) { struct sockaddr_llc *addr; /* save primitive for use by the user. */ addr = llc_ui_skb_cb(skb); memset(addr, 0, sizeof(*addr)); addr->sllc_family = sk->sk_family; addr->sllc_arphrd = skb->dev->type; addr->sllc_test = prim == LLC_TEST_PRIM; addr->sllc_xid = prim == LLC_XID_PRIM; addr->sllc_ua = prim == LLC_DATAUNIT_PRIM; llc_pdu_decode_sa(skb, addr->sllc_mac); llc_pdu_decode_ssap(skb, &addr->sllc_sap); } /** * llc_sap_rtn_pdu - Informs upper layer on rx of an UI, XID or TEST pdu. * @sap: pointer to SAP * @skb: received pdu */ void llc_sap_rtn_pdu(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); struct llc_pdu_un *pdu = llc_pdu_un_hdr(skb); switch (LLC_U_PDU_RSP(pdu)) { case LLC_1_PDU_CMD_TEST: ev->prim = LLC_TEST_PRIM; break; case LLC_1_PDU_CMD_XID: ev->prim = LLC_XID_PRIM; break; case LLC_1_PDU_CMD_UI: ev->prim = LLC_DATAUNIT_PRIM; break; } ev->ind_cfm_flag = LLC_IND; } /** * llc_find_sap_trans - finds transition for event * @sap: pointer to SAP * @skb: happened event * * This function finds transition that matches with happened event. * Returns the pointer to found transition on success or %NULL for * failure. */ static const struct llc_sap_state_trans *llc_find_sap_trans(struct llc_sap *sap, struct sk_buff *skb) { int i = 0; const struct llc_sap_state_trans *rc = NULL; const struct llc_sap_state_trans **next_trans; struct llc_sap_state *curr_state = &llc_sap_state_table[sap->state - 1]; /* * Search thru events for this state until list exhausted or until * its obvious the event is not valid for the current state */ for (next_trans = curr_state->transitions; next_trans[i]->ev; i++) if (!next_trans[i]->ev(sap, skb)) { rc = next_trans[i]; /* got event match; return it */ break; } return rc; } /** * llc_exec_sap_trans_actions - execute actions related to event * @sap: pointer to SAP * @trans: pointer to transition that it's actions must be performed * @skb: happened event. * * This function executes actions that is related to happened event. * Returns 0 for success and 1 for failure of at least one action. */ static int llc_exec_sap_trans_actions(struct llc_sap *sap, const struct llc_sap_state_trans *trans, struct sk_buff *skb) { int rc = 0; const llc_sap_action_t *next_action = trans->ev_actions; for (; next_action && *next_action; next_action++) if ((*next_action)(sap, skb)) rc = 1; return rc; } /** * llc_sap_next_state - finds transition, execs actions & change SAP state * @sap: pointer to SAP * @skb: happened event * * This function finds transition that matches with happened event, then * executes related actions and finally changes state of SAP. It returns * 0 on success and 1 for failure. */ static int llc_sap_next_state(struct llc_sap *sap, struct sk_buff *skb) { const struct llc_sap_state_trans *trans; int rc = 1; if (sap->state > LLC_NR_SAP_STATES) goto out; trans = llc_find_sap_trans(sap, skb); if (!trans) goto out; /* * Got the state to which we next transition; perform the actions * associated with this transition before actually transitioning to the * next state */ rc = llc_exec_sap_trans_actions(sap, trans, skb); if (rc) goto out; /* * Transition SAP to next state if all actions execute successfully */ sap->state = trans->next_state; out: return rc; } /** * llc_sap_state_process - sends event to SAP state machine * @sap: sap to use * @skb: pointer to occurred event * * After executing actions of the event, upper layer will be indicated * if needed(on receiving an UI frame). sk can be null for the * datalink_proto case. * * This function always consumes a reference to the skb. */ static void llc_sap_state_process(struct llc_sap *sap, struct sk_buff *skb) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->ind_cfm_flag = 0; llc_sap_next_state(sap, skb); if (ev->ind_cfm_flag == LLC_IND && skb->sk->sk_state != TCP_LISTEN) { llc_save_primitive(skb->sk, skb, ev->prim); /* queue skb to the user. */ if (sock_queue_rcv_skb(skb->sk, skb) == 0) return; } kfree_skb(skb); } /** * llc_build_and_send_test_pkt - TEST interface for upper layers. * @sap: sap to use * @skb: packet to send * @dmac: destination mac address * @dsap: destination sap * * This function is called when upper layer wants to send a TEST pdu. * Returns 0 for success, 1 otherwise. */ void llc_build_and_send_test_pkt(struct llc_sap *sap, struct sk_buff *skb, u8 *dmac, u8 dsap) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->saddr.lsap = sap->laddr.lsap; ev->daddr.lsap = dsap; memcpy(ev->saddr.mac, skb->dev->dev_addr, IFHWADDRLEN); memcpy(ev->daddr.mac, dmac, IFHWADDRLEN); ev->type = LLC_SAP_EV_TYPE_PRIM; ev->prim = LLC_TEST_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; llc_sap_state_process(sap, skb); } /** * llc_build_and_send_xid_pkt - XID interface for upper layers * @sap: sap to use * @skb: packet to send * @dmac: destination mac address * @dsap: destination sap * * This function is called when upper layer wants to send a XID pdu. * Returns 0 for success, 1 otherwise. */ void llc_build_and_send_xid_pkt(struct llc_sap *sap, struct sk_buff *skb, u8 *dmac, u8 dsap) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->saddr.lsap = sap->laddr.lsap; ev->daddr.lsap = dsap; memcpy(ev->saddr.mac, skb->dev->dev_addr, IFHWADDRLEN); memcpy(ev->daddr.mac, dmac, IFHWADDRLEN); ev->type = LLC_SAP_EV_TYPE_PRIM; ev->prim = LLC_XID_PRIM; ev->prim_type = LLC_PRIM_TYPE_REQ; llc_sap_state_process(sap, skb); } /** * llc_sap_rcv - sends received pdus to the sap state machine * @sap: current sap component structure. * @skb: received frame. * @sk: socket to associate to frame * * Sends received pdus to the sap state machine. */ static void llc_sap_rcv(struct llc_sap *sap, struct sk_buff *skb, struct sock *sk) { struct llc_sap_state_ev *ev = llc_sap_ev(skb); ev->type = LLC_SAP_EV_TYPE_PDU; ev->reason = 0; skb_orphan(skb); sock_hold(sk); skb->sk = sk; skb->destructor = sock_efree; llc_sap_state_process(sap, skb); } static inline bool llc_dgram_match(const struct llc_sap *sap, const struct llc_addr *laddr, const struct sock *sk, const struct net *net) { struct llc_sock *llc = llc_sk(sk); return sk->sk_type == SOCK_DGRAM && net_eq(sock_net(sk), net) && llc->laddr.lsap == laddr->lsap && ether_addr_equal(llc->laddr.mac, laddr->mac); } /** * llc_lookup_dgram - Finds dgram socket for the local sap/mac * @sap: SAP * @laddr: address of local LLC (MAC + SAP) * @net: netns to look up a socket in * * Search socket list of the SAP and finds connection using the local * mac, and local sap. Returns pointer for socket found, %NULL otherwise. */ static struct sock *llc_lookup_dgram(struct llc_sap *sap, const struct llc_addr *laddr, const struct net *net) { struct sock *rc; struct hlist_nulls_node *node; int slot = llc_sk_laddr_hashfn(sap, laddr); struct hlist_nulls_head *laddr_hb = &sap->sk_laddr_hash[slot]; rcu_read_lock_bh(); again: sk_nulls_for_each_rcu(rc, node, laddr_hb) { if (llc_dgram_match(sap, laddr, rc, net)) { /* Extra checks required by SLAB_TYPESAFE_BY_RCU */ if (unlikely(!refcount_inc_not_zero(&rc->sk_refcnt))) goto again; if (unlikely(llc_sk(rc)->sap != sap || !llc_dgram_match(sap, laddr, rc, net))) { sock_put(rc); continue; } goto found; } } rc = NULL; /* * if the nulls value we got at the end of this lookup is * not the expected one, we must restart lookup. * We probably met an item that was moved to another chain. */ if (unlikely(get_nulls_value(node) != slot)) goto again; found: rcu_read_unlock_bh(); return rc; } static inline bool llc_mcast_match(const struct llc_sap *sap, const struct llc_addr *laddr, const struct sk_buff *skb, const struct sock *sk) { struct llc_sock *llc = llc_sk(sk); return sk->sk_type == SOCK_DGRAM && llc->laddr.lsap == laddr->lsap && llc->dev == skb->dev; } static void llc_do_mcast(struct llc_sap *sap, struct sk_buff *skb, struct sock **stack, int count) { struct sk_buff *skb1; int i; for (i = 0; i < count; i++) { skb1 = skb_clone(skb, GFP_ATOMIC); if (!skb1) { sock_put(stack[i]); continue; } llc_sap_rcv(sap, skb1, stack[i]); sock_put(stack[i]); } } /** * llc_sap_mcast - Deliver multicast PDU's to all matching datagram sockets. * @sap: SAP * @laddr: address of local LLC (MAC + SAP) * @skb: PDU to deliver * * Search socket list of the SAP and finds connections with same sap. * Deliver clone to each. */ static void llc_sap_mcast(struct llc_sap *sap, const struct llc_addr *laddr, struct sk_buff *skb) { int i = 0; struct sock *sk; struct sock *stack[256 / sizeof(struct sock *)]; struct llc_sock *llc; struct hlist_head *dev_hb = llc_sk_dev_hash(sap, skb->dev->ifindex); spin_lock_bh(&sap->sk_lock); hlist_for_each_entry(llc, dev_hb, dev_hash_node) { sk = &llc->sk; if (!llc_mcast_match(sap, laddr, skb, sk)) continue; sock_hold(sk); if (i < ARRAY_SIZE(stack)) stack[i++] = sk; else { llc_do_mcast(sap, skb, stack, i); i = 0; } } spin_unlock_bh(&sap->sk_lock); llc_do_mcast(sap, skb, stack, i); } void llc_sap_handler(struct llc_sap *sap, struct sk_buff *skb) { struct llc_addr laddr; llc_pdu_decode_da(skb, laddr.mac); llc_pdu_decode_dsap(skb, &laddr.lsap); if (is_multicast_ether_addr(laddr.mac)) { llc_sap_mcast(sap, &laddr, skb); kfree_skb(skb); } else { struct sock *sk = llc_lookup_dgram(sap, &laddr, dev_net(skb->dev)); if (sk) { llc_sap_rcv(sap, skb, sk); sock_put(sk); } else kfree_skb(skb); } }
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_VMALLOC_H #define _LINUX_VMALLOC_H #include <linux/alloc_tag.h> #include <linux/sched.h> #include <linux/spinlock.h> #include <linux/init.h> #include <linux/list.h> #include <linux/llist.h> #include <asm/page.h> /* pgprot_t */ #include <linux/rbtree.h> #include <linux/overflow.h> #include <asm/vmalloc.h> struct vm_area_struct; /* vma defining user mapping in mm_types.h */ struct notifier_block; /* in notifier.h */ struct iov_iter; /* in uio.h */ /* bits in flags of vmalloc's vm_struct below */ #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ #define VM_ALLOC 0x00000002 /* vmalloc() */ #define VM_MAP 0x00000004 /* vmap()ed pages */ #define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ #define VM_DMA_COHERENT 0x00000010 /* dma_alloc_coherent */ #define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ #define VM_NO_GUARD 0x00000040 /* ***DANGEROUS*** don't add guard page */ #define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ #define VM_FLUSH_RESET_PERMS 0x00000100 /* reset direct map and flush TLB on unmap, can't be freed in atomic context */ #define VM_MAP_PUT_PAGES 0x00000200 /* put pages and free array in vfree */ #define VM_ALLOW_HUGE_VMAP 0x00000400 /* Allow for huge pages on archs with HAVE_ARCH_HUGE_VMALLOC */ #if (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) && \ !defined(CONFIG_KASAN_VMALLOC) #define VM_DEFER_KMEMLEAK 0x00000800 /* defer kmemleak object creation */ #else #define VM_DEFER_KMEMLEAK 0 #endif #define VM_SPARSE 0x00001000 /* sparse vm_area. not all pages are present. */ /* bits [20..32] reserved for arch specific ioremap internals */ /* * Maximum alignment for ioremap() regions. * Can be overridden by arch-specific value. */ #ifndef IOREMAP_MAX_ORDER #define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */ #endif struct vm_struct { struct vm_struct *next; void *addr; unsigned long size; unsigned long flags; struct page **pages; #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC unsigned int page_order; #endif unsigned int nr_pages; phys_addr_t phys_addr; const void *caller; }; struct vmap_area { unsigned long va_start; unsigned long va_end; struct rb_node rb_node; /* address sorted rbtree */ struct list_head list; /* address sorted list */ /* * The following two variables can be packed, because * a vmap_area object can be either: * 1) in "free" tree (root is free_vmap_area_root) * 2) or "busy" tree (root is vmap_area_root) */ union { unsigned long subtree_max_size; /* in "free" tree */ struct vm_struct *vm; /* in "busy" tree */ }; unsigned long flags; /* mark type of vm_map_ram area */ }; /* archs that select HAVE_ARCH_HUGE_VMAP should override one or more of these */ #ifndef arch_vmap_p4d_supported static inline bool arch_vmap_p4d_supported(pgprot_t prot) { return false; } #endif #ifndef arch_vmap_pud_supported static inline bool arch_vmap_pud_supported(pgprot_t prot) { return false; } #endif #ifndef arch_vmap_pmd_supported static inline bool arch_vmap_pmd_supported(pgprot_t prot) { return false; } #endif #ifndef arch_vmap_pte_range_map_size static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end, u64 pfn, unsigned int max_page_shift) { return PAGE_SIZE; } #endif #ifndef arch_vmap_pte_supported_shift static inline int arch_vmap_pte_supported_shift(unsigned long size) { return PAGE_SHIFT; } #endif #ifndef arch_vmap_pgprot_tagged static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot) { return prot; } #endif /* * Highlevel APIs for driver use */ extern void vm_unmap_ram(const void *mem, unsigned int count); extern void *vm_map_ram(struct page **pages, unsigned int count, int node); extern void vm_unmap_aliases(void); #ifdef CONFIG_MMU extern unsigned long vmalloc_nr_pages(void); #else static inline unsigned long vmalloc_nr_pages(void) { return 0; } #endif extern void *vmalloc_noprof(unsigned long size) __alloc_size(1); #define vmalloc(...) alloc_hooks(vmalloc_noprof(__VA_ARGS__)) extern void *vzalloc_noprof(unsigned long size) __alloc_size(1); #define vzalloc(...) alloc_hooks(vzalloc_noprof(__VA_ARGS__)) extern void *vmalloc_user_noprof(unsigned long size) __alloc_size(1); #define vmalloc_user(...) alloc_hooks(vmalloc_user_noprof(__VA_ARGS__)) extern void *vmalloc_node_noprof(unsigned long size, int node) __alloc_size(1); #define vmalloc_node(...) alloc_hooks(vmalloc_node_noprof(__VA_ARGS__)) extern void *vzalloc_node_noprof(unsigned long size, int node) __alloc_size(1); #define vzalloc_node(...) alloc_hooks(vzalloc_node_noprof(__VA_ARGS__)) extern void *vmalloc_32_noprof(unsigned long size) __alloc_size(1); #define vmalloc_32(...) alloc_hooks(vmalloc_32_noprof(__VA_ARGS__)) extern void *vmalloc_32_user_noprof(unsigned long size) __alloc_size(1); #define vmalloc_32_user(...) alloc_hooks(vmalloc_32_user_noprof(__VA_ARGS__)) extern void *__vmalloc_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); #define __vmalloc(...) alloc_hooks(__vmalloc_noprof(__VA_ARGS__)) extern void *__vmalloc_node_range_noprof(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller) __alloc_size(1); #define __vmalloc_node_range(...) alloc_hooks(__vmalloc_node_range_noprof(__VA_ARGS__)) void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_mask, int node, const void *caller) __alloc_size(1); #define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__)) void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); #define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__)) extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); #define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__)) extern void *vmalloc_array_noprof(size_t n, size_t size) __alloc_size(1, 2); #define vmalloc_array(...) alloc_hooks(vmalloc_array_noprof(__VA_ARGS__)) extern void *__vcalloc_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); #define __vcalloc(...) alloc_hooks(__vcalloc_noprof(__VA_ARGS__)) extern void *vcalloc_noprof(size_t n, size_t size) __alloc_size(1, 2); #define vcalloc(...) alloc_hooks(vcalloc_noprof(__VA_ARGS__)) extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); extern void *vmap(struct page **pages, unsigned int count, unsigned long flags, pgprot_t prot); void *vmap_pfn(unsigned long *pfns, unsigned int count, pgprot_t prot); extern void vunmap(const void *addr); extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, unsigned long uaddr, void *kaddr, unsigned long pgoff, unsigned long size); extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); /* * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() * needs to be called. */ #ifndef ARCH_PAGE_TABLE_SYNC_MASK #define ARCH_PAGE_TABLE_SYNC_MASK 0 #endif /* * There is no default implementation for arch_sync_kernel_mappings(). It is * relied upon the compiler to optimize calls out if ARCH_PAGE_TABLE_SYNC_MASK * is 0. */ void arch_sync_kernel_mappings(unsigned long start, unsigned long end); /* * Lowlevel-APIs (not for driver use!) */ static inline size_t get_vm_area_size(const struct vm_struct *area) { if (!(area->flags & VM_NO_GUARD)) /* return actual size without guard page */ return area->size - PAGE_SIZE; else return area->size; } extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *get_vm_area_caller(unsigned long size, unsigned long flags, const void *caller); extern struct vm_struct *__get_vm_area_caller(unsigned long size, unsigned long flags, unsigned long start, unsigned long end, const void *caller); void free_vm_area(struct vm_struct *area); extern struct vm_struct *remove_vm_area(const void *addr); extern struct vm_struct *find_vm_area(const void *addr); struct vmap_area *find_vmap_area(unsigned long addr); static inline bool is_vm_area_hugepages(const void *addr) { /* * This may not 100% tell if the area is mapped with > PAGE_SIZE * page table entries, if for some reason the architecture indicates * larger sizes are available but decides not to use them, nothing * prevents that. This only indicates the size of the physical page * allocated in the vmalloc layer. */ #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC return find_vm_area(addr)->page_order > 0; #else return false; #endif } #ifdef CONFIG_MMU int vm_area_map_pages(struct vm_struct *area, unsigned long start, unsigned long end, struct page **pages); void vm_area_unmap_pages(struct vm_struct *area, unsigned long start, unsigned long end); void vunmap_range(unsigned long addr, unsigned long end); static inline void set_vm_flush_reset_perms(void *addr) { struct vm_struct *vm = find_vm_area(addr); if (vm) vm->flags |= VM_FLUSH_RESET_PERMS; } #else static inline void set_vm_flush_reset_perms(void *addr) { } #endif /* for /proc/kcore */ extern long vread_iter(struct iov_iter *iter, const char *addr, size_t count); /* * Internals. Don't use.. */ extern __init void vm_area_add_early(struct vm_struct *vm); extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); #ifdef CONFIG_SMP # ifdef CONFIG_MMU struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); # else static inline struct vm_struct ** pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align) { return NULL; } static inline void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) { } # endif #endif #ifdef CONFIG_MMU #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) #else #define VMALLOC_TOTAL 0UL #endif int register_vmap_purge_notifier(struct notifier_block *nb); int unregister_vmap_purge_notifier(struct notifier_block *nb); #if defined(CONFIG_MMU) && defined(CONFIG_PRINTK) bool vmalloc_dump_obj(void *object); #else static inline bool vmalloc_dump_obj(void *object) { return false; } #endif #endif /* _LINUX_VMALLOC_H */
9 9 9 1 1 2 1 1 2 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 // SPDX-License-Identifier: GPL-2.0-only /* * vivid-radio-common.c - common radio rx/tx support functions. * * Copyright 2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. */ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/delay.h> #include <linux/videodev2.h> #include "vivid-core.h" #include "vivid-ctrls.h" #include "vivid-radio-common.h" #include "vivid-rds-gen.h" /* * These functions are shared between the vivid receiver and transmitter * since both use the same frequency bands. */ const struct v4l2_frequency_band vivid_radio_bands[TOT_BANDS] = { /* Band FM */ { .type = V4L2_TUNER_RADIO, .index = 0, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_STEREO | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = FM_FREQ_RANGE_LOW, .rangehigh = FM_FREQ_RANGE_HIGH, .modulation = V4L2_BAND_MODULATION_FM, }, /* Band AM */ { .type = V4L2_TUNER_RADIO, .index = 1, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = AM_FREQ_RANGE_LOW, .rangehigh = AM_FREQ_RANGE_HIGH, .modulation = V4L2_BAND_MODULATION_AM, }, /* Band SW */ { .type = V4L2_TUNER_RADIO, .index = 2, .capability = V4L2_TUNER_CAP_LOW | V4L2_TUNER_CAP_FREQ_BANDS, .rangelow = SW_FREQ_RANGE_LOW, .rangehigh = SW_FREQ_RANGE_HIGH, .modulation = V4L2_BAND_MODULATION_AM, }, }; /* * Initialize the RDS generator. If we can loop, then the RDS generator * is set up with the values from the RDS TX controls, otherwise it * will fill in standard values using one of two alternates. */ void vivid_radio_rds_init(struct vivid_dev *dev) { struct vivid_rds_gen *rds = &dev->rds_gen; bool alt = dev->radio_rx_rds_use_alternates; /* Do nothing, blocks will be filled by the transmitter */ if (dev->radio_rds_loop && !dev->radio_tx_rds_controls) return; if (dev->radio_rds_loop) { v4l2_ctrl_lock(dev->radio_tx_rds_pi); rds->picode = dev->radio_tx_rds_pi->cur.val; rds->pty = dev->radio_tx_rds_pty->cur.val; rds->mono_stereo = dev->radio_tx_rds_mono_stereo->cur.val; rds->art_head = dev->radio_tx_rds_art_head->cur.val; rds->compressed = dev->radio_tx_rds_compressed->cur.val; rds->dyn_pty = dev->radio_tx_rds_dyn_pty->cur.val; rds->ta = dev->radio_tx_rds_ta->cur.val; rds->tp = dev->radio_tx_rds_tp->cur.val; rds->ms = dev->radio_tx_rds_ms->cur.val; strscpy(rds->psname, dev->radio_tx_rds_psname->p_cur.p_char, sizeof(rds->psname)); strscpy(rds->radiotext, dev->radio_tx_rds_radiotext->p_cur.p_char + alt * 64, sizeof(rds->radiotext)); v4l2_ctrl_unlock(dev->radio_tx_rds_pi); } else { vivid_rds_gen_fill(rds, dev->radio_rx_freq, alt); } if (dev->radio_rx_rds_controls) { v4l2_ctrl_s_ctrl(dev->radio_rx_rds_pty, rds->pty); v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ta, rds->ta); v4l2_ctrl_s_ctrl(dev->radio_rx_rds_tp, rds->tp); v4l2_ctrl_s_ctrl(dev->radio_rx_rds_ms, rds->ms); v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_psname, rds->psname); v4l2_ctrl_s_ctrl_string(dev->radio_rx_rds_radiotext, rds->radiotext); if (!dev->radio_rds_loop) dev->radio_rx_rds_use_alternates = !dev->radio_rx_rds_use_alternates; } vivid_rds_generate(rds); } /* * Calculate the emulated signal quality taking into account the frequency * the transmitter is using. */ static void vivid_radio_calc_sig_qual(struct vivid_dev *dev) { int mod = 16000; int delta = 800; int sig_qual, sig_qual_tx = mod; /* * For SW and FM there is a channel every 1000 kHz, for AM there is one * every 100 kHz. */ if (dev->radio_rx_freq <= AM_FREQ_RANGE_HIGH) { mod /= 10; delta /= 10; } sig_qual = (dev->radio_rx_freq + delta) % mod - delta; if (dev->has_radio_tx) sig_qual_tx = dev->radio_rx_freq - dev->radio_tx_freq; if (abs(sig_qual_tx) <= abs(sig_qual)) { sig_qual = sig_qual_tx; /* * Zero the internal rds buffer if we are going to loop * rds blocks. */ if (!dev->radio_rds_loop && !dev->radio_tx_rds_controls) memset(dev->rds_gen.data, 0, sizeof(dev->rds_gen.data)); dev->radio_rds_loop = dev->radio_rx_freq >= FM_FREQ_RANGE_LOW; } else { dev->radio_rds_loop = false; } if (dev->radio_rx_freq <= AM_FREQ_RANGE_HIGH) sig_qual *= 10; dev->radio_rx_sig_qual = sig_qual; } int vivid_radio_g_frequency(struct file *file, const unsigned *pfreq, struct v4l2_frequency *vf) { if (vf->tuner != 0) return -EINVAL; vf->frequency = *pfreq; return 0; } int vivid_radio_s_frequency(struct file *file, unsigned *pfreq, const struct v4l2_frequency *vf) { struct vivid_dev *dev = video_drvdata(file); unsigned freq; unsigned band; if (vf->tuner != 0) return -EINVAL; if (vf->frequency >= (FM_FREQ_RANGE_LOW + SW_FREQ_RANGE_HIGH) / 2) band = BAND_FM; else if (vf->frequency <= (AM_FREQ_RANGE_HIGH + SW_FREQ_RANGE_LOW) / 2) band = BAND_AM; else band = BAND_SW; freq = clamp_t(u32, vf->frequency, vivid_radio_bands[band].rangelow, vivid_radio_bands[band].rangehigh); *pfreq = freq; /* * For both receiver and transmitter recalculate the signal quality * (since that depends on both frequencies) and re-init the rds * generator. */ vivid_radio_calc_sig_qual(dev); vivid_radio_rds_init(dev); return 0; }
913 914 3 911 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 // SPDX-License-Identifier: GPL-2.0 #include <linux/slab.h> #include <linux/kernel.h> #include <linux/bitops.h> #include <linux/cpumask.h> #include <linux/export.h> #include <linux/memblock.h> #include <linux/numa.h> /** * cpumask_next_wrap - helper to implement for_each_cpu_wrap * @n: the cpu prior to the place to search * @mask: the cpumask pointer * @start: the start point of the iteration * @wrap: assume @n crossing @start terminates the iteration * * Return: >= nr_cpu_ids on completion * * Note: the @wrap argument is required for the start condition when * we cannot assume @start is set in @mask. */ unsigned int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) { unsigned int next; again: next = cpumask_next(n, mask); if (wrap && n < start && next >= start) { return nr_cpumask_bits; } else if (next >= nr_cpumask_bits) { wrap = true; n = -1; goto again; } return next; } EXPORT_SYMBOL(cpumask_next_wrap); /* These are not inline because of header tangles. */ #ifdef CONFIG_CPUMASK_OFFSTACK /** * alloc_cpumask_var_node - allocate a struct cpumask on a given node * @mask: pointer to cpumask_var_t where the cpumask is returned * @flags: GFP_ flags * @node: memory node from which to allocate or %NUMA_NO_NODE * * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is * a nop returning a constant 1 (in <linux/cpumask.h>). * * Return: TRUE if memory allocation succeeded, FALSE otherwise. * * In addition, mask will be NULL if this fails. Note that gcc is * usually smart enough to know that mask can never be NULL if * CONFIG_CPUMASK_OFFSTACK=n, so does code elimination in that case * too. */ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) { *mask = kmalloc_node(cpumask_size(), flags, node); #ifdef CONFIG_DEBUG_PER_CPU_MAPS if (!*mask) { printk(KERN_ERR "=> alloc_cpumask_var: failed!\n"); dump_stack(); } #endif return *mask != NULL; } EXPORT_SYMBOL(alloc_cpumask_var_node); /** * alloc_bootmem_cpumask_var - allocate a struct cpumask from the bootmem arena. * @mask: pointer to cpumask_var_t where the cpumask is returned * * Only defined when CONFIG_CPUMASK_OFFSTACK=y, otherwise is * a nop (in <linux/cpumask.h>). * Either returns an allocated (zero-filled) cpumask, or causes the * system to panic. */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { *mask = memblock_alloc(cpumask_size(), SMP_CACHE_BYTES); if (!*mask) panic("%s: Failed to allocate %u bytes\n", __func__, cpumask_size()); } /** * free_cpumask_var - frees memory allocated for a struct cpumask. * @mask: cpumask to free * * This is safe on a NULL mask. */ void free_cpumask_var(cpumask_var_t mask) { kfree(mask); } EXPORT_SYMBOL(free_cpumask_var); /** * free_bootmem_cpumask_var - frees result of alloc_bootmem_cpumask_var * @mask: cpumask to free */ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { memblock_free(mask, cpumask_size()); } #endif /** * cpumask_local_spread - select the i'th cpu based on NUMA distances * @i: index number * @node: local numa_node * * Return: online CPU according to a numa aware policy; local cpus are returned * first, followed by non-local ones, then it wraps around. * * For those who wants to enumerate all CPUs based on their NUMA distances, * i.e. call this function in a loop, like: * * for (i = 0; i < num_online_cpus(); i++) { * cpu = cpumask_local_spread(i, node); * do_something(cpu); * } * * There's a better alternative based on for_each()-like iterators: * * for_each_numa_hop_mask(mask, node) { * for_each_cpu_andnot(cpu, mask, prev) * do_something(cpu); * prev = mask; * } * * It's simpler and more verbose than above. Complexity of iterator-based * enumeration is O(sched_domains_numa_levels * nr_cpu_ids), while * cpumask_local_spread() when called for each cpu is * O(sched_domains_numa_levels * nr_cpu_ids * log(nr_cpu_ids)). */ unsigned int cpumask_local_spread(unsigned int i, int node) { unsigned int cpu; /* Wrap: we always want a cpu. */ i %= num_online_cpus(); cpu = sched_numa_find_nth_cpu(cpu_online_mask, i, node); WARN_ON(cpu >= nr_cpu_ids); return cpu; } EXPORT_SYMBOL(cpumask_local_spread); static DEFINE_PER_CPU(int, distribute_cpu_mask_prev); /** * cpumask_any_and_distribute - Return an arbitrary cpu within src1p & src2p. * @src1p: first &cpumask for intersection * @src2p: second &cpumask for intersection * * Iterated calls using the same srcp1 and srcp2 will be distributed within * their intersection. * * Return: >= nr_cpu_ids if the intersection is empty. */ unsigned int cpumask_any_and_distribute(const struct cpumask *src1p, const struct cpumask *src2p) { unsigned int next, prev; /* NOTE: our first selection will skip 0. */ prev = __this_cpu_read(distribute_cpu_mask_prev); next = find_next_and_bit_wrap(cpumask_bits(src1p), cpumask_bits(src2p), nr_cpumask_bits, prev + 1); if (next < nr_cpu_ids) __this_cpu_write(distribute_cpu_mask_prev, next); return next; } EXPORT_SYMBOL(cpumask_any_and_distribute); /** * cpumask_any_distribute - Return an arbitrary cpu from srcp * @srcp: &cpumask for selection * * Return: >= nr_cpu_ids if the intersection is empty. */ unsigned int cpumask_any_distribute(const struct cpumask *srcp) { unsigned int next, prev; /* NOTE: our first selection will skip 0. */ prev = __this_cpu_read(distribute_cpu_mask_prev); next = find_next_bit_wrap(cpumask_bits(srcp), nr_cpumask_bits, prev + 1); if (next < nr_cpu_ids) __this_cpu_write(distribute_cpu_mask_prev, next); return next; } EXPORT_SYMBOL(cpumask_any_distribute);
9 2 1 1 1 4 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> * * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include <asm/unaligned.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> struct nft_byteorder { u8 sreg; u8 dreg; enum nft_byteorder_ops op:8; u8 len; u8 size; }; void nft_byteorder_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_byteorder *priv = nft_expr_priv(expr); u32 *src = &regs->data[priv->sreg]; u32 *dst = &regs->data[priv->dreg]; u16 *s16, *d16; unsigned int i; s16 = (void *)src; d16 = (void *)dst; switch (priv->size) { case 8: { u64 *dst64 = (void *)dst; u64 src64; switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 8; i++) { src64 = nft_reg_load64(&src[i]); nft_reg_store64(&dst64[i], be64_to_cpu((__force __be64)src64)); } break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 8; i++) { src64 = (__force __u64) cpu_to_be64(nft_reg_load64(&src[i])); nft_reg_store64(&dst64[i], src64); } break; } break; } case 4: switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 4; i++) dst[i] = ntohl((__force __be32)src[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 4; i++) dst[i] = (__force __u32)htonl(src[i]); break; } break; case 2: switch (priv->op) { case NFT_BYTEORDER_NTOH: for (i = 0; i < priv->len / 2; i++) d16[i] = ntohs((__force __be16)s16[i]); break; case NFT_BYTEORDER_HTON: for (i = 0; i < priv->len / 2; i++) d16[i] = (__force __u16)htons(s16[i]); break; } break; } } static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, [NFTA_BYTEORDER_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BYTEORDER_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BYTEORDER_SIZE] = NLA_POLICY_MAX(NLA_BE32, 255), }; static int nft_byteorder_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) { struct nft_byteorder *priv = nft_expr_priv(expr); u32 size, len; int err; if (tb[NFTA_BYTEORDER_SREG] == NULL || tb[NFTA_BYTEORDER_DREG] == NULL || tb[NFTA_BYTEORDER_LEN] == NULL || tb[NFTA_BYTEORDER_SIZE] == NULL || tb[NFTA_BYTEORDER_OP] == NULL) return -EINVAL; priv->op = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_OP])); switch (priv->op) { case NFT_BYTEORDER_NTOH: case NFT_BYTEORDER_HTON: break; default: return -EINVAL; } err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size); if (err < 0) return err; priv->size = size; switch (priv->size) { case 2: case 4: case 8: break; default: return -EINVAL; } err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); if (err < 0) return err; priv->len = len; err = nft_parse_register_load(tb[NFTA_BYTEORDER_SREG], &priv->sreg, priv->len); if (err < 0) return err; return nft_parse_register_store(ctx, tb[NFTA_BYTEORDER_DREG], &priv->dreg, NULL, NFT_DATA_VALUE, priv->len); } static int nft_byteorder_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { const struct nft_byteorder *priv = nft_expr_priv(expr); if (nft_dump_register(skb, NFTA_BYTEORDER_SREG, priv->sreg)) goto nla_put_failure; if (nft_dump_register(skb, NFTA_BYTEORDER_DREG, priv->dreg)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_OP, htonl(priv->op))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_LEN, htonl(priv->len))) goto nla_put_failure; if (nla_put_be32(skb, NFTA_BYTEORDER_SIZE, htonl(priv->size))) goto nla_put_failure; return 0; nla_put_failure: return -1; } static bool nft_byteorder_reduce(struct nft_regs_track *track, const struct nft_expr *expr) { struct nft_byteorder *priv = nft_expr_priv(expr); nft_reg_track_cancel(track, priv->dreg, priv->len); return false; } static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, .reduce = nft_byteorder_reduce, }; struct nft_expr_type nft_byteorder_type __read_mostly = { .name = "byteorder", .ops = &nft_byteorder_ops, .policy = nft_byteorder_policy, .maxattr = NFTA_BYTEORDER_MAX, .owner = THIS_MODULE, };
1 3 3 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2007-2008 BalaBit IT Ltd. * Author: Krisztian Kovacs */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> #include <net/tcp.h> #include <net/udp.h> #include <net/icmp.h> #include <net/sock.h> #include <net/inet_sock.h> #include <net/netfilter/nf_socket.h> #if IS_ENABLED(CONFIG_NF_CONNTRACK) #include <net/netfilter/nf_conntrack.h> #endif static int extract_icmp4_fields(const struct sk_buff *skb, u8 *protocol, __be32 *raddr, __be32 *laddr, __be16 *rport, __be16 *lport) { unsigned int outside_hdrlen = ip_hdrlen(skb); struct iphdr *inside_iph, _inside_iph; struct icmphdr *icmph, _icmph; __be16 *ports, _ports[2]; icmph = skb_header_pointer(skb, outside_hdrlen, sizeof(_icmph), &_icmph); if (icmph == NULL) return 1; if (!icmp_is_err(icmph->type)) return 1; inside_iph = skb_header_pointer(skb, outside_hdrlen + sizeof(struct icmphdr), sizeof(_inside_iph), &_inside_iph); if (inside_iph == NULL) return 1; if (inside_iph->protocol != IPPROTO_TCP && inside_iph->protocol != IPPROTO_UDP) return 1; ports = skb_header_pointer(skb, outside_hdrlen + sizeof(struct icmphdr) + (inside_iph->ihl << 2), sizeof(_ports), &_ports); if (ports == NULL) return 1; /* the inside IP packet is the one quoted from our side, thus * its saddr is the local address */ *protocol = inside_iph->protocol; *laddr = inside_iph->saddr; *lport = ports[0]; *raddr = inside_iph->daddr; *rport = ports[1]; return 0; } static struct sock * nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, const struct net_device *in) { switch (protocol) { case IPPROTO_TCP: return inet_lookup(net, net->ipv4.tcp_death_row.hashinfo, skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: return udp4_lib_lookup(net, saddr, sport, daddr, dport, in->ifindex); } return NULL; } struct sock *nf_sk_lookup_slow_v4(struct net *net, const struct sk_buff *skb, const struct net_device *indev) { __be32 daddr, saddr; __be16 dport, sport; const struct iphdr *iph = ip_hdr(skb); struct sk_buff *data_skb = NULL; u8 protocol; #if IS_ENABLED(CONFIG_NF_CONNTRACK) enum ip_conntrack_info ctinfo; struct nf_conn const *ct; #endif int doff = 0; if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { struct tcphdr _hdr; struct udphdr *hp; hp = skb_header_pointer(skb, ip_hdrlen(skb), iph->protocol == IPPROTO_UDP ? sizeof(*hp) : sizeof(_hdr), &_hdr); if (hp == NULL) return NULL; protocol = iph->protocol; saddr = iph->saddr; sport = hp->source; daddr = iph->daddr; dport = hp->dest; data_skb = (struct sk_buff *)skb; doff = iph->protocol == IPPROTO_TCP ? ip_hdrlen(skb) + __tcp_hdrlen((struct tcphdr *)hp) : ip_hdrlen(skb) + sizeof(*hp); } else if (iph->protocol == IPPROTO_ICMP) { if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr, &sport, &dport)) return NULL; } else { return NULL; } #if IS_ENABLED(CONFIG_NF_CONNTRACK) /* Do the lookup with the original socket address in * case this is a reply packet of an established * SNAT-ted connection. */ ct = nf_ct_get(skb, &ctinfo); if (ct && ((iph->protocol != IPPROTO_ICMP && ctinfo == IP_CT_ESTABLISHED_REPLY) || (iph->protocol == IPPROTO_ICMP && ctinfo == IP_CT_RELATED_REPLY)) && (ct->status & IPS_SRC_NAT_DONE)) { daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; dport = (iph->protocol == IPPROTO_TCP) ? ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; } #endif return nf_socket_get_sock_v4(net, data_skb, doff, protocol, saddr, daddr, sport, dport, indev); } EXPORT_SYMBOL_GPL(nf_sk_lookup_slow_v4); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); MODULE_DESCRIPTION("Netfilter IPv4 socket lookup infrastructure");
1709 1750 1518 1517 1 1 1 4 4 4 2 12 1 3 6 2 1521 1527 1528 1530 14 1529 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/filesystems.c * * Copyright (C) 1991, 1992 Linus Torvalds * * table of configured filesystems */ #include <linux/syscalls.h> #include <linux/fs.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/kmod.h> #include <linux/init.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/uaccess.h> #include <linux/fs_parser.h> /* * Handling of filesystem drivers list. * Rules: * Inclusion to/removals from/scanning of list are protected by spinlock. * During the unload module must call unregister_filesystem(). * We can access the fields of list element if: * 1) spinlock is held or * 2) we hold the reference to the module. * The latter can be guaranteed by call of try_module_get(); if it * returned 0 we must skip the element, otherwise we got the reference. * Once the reference is obtained we can drop the spinlock. */ static struct file_system_type *file_systems; static DEFINE_RWLOCK(file_systems_lock); /* WARNING: This can be used only if we _already_ own a reference */ struct file_system_type *get_filesystem(struct file_system_type *fs) { __module_get(fs->owner); return fs; } void put_filesystem(struct file_system_type *fs) { module_put(fs->owner); } static struct file_system_type **find_filesystem(const char *name, unsigned len) { struct file_system_type **p; for (p = &file_systems; *p; p = &(*p)->next) if (strncmp((*p)->name, name, len) == 0 && !(*p)->name[len]) break; return p; } /** * register_filesystem - register a new filesystem * @fs: the file system structure * * Adds the file system passed to the list of file systems the kernel * is aware of for mount and other syscalls. Returns 0 on success, * or a negative errno code on an error. * * The &struct file_system_type that is passed is linked into the kernel * structures and must not be freed until the file system has been * unregistered. */ int register_filesystem(struct file_system_type * fs) { int res = 0; struct file_system_type ** p; if (fs->parameters && !fs_validate_description(fs->name, fs->parameters)) return -EINVAL; BUG_ON(strchr(fs->name, '.')); if (fs->next) return -EBUSY; write_lock(&file_systems_lock); p = find_filesystem(fs->name, strlen(fs->name)); if (*p) res = -EBUSY; else *p = fs; write_unlock(&file_systems_lock); return res; } EXPORT_SYMBOL(register_filesystem); /** * unregister_filesystem - unregister a file system * @fs: filesystem to unregister * * Remove a file system that was previously successfully registered * with the kernel. An error is returned if the file system is not found. * Zero is returned on a success. * * Once this function has returned the &struct file_system_type structure * may be freed or reused. */ int unregister_filesystem(struct file_system_type * fs) { struct file_system_type ** tmp; write_lock(&file_systems_lock); tmp = &file_systems; while (*tmp) { if (fs == *tmp) { *tmp = fs->next; fs->next = NULL; write_unlock(&file_systems_lock); synchronize_rcu(); return 0; } tmp = &(*tmp)->next; } write_unlock(&file_systems_lock); return -EINVAL; } EXPORT_SYMBOL(unregister_filesystem); #ifdef CONFIG_SYSFS_SYSCALL static int fs_index(const char __user * __name) { struct file_system_type * tmp; struct filename *name; int err, index; name = getname(__name); err = PTR_ERR(name); if (IS_ERR(name)) return err; err = -EINVAL; read_lock(&file_systems_lock); for (tmp=file_systems, index=0 ; tmp ; tmp=tmp->next, index++) { if (strcmp(tmp->name, name->name) == 0) { err = index; break; } } read_unlock(&file_systems_lock); putname(name); return err; } static int fs_name(unsigned int index, char __user * buf) { struct file_system_type * tmp; int len, res; read_lock(&file_systems_lock); for (tmp = file_systems; tmp; tmp = tmp->next, index--) if (index <= 0 && try_module_get(tmp->owner)) break; read_unlock(&file_systems_lock); if (!tmp) return -EINVAL; /* OK, we got the reference, so we can safely block */ len = strlen(tmp->name) + 1; res = copy_to_user(buf, tmp->name, len) ? -EFAULT : 0; put_filesystem(tmp); return res; } static int fs_maxindex(void) { struct file_system_type * tmp; int index; read_lock(&file_systems_lock); for (tmp = file_systems, index = 0 ; tmp ; tmp = tmp->next, index++) ; read_unlock(&file_systems_lock); return index; } /* * Whee.. Weird sysv syscall. */ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) { int retval = -EINVAL; switch (option) { case 1: retval = fs_index((const char __user *) arg1); break; case 2: retval = fs_name(arg1, (char __user *) arg2); break; case 3: retval = fs_maxindex(); break; } return retval; } #endif int __init list_bdev_fs_names(char *buf, size_t size) { struct file_system_type *p; size_t len; int count = 0; read_lock(&file_systems_lock); for (p = file_systems; p; p = p->next) { if (!(p->fs_flags & FS_REQUIRES_DEV)) continue; len = strlen(p->name) + 1; if (len > size) { pr_warn("%s: truncating file system list\n", __func__); break; } memcpy(buf, p->name, len); buf += len; size -= len; count++; } read_unlock(&file_systems_lock); return count; } #ifdef CONFIG_PROC_FS static int filesystems_proc_show(struct seq_file *m, void *v) { struct file_system_type * tmp; read_lock(&file_systems_lock); tmp = file_systems; while (tmp) { seq_printf(m, "%s\t%s\n", (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev", tmp->name); tmp = tmp->next; } read_unlock(&file_systems_lock); return 0; } static int __init proc_filesystems_init(void) { proc_create_single("filesystems", 0, NULL, filesystems_proc_show); return 0; } module_init(proc_filesystems_init); #endif static struct file_system_type *__get_fs_type(const char *name, int len) { struct file_system_type *fs; read_lock(&file_systems_lock); fs = *(find_filesystem(name, len)); if (fs && !try_module_get(fs->owner)) fs = NULL; read_unlock(&file_systems_lock); return fs; } struct file_system_type *get_fs_type(const char *name) { struct file_system_type *fs; const char *dot = strchr(name, '.'); int len = dot ? dot - name : strlen(name); fs = __get_fs_type(name, len); if (!fs && (request_module("fs-%.*s", len, name) == 0)) { fs = __get_fs_type(name, len); if (!fs) pr_warn_once("request_module fs-%.*s succeeded, but still no fs?\n", len, name); } if (dot && fs && !(fs->fs_flags & FS_HAS_SUBTYPE)) { put_filesystem(fs); fs = NULL; } return fs; } EXPORT_SYMBOL(get_fs_type);
2 42 2 2 2 2 4 3 30 28 20 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 // SPDX-License-Identifier: GPL-2.0 /* * rtc and date/time utility functions * * Copyright (C) 2005-06 Tower Technologies * Author: Alessandro Zummo <a.zummo@towertech.it> * * based on arch/arm/common/rtctime.c and other bits * * Author: Cassio Neri <cassio.neri@gmail.com> (rtc_time64_to_tm) */ #include <linux/export.h> #include <linux/rtc.h> static const unsigned char rtc_days_in_month[] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 }; static const unsigned short rtc_ydays[2][13] = { /* Normal years */ { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, /* Leap years */ { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } }; /* * The number of days in the month. */ int rtc_month_days(unsigned int month, unsigned int year) { return rtc_days_in_month[month] + (is_leap_year(year) && month == 1); } EXPORT_SYMBOL(rtc_month_days); /* * The number of days since January 1. (0 to 365) */ int rtc_year_days(unsigned int day, unsigned int month, unsigned int year) { return rtc_ydays[is_leap_year(year)][month] + day - 1; } EXPORT_SYMBOL(rtc_year_days); /** * rtc_time64_to_tm - converts time64_t to rtc_time. * * @time: The number of seconds since 01-01-1970 00:00:00. * (Must be positive.) * @tm: Pointer to the struct rtc_time. */ void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) { unsigned int secs; int days; u64 u64tmp; u32 u32tmp, udays, century, day_of_century, year_of_century, year, day_of_year, month, day; bool is_Jan_or_Feb, is_leap_year; /* time must be positive */ days = div_s64_rem(time, 86400, &secs); /* day of the week, 1970-01-01 was a Thursday */ tm->tm_wday = (days + 4) % 7; /* * The following algorithm is, basically, Proposition 6.3 of Neri * and Schneider [1]. In a few words: it works on the computational * (fictitious) calendar where the year starts in March, month = 2 * (*), and finishes in February, month = 13. This calendar is * mathematically convenient because the day of the year does not * depend on whether the year is leap or not. For instance: * * March 1st 0-th day of the year; * ... * April 1st 31-st day of the year; * ... * January 1st 306-th day of the year; (Important!) * ... * February 28th 364-th day of the year; * February 29th 365-th day of the year (if it exists). * * After having worked out the date in the computational calendar * (using just arithmetics) it's easy to convert it to the * corresponding date in the Gregorian calendar. * * [1] "Euclidean Affine Functions and Applications to Calendar * Algorithms". https://arxiv.org/abs/2102.06959 * * (*) The numbering of months follows rtc_time more closely and * thus, is slightly different from [1]. */ udays = ((u32) days) + 719468; u32tmp = 4 * udays + 3; century = u32tmp / 146097; day_of_century = u32tmp % 146097 / 4; u32tmp = 4 * day_of_century + 3; u64tmp = 2939745ULL * u32tmp; year_of_century = upper_32_bits(u64tmp); day_of_year = lower_32_bits(u64tmp) / 2939745 / 4; year = 100 * century + year_of_century; is_leap_year = year_of_century != 0 ? year_of_century % 4 == 0 : century % 4 == 0; u32tmp = 2141 * day_of_year + 132377; month = u32tmp >> 16; day = ((u16) u32tmp) / 2141; /* * Recall that January 01 is the 306-th day of the year in the * computational (not Gregorian) calendar. */ is_Jan_or_Feb = day_of_year >= 306; /* Converts to the Gregorian calendar. */ year = year + is_Jan_or_Feb; month = is_Jan_or_Feb ? month - 12 : month; day = day + 1; day_of_year = is_Jan_or_Feb ? day_of_year - 306 : day_of_year + 31 + 28 + is_leap_year; /* Converts to rtc_time's format. */ tm->tm_year = (int) (year - 1900); tm->tm_mon = (int) month; tm->tm_mday = (int) day; tm->tm_yday = (int) day_of_year + 1; tm->tm_hour = secs / 3600; secs -= tm->tm_hour * 3600; tm->tm_min = secs / 60; tm->tm_sec = secs - tm->tm_min * 60; tm->tm_isdst = 0; } EXPORT_SYMBOL(rtc_time64_to_tm); /* * Does the rtc_time represent a valid date/time? */ int rtc_valid_tm(struct rtc_time *tm) { if (tm->tm_year < 70 || tm->tm_year > (INT_MAX - 1900) || ((unsigned int)tm->tm_mon) >= 12 || tm->tm_mday < 1 || tm->tm_mday > rtc_month_days(tm->tm_mon, ((unsigned int)tm->tm_year + 1900)) || ((unsigned int)tm->tm_hour) >= 24 || ((unsigned int)tm->tm_min) >= 60 || ((unsigned int)tm->tm_sec) >= 60) return -EINVAL; return 0; } EXPORT_SYMBOL(rtc_valid_tm); /* * rtc_tm_to_time64 - Converts rtc_time to time64_t. * Convert Gregorian date to seconds since 01-01-1970 00:00:00. */ time64_t rtc_tm_to_time64(struct rtc_time *tm) { return mktime64(((unsigned int)tm->tm_year + 1900), tm->tm_mon + 1, tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec); } EXPORT_SYMBOL(rtc_tm_to_time64); /* * Convert rtc_time to ktime */ ktime_t rtc_tm_to_ktime(struct rtc_time tm) { return ktime_set(rtc_tm_to_time64(&tm), 0); } EXPORT_SYMBOL_GPL(rtc_tm_to_ktime); /* * Convert ktime to rtc_time */ struct rtc_time rtc_ktime_to_tm(ktime_t kt) { struct timespec64 ts; struct rtc_time ret; ts = ktime_to_timespec64(kt); /* Round up any ns */ if (ts.tv_nsec) ts.tv_sec++; rtc_time64_to_tm(ts.tv_sec, &ret); return ret; } EXPORT_SYMBOL_GPL(rtc_ktime_to_tm);
166 106 65 32 13 56 56 51 52 42 8 10 55 1 4 50 50 12 50 28 6 17 23 23 23 2 13 11 6 10 3 4 23 3 26 28 26 4 3 28 37 37 37 14 23 13 29 29 1 28 37 37 37 37 14 22 6 1 610 2 23 604 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 /* * Copyright (c) 2005 Voltaire Inc. All rights reserved. * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved. * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved. * Copyright (c) 2005 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include <linux/mutex.h> #include <linux/inetdevice.h> #include <linux/slab.h> #include <linux/workqueue.h> #include <net/arp.h> #include <net/neighbour.h> #include <net/route.h> #include <net/netevent.h> #include <net/ipv6_stubs.h> #include <net/ip6_route.h> #include <rdma/ib_addr.h> #include <rdma/ib_cache.h> #include <rdma/ib_sa.h> #include <rdma/ib.h> #include <rdma/rdma_netlink.h> #include <net/netlink.h> #include "core_priv.h" struct addr_req { struct list_head list; struct sockaddr_storage src_addr; struct sockaddr_storage dst_addr; struct rdma_dev_addr *addr; void *context; void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context); unsigned long timeout; struct delayed_work work; bool resolve_by_gid_attr; /* Consider gid attr in resolve phase */ int status; u32 seq; }; static atomic_t ib_nl_addr_request_seq = ATOMIC_INIT(0); static DEFINE_SPINLOCK(lock); static LIST_HEAD(req_list); static struct workqueue_struct *addr_wq; static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = { [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY, .len = sizeof(struct rdma_nla_ls_gid), .validation_type = NLA_VALIDATE_MIN, .min = sizeof(struct rdma_nla_ls_gid)}, }; static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh) { struct nlattr *tb[LS_NLA_TYPE_MAX] = {}; int ret; if (nlh->nlmsg_flags & RDMA_NL_LS_F_ERR) return false; ret = nla_parse_deprecated(tb, LS_NLA_TYPE_MAX - 1, nlmsg_data(nlh), nlmsg_len(nlh), ib_nl_addr_policy, NULL); if (ret) return false; return true; } static void ib_nl_process_good_ip_rsep(const struct nlmsghdr *nlh) { const struct nlattr *head, *curr; union ib_gid gid; struct addr_req *req; int len, rem; int found = 0; head = (const struct nlattr *)nlmsg_data(nlh); len = nlmsg_len(nlh); nla_for_each_attr(curr, head, len, rem) { if (curr->nla_type == LS_NLA_TYPE_DGID) memcpy(&gid, nla_data(curr), nla_len(curr)); } spin_lock_bh(&lock); list_for_each_entry(req, &req_list, list) { if (nlh->nlmsg_seq != req->seq) continue; /* We set the DGID part, the rest was set earlier */ rdma_addr_set_dgid(req->addr, &gid); req->status = 0; found = 1; break; } spin_unlock_bh(&lock); if (!found) pr_info("Couldn't find request waiting for DGID: %pI6\n", &gid); } int ib_nl_handle_ip_res_resp(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { if ((nlh->nlmsg_flags & NLM_F_REQUEST) || !(NETLINK_CB(skb).sk)) return -EPERM; if (ib_nl_is_good_ip_resp(nlh)) ib_nl_process_good_ip_rsep(nlh); return 0; } static int ib_nl_ip_send_msg(struct rdma_dev_addr *dev_addr, const void *daddr, u32 seq, u16 family) { struct sk_buff *skb = NULL; struct nlmsghdr *nlh; struct rdma_ls_ip_resolve_header *header; void *data; size_t size; int attrtype; int len; if (family == AF_INET) { size = sizeof(struct in_addr); attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV4; } else { size = sizeof(struct in6_addr); attrtype = RDMA_NLA_F_MANDATORY | LS_NLA_TYPE_IPV6; } len = nla_total_size(sizeof(size)); len += NLMSG_ALIGN(sizeof(*header)); skb = nlmsg_new(len, GFP_KERNEL); if (!skb) return -ENOMEM; data = ibnl_put_msg(skb, &nlh, seq, 0, RDMA_NL_LS, RDMA_NL_LS_OP_IP_RESOLVE, NLM_F_REQUEST); if (!data) { nlmsg_free(skb); return -ENODATA; } /* Construct the family header first */ header = skb_put(skb, NLMSG_ALIGN(sizeof(*header))); header->ifindex = dev_addr->bound_dev_if; nla_put(skb, attrtype, size, daddr); /* Repair the nlmsg header length */ nlmsg_end(skb, nlh); rdma_nl_multicast(&init_net, skb, RDMA_NL_GROUP_LS, GFP_KERNEL); /* Make the request retry, so when we get the response from userspace * we will have something. */ return -ENODATA; } int rdma_addr_size(const struct sockaddr *addr) { switch (addr->sa_family) { case AF_INET: return sizeof(struct sockaddr_in); case AF_INET6: return sizeof(struct sockaddr_in6); case AF_IB: return sizeof(struct sockaddr_ib); default: return 0; } } EXPORT_SYMBOL(rdma_addr_size); int rdma_addr_size_in6(struct sockaddr_in6 *addr) { int ret = rdma_addr_size((struct sockaddr *) addr); return ret <= sizeof(*addr) ? ret : 0; } EXPORT_SYMBOL(rdma_addr_size_in6); int rdma_addr_size_kss(struct __kernel_sockaddr_storage *addr) { int ret = rdma_addr_size((struct sockaddr *) addr); return ret <= sizeof(*addr) ? ret : 0; } EXPORT_SYMBOL(rdma_addr_size_kss); /** * rdma_copy_src_l2_addr - Copy netdevice source addresses * @dev_addr: Destination address pointer where to copy the addresses * @dev: Netdevice whose source addresses to copy * * rdma_copy_src_l2_addr() copies source addresses from the specified netdevice. * This includes unicast address, broadcast address, device type and * interface index. */ void rdma_copy_src_l2_addr(struct rdma_dev_addr *dev_addr, const struct net_device *dev) { dev_addr->dev_type = dev->type; memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN); memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN); dev_addr->bound_dev_if = dev->ifindex; } EXPORT_SYMBOL(rdma_copy_src_l2_addr); static struct net_device * rdma_find_ndev_for_src_ip_rcu(struct net *net, const struct sockaddr *src_in) { struct net_device *dev = NULL; int ret = -EADDRNOTAVAIL; switch (src_in->sa_family) { case AF_INET: dev = __ip_dev_find(net, ((const struct sockaddr_in *)src_in)->sin_addr.s_addr, false); if (dev) ret = 0; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: for_each_netdev_rcu(net, dev) { if (ipv6_chk_addr(net, &((const struct sockaddr_in6 *)src_in)->sin6_addr, dev, 1)) { ret = 0; break; } } break; #endif } return ret ? ERR_PTR(ret) : dev; } int rdma_translate_ip(const struct sockaddr *addr, struct rdma_dev_addr *dev_addr) { struct net_device *dev; if (dev_addr->bound_dev_if) { dev = dev_get_by_index(dev_addr->net, dev_addr->bound_dev_if); if (!dev) return -ENODEV; rdma_copy_src_l2_addr(dev_addr, dev); dev_put(dev); return 0; } rcu_read_lock(); dev = rdma_find_ndev_for_src_ip_rcu(dev_addr->net, addr); if (!IS_ERR(dev)) rdma_copy_src_l2_addr(dev_addr, dev); rcu_read_unlock(); return PTR_ERR_OR_ZERO(dev); } EXPORT_SYMBOL(rdma_translate_ip); static void set_timeout(struct addr_req *req, unsigned long time) { unsigned long delay; delay = time - jiffies; if ((long)delay < 0) delay = 0; mod_delayed_work(addr_wq, &req->work, delay); } static void queue_req(struct addr_req *req) { spin_lock_bh(&lock); list_add_tail(&req->list, &req_list); set_timeout(req, req->timeout); spin_unlock_bh(&lock); } static int ib_nl_fetch_ha(struct rdma_dev_addr *dev_addr, const void *daddr, u32 seq, u16 family) { if (!rdma_nl_chk_listeners(RDMA_NL_GROUP_LS)) return -EADDRNOTAVAIL; return ib_nl_ip_send_msg(dev_addr, daddr, seq, family); } static int dst_fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const void *daddr) { struct neighbour *n; int ret = 0; n = dst_neigh_lookup(dst, daddr); if (!n) return -ENODATA; if (!(n->nud_state & NUD_VALID)) { neigh_event_send(n, NULL); ret = -ENODATA; } else { neigh_ha_snapshot(dev_addr->dst_dev_addr, n, dst->dev); } neigh_release(n); return ret; } static bool has_gateway(const struct dst_entry *dst, sa_family_t family) { if (family == AF_INET) return dst_rtable(dst)->rt_uses_gateway; return dst_rt6_info(dst)->rt6i_flags & RTF_GATEWAY; } static int fetch_ha(const struct dst_entry *dst, struct rdma_dev_addr *dev_addr, const struct sockaddr *dst_in, u32 seq) { const struct sockaddr_in *dst_in4 = (const struct sockaddr_in *)dst_in; const struct sockaddr_in6 *dst_in6 = (const struct sockaddr_in6 *)dst_in; const void *daddr = (dst_in->sa_family == AF_INET) ? (const void *)&dst_in4->sin_addr.s_addr : (const void *)&dst_in6->sin6_addr; sa_family_t family = dst_in->sa_family; might_sleep(); /* If we have a gateway in IB mode then it must be an IB network */ if (has_gateway(dst, family) && dev_addr->network == RDMA_NETWORK_IB) return ib_nl_fetch_ha(dev_addr, daddr, seq, family); else return dst_fetch_ha(dst, dev_addr, daddr); } static int addr4_resolve(struct sockaddr *src_sock, const struct sockaddr *dst_sock, struct rdma_dev_addr *addr, struct rtable **prt) { struct sockaddr_in *src_in = (struct sockaddr_in *)src_sock; const struct sockaddr_in *dst_in = (const struct sockaddr_in *)dst_sock; __be32 src_ip = src_in->sin_addr.s_addr; __be32 dst_ip = dst_in->sin_addr.s_addr; struct rtable *rt; struct flowi4 fl4; int ret; memset(&fl4, 0, sizeof(fl4)); fl4.daddr = dst_ip; fl4.saddr = src_ip; fl4.flowi4_oif = addr->bound_dev_if; rt = ip_route_output_key(addr->net, &fl4); ret = PTR_ERR_OR_ZERO(rt); if (ret) return ret; src_in->sin_addr.s_addr = fl4.saddr; addr->hoplimit = ip4_dst_hoplimit(&rt->dst); *prt = rt; return 0; } #if IS_ENABLED(CONFIG_IPV6) static int addr6_resolve(struct sockaddr *src_sock, const struct sockaddr *dst_sock, struct rdma_dev_addr *addr, struct dst_entry **pdst) { struct sockaddr_in6 *src_in = (struct sockaddr_in6 *)src_sock; const struct sockaddr_in6 *dst_in = (const struct sockaddr_in6 *)dst_sock; struct flowi6 fl6; struct dst_entry *dst; memset(&fl6, 0, sizeof fl6); fl6.daddr = dst_in->sin6_addr; fl6.saddr = src_in->sin6_addr; fl6.flowi6_oif = addr->bound_dev_if; dst = ipv6_stub->ipv6_dst_lookup_flow(addr->net, NULL, &fl6, NULL); if (IS_ERR(dst)) return PTR_ERR(dst); if (ipv6_addr_any(&src_in->sin6_addr)) src_in->sin6_addr = fl6.saddr; addr->hoplimit = ip6_dst_hoplimit(dst); *pdst = dst; return 0; } #else static int addr6_resolve(struct sockaddr *src_sock, const struct sockaddr *dst_sock, struct rdma_dev_addr *addr, struct dst_entry **pdst) { return -EADDRNOTAVAIL; } #endif static int addr_resolve_neigh(const struct dst_entry *dst, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, unsigned int ndev_flags, u32 seq) { int ret = 0; if (ndev_flags & IFF_LOOPBACK) { memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN); } else { if (!(ndev_flags & IFF_NOARP)) { /* If the device doesn't do ARP internally */ ret = fetch_ha(dst, addr, dst_in, seq); } } return ret; } static int copy_src_l2_addr(struct rdma_dev_addr *dev_addr, const struct sockaddr *dst_in, const struct dst_entry *dst, const struct net_device *ndev) { int ret = 0; if (dst->dev->flags & IFF_LOOPBACK) ret = rdma_translate_ip(dst_in, dev_addr); else rdma_copy_src_l2_addr(dev_addr, dst->dev); /* * If there's a gateway and type of device not ARPHRD_INFINIBAND, * we're definitely in RoCE v2 (as RoCE v1 isn't routable) set the * network type accordingly. */ if (has_gateway(dst, dst_in->sa_family) && ndev->type != ARPHRD_INFINIBAND) dev_addr->network = dst_in->sa_family == AF_INET ? RDMA_NETWORK_IPV4 : RDMA_NETWORK_IPV6; else dev_addr->network = RDMA_NETWORK_IB; return ret; } static int rdma_set_src_addr_rcu(struct rdma_dev_addr *dev_addr, unsigned int *ndev_flags, const struct sockaddr *dst_in, const struct dst_entry *dst) { struct net_device *ndev = READ_ONCE(dst->dev); *ndev_flags = ndev->flags; /* A physical device must be the RDMA device to use */ if (ndev->flags & IFF_LOOPBACK) { /* * RDMA (IB/RoCE, iWarp) doesn't run on lo interface or * loopback IP address. So if route is resolved to loopback * interface, translate that to a real ndev based on non * loopback IP address. */ ndev = rdma_find_ndev_for_src_ip_rcu(dev_net(ndev), dst_in); if (IS_ERR(ndev)) return -ENODEV; } return copy_src_l2_addr(dev_addr, dst_in, dst, ndev); } static int set_addr_netns_by_gid_rcu(struct rdma_dev_addr *addr) { struct net_device *ndev; ndev = rdma_read_gid_attr_ndev_rcu(addr->sgid_attr); if (IS_ERR(ndev)) return PTR_ERR(ndev); /* * Since we are holding the rcu, reading net and ifindex * are safe without any additional reference; because * change_net_namespace() in net/core/dev.c does rcu sync * after it changes the state to IFF_DOWN and before * updating netdev fields {net, ifindex}. */ addr->net = dev_net(ndev); addr->bound_dev_if = ndev->ifindex; return 0; } static void rdma_addr_set_net_defaults(struct rdma_dev_addr *addr) { addr->net = &init_net; addr->bound_dev_if = 0; } static int addr_resolve(struct sockaddr *src_in, const struct sockaddr *dst_in, struct rdma_dev_addr *addr, bool resolve_neigh, bool resolve_by_gid_attr, u32 seq) { struct dst_entry *dst = NULL; unsigned int ndev_flags = 0; struct rtable *rt = NULL; int ret; if (!addr->net) { pr_warn_ratelimited("%s: missing namespace\n", __func__); return -EINVAL; } rcu_read_lock(); if (resolve_by_gid_attr) { if (!addr->sgid_attr) { rcu_read_unlock(); pr_warn_ratelimited("%s: missing gid_attr\n", __func__); return -EINVAL; } /* * If the request is for a specific gid attribute of the * rdma_dev_addr, derive net from the netdevice of the * GID attribute. */ ret = set_addr_netns_by_gid_rcu(addr); if (ret) { rcu_read_unlock(); return ret; } } if (src_in->sa_family == AF_INET) { ret = addr4_resolve(src_in, dst_in, addr, &rt); dst = &rt->dst; } else { ret = addr6_resolve(src_in, dst_in, addr, &dst); } if (ret) { rcu_read_unlock(); goto done; } ret = rdma_set_src_addr_rcu(addr, &ndev_flags, dst_in, dst); rcu_read_unlock(); /* * Resolve neighbor destination address if requested and * only if src addr translation didn't fail. */ if (!ret && resolve_neigh) ret = addr_resolve_neigh(dst, dst_in, addr, ndev_flags, seq); if (src_in->sa_family == AF_INET) ip_rt_put(rt); else dst_release(dst); done: /* * Clear the addr net to go back to its original state, only if it was * derived from GID attribute in this context. */ if (resolve_by_gid_attr) rdma_addr_set_net_defaults(addr); return ret; } static void process_one_req(struct work_struct *_work) { struct addr_req *req; struct sockaddr *src_in, *dst_in; req = container_of(_work, struct addr_req, work.work); if (req->status == -ENODATA) { src_in = (struct sockaddr *)&req->src_addr; dst_in = (struct sockaddr *)&req->dst_addr; req->status = addr_resolve(src_in, dst_in, req->addr, true, req->resolve_by_gid_attr, req->seq); if (req->status && time_after_eq(jiffies, req->timeout)) { req->status = -ETIMEDOUT; } else if (req->status == -ENODATA) { /* requeue the work for retrying again */ spin_lock_bh(&lock); if (!list_empty(&req->list)) set_timeout(req, req->timeout); spin_unlock_bh(&lock); return; } } req->callback(req->status, (struct sockaddr *)&req->src_addr, req->addr, req->context); req->callback = NULL; spin_lock_bh(&lock); /* * Although the work will normally have been canceled by the workqueue, * it can still be requeued as long as it is on the req_list. */ cancel_delayed_work(&req->work); if (!list_empty(&req->list)) { list_del_init(&req->list); kfree(req); } spin_unlock_bh(&lock); } int rdma_resolve_ip(struct sockaddr *src_addr, const struct sockaddr *dst_addr, struct rdma_dev_addr *addr, unsigned long timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), bool resolve_by_gid_attr, void *context) { struct sockaddr *src_in, *dst_in; struct addr_req *req; int ret = 0; req = kzalloc(sizeof *req, GFP_KERNEL); if (!req) return -ENOMEM; src_in = (struct sockaddr *) &req->src_addr; dst_in = (struct sockaddr *) &req->dst_addr; if (src_addr) { if (src_addr->sa_family != dst_addr->sa_family) { ret = -EINVAL; goto err; } memcpy(src_in, src_addr, rdma_addr_size(src_addr)); } else { src_in->sa_family = dst_addr->sa_family; } memcpy(dst_in, dst_addr, rdma_addr_size(dst_addr)); req->addr = addr; req->callback = callback; req->context = context; req->resolve_by_gid_attr = resolve_by_gid_attr; INIT_DELAYED_WORK(&req->work, process_one_req); req->seq = (u32)atomic_inc_return(&ib_nl_addr_request_seq); req->status = addr_resolve(src_in, dst_in, addr, true, req->resolve_by_gid_attr, req->seq); switch (req->status) { case 0: req->timeout = jiffies; queue_req(req); break; case -ENODATA: req->timeout = msecs_to_jiffies(timeout_ms) + jiffies; queue_req(req); break; default: ret = req->status; goto err; } return ret; err: kfree(req); return ret; } EXPORT_SYMBOL(rdma_resolve_ip); int roce_resolve_route_from_path(struct sa_path_rec *rec, const struct ib_gid_attr *attr) { union { struct sockaddr _sockaddr; struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } sgid, dgid; struct rdma_dev_addr dev_addr = {}; int ret; might_sleep(); if (rec->roce.route_resolved) return 0; rdma_gid2ip((struct sockaddr *)&sgid, &rec->sgid); rdma_gid2ip((struct sockaddr *)&dgid, &rec->dgid); if (sgid._sockaddr.sa_family != dgid._sockaddr.sa_family) return -EINVAL; if (!attr || !attr->ndev) return -EINVAL; dev_addr.net = &init_net; dev_addr.sgid_attr = attr; ret = addr_resolve((struct sockaddr *)&sgid, (struct sockaddr *)&dgid, &dev_addr, false, true, 0); if (ret) return ret; if ((dev_addr.network == RDMA_NETWORK_IPV4 || dev_addr.network == RDMA_NETWORK_IPV6) && rec->rec_type != SA_PATH_REC_TYPE_ROCE_V2) return -EINVAL; rec->roce.route_resolved = true; return 0; } /** * rdma_addr_cancel - Cancel resolve ip request * @addr: Pointer to address structure given previously * during rdma_resolve_ip(). * rdma_addr_cancel() is synchronous function which cancels any pending * request if there is any. */ void rdma_addr_cancel(struct rdma_dev_addr *addr) { struct addr_req *req, *temp_req; struct addr_req *found = NULL; spin_lock_bh(&lock); list_for_each_entry_safe(req, temp_req, &req_list, list) { if (req->addr == addr) { /* * Removing from the list means we take ownership of * the req */ list_del_init(&req->list); found = req; break; } } spin_unlock_bh(&lock); if (!found) return; /* * sync canceling the work after removing it from the req_list * guarentees no work is running and none will be started. */ cancel_delayed_work_sync(&found->work); kfree(found); } EXPORT_SYMBOL(rdma_addr_cancel); struct resolve_cb_context { struct completion comp; int status; }; static void resolve_cb(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context) { ((struct resolve_cb_context *)context)->status = status; complete(&((struct resolve_cb_context *)context)->comp); } int rdma_addr_find_l2_eth_by_grh(const union ib_gid *sgid, const union ib_gid *dgid, u8 *dmac, const struct ib_gid_attr *sgid_attr, int *hoplimit) { struct rdma_dev_addr dev_addr; struct resolve_cb_context ctx; union { struct sockaddr_in _sockaddr_in; struct sockaddr_in6 _sockaddr_in6; } sgid_addr, dgid_addr; int ret; rdma_gid2ip((struct sockaddr *)&sgid_addr, sgid); rdma_gid2ip((struct sockaddr *)&dgid_addr, dgid); memset(&dev_addr, 0, sizeof(dev_addr)); dev_addr.net = &init_net; dev_addr.sgid_attr = sgid_attr; init_completion(&ctx.comp); ret = rdma_resolve_ip((struct sockaddr *)&sgid_addr, (struct sockaddr *)&dgid_addr, &dev_addr, 1000, resolve_cb, true, &ctx); if (ret) return ret; wait_for_completion(&ctx.comp); ret = ctx.status; if (ret) return ret; memcpy(dmac, dev_addr.dst_dev_addr, ETH_ALEN); *hoplimit = dev_addr.hoplimit; return 0; } static int netevent_callback(struct notifier_block *self, unsigned long event, void *ctx) { struct addr_req *req; if (event == NETEVENT_NEIGH_UPDATE) { struct neighbour *neigh = ctx; if (neigh->nud_state & NUD_VALID) { spin_lock_bh(&lock); list_for_each_entry(req, &req_list, list) set_timeout(req, jiffies); spin_unlock_bh(&lock); } } return 0; } static struct notifier_block nb = { .notifier_call = netevent_callback }; int addr_init(void) { addr_wq = alloc_ordered_workqueue("ib_addr", 0); if (!addr_wq) return -ENOMEM; register_netevent_notifier(&nb); return 0; } void addr_cleanup(void) { unregister_netevent_notifier(&nb); destroy_workqueue(addr_wq); WARN_ON(!list_empty(&req_list)); }
727 729 729 728 721 721 720 720 721 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 // SPDX-License-Identifier: GPL-2.0-only /* * Common framework for low-level network console, dump, and debugger code * * Sep 8 2003 Matt Mackall <mpm@selenic.com> * * based on the netconsole code from: * * Copyright (C) 2001 Ingo Molnar <mingo@redhat.com> * Copyright (C) 2002 Red Hat, Inc. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/moduleparam.h> #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/string.h> #include <linux/if_arp.h> #include <linux/inetdevice.h> #include <linux/inet.h> #include <linux/interrupt.h> #include <linux/netpoll.h> #include <linux/sched.h> #include <linux/delay.h> #include <linux/rcupdate.h> #include <linux/workqueue.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/if_vlan.h> #include <net/tcp.h> #include <net/udp.h> #include <net/addrconf.h> #include <net/ndisc.h> #include <net/ip6_checksum.h> #include <asm/unaligned.h> #include <trace/events/napi.h> #include <linux/kconfig.h> /* * We maintain a small pool of fully-sized skbs, to make sure the * message gets out even in extreme OOM situations. */ #define MAX_UDP_CHUNK 1460 #define MAX_SKBS 32 static struct sk_buff_head skb_pool; DEFINE_STATIC_SRCU(netpoll_srcu); #define USEC_PER_POLL 50 #define MAX_SKB_SIZE \ (sizeof(struct ethhdr) + \ sizeof(struct iphdr) + \ sizeof(struct udphdr) + \ MAX_UDP_CHUNK) static void zap_completion_queue(void); static unsigned int carrier_timeout = 4; module_param(carrier_timeout, uint, 0644); #define np_info(np, fmt, ...) \ pr_info("%s: " fmt, np->name, ##__VA_ARGS__) #define np_err(np, fmt, ...) \ pr_err("%s: " fmt, np->name, ##__VA_ARGS__) #define np_notice(np, fmt, ...) \ pr_notice("%s: " fmt, np->name, ##__VA_ARGS__) static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { netdev_tx_t status = NETDEV_TX_OK; netdev_features_t features; features = netif_skb_features(skb); if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) { skb = __vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) { /* This is actually a packet drop, but we * don't want the code that calls this * function to try and operate on a NULL skb. */ goto out; } } status = netdev_start_xmit(skb, dev, txq, false); out: return status; } static void queue_process(struct work_struct *work) { struct netpoll_info *npinfo = container_of(work, struct netpoll_info, tx_work.work); struct sk_buff *skb; unsigned long flags; while ((skb = skb_dequeue(&npinfo->txq))) { struct net_device *dev = skb->dev; struct netdev_queue *txq; unsigned int q_index; if (!netif_device_present(dev) || !netif_running(dev)) { kfree_skb(skb); continue; } local_irq_save(flags); /* check if skb->queue_mapping is still valid */ q_index = skb_get_queue_mapping(skb); if (unlikely(q_index >= dev->real_num_tx_queues)) { q_index = q_index % dev->real_num_tx_queues; skb_set_queue_mapping(skb, q_index); } txq = netdev_get_tx_queue(dev, q_index); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) { skb_queue_head(&npinfo->txq, skb); HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); schedule_delayed_work(&npinfo->tx_work, HZ/10); return; } HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); } } static int netif_local_xmit_active(struct net_device *dev) { int i; for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); if (READ_ONCE(txq->xmit_lock_owner) == smp_processor_id()) return 1; } return 0; } static void poll_one_napi(struct napi_struct *napi) { int work; /* If we set this bit but see that it has already been set, * that indicates that napi has been disabled and we need * to abort this operation */ if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state)) return; /* We explicilty pass the polling call a budget of 0 to * indicate that we are clearing the Tx path only. */ work = napi->poll(napi, 0); WARN_ONCE(work, "%pS exceeded budget in poll\n", napi->poll); trace_napi_poll(napi, work, 0); clear_bit(NAPI_STATE_NPSVC, &napi->state); } static void poll_napi(struct net_device *dev) { struct napi_struct *napi; int cpu = smp_processor_id(); list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) { if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) { poll_one_napi(napi); smp_store_release(&napi->poll_owner, -1); } } } void netpoll_poll_dev(struct net_device *dev) { struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo); const struct net_device_ops *ops; /* Don't do any rx activity if the dev_lock mutex is held * the dev_open/close paths use this to block netpoll activity * while changing device state */ if (!ni || down_trylock(&ni->dev_lock)) return; /* Some drivers will take the same locks in poll and xmit, * we can't poll if local CPU is already in xmit. */ if (!netif_running(dev) || netif_local_xmit_active(dev)) { up(&ni->dev_lock); return; } ops = dev->netdev_ops; if (ops->ndo_poll_controller) ops->ndo_poll_controller(dev); poll_napi(dev); up(&ni->dev_lock); zap_completion_queue(); } EXPORT_SYMBOL(netpoll_poll_dev); void netpoll_poll_disable(struct net_device *dev) { struct netpoll_info *ni; int idx; might_sleep(); idx = srcu_read_lock(&netpoll_srcu); ni = srcu_dereference(dev->npinfo, &netpoll_srcu); if (ni) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); } EXPORT_SYMBOL(netpoll_poll_disable); void netpoll_poll_enable(struct net_device *dev) { struct netpoll_info *ni; rcu_read_lock(); ni = rcu_dereference(dev->npinfo); if (ni) up(&ni->dev_lock); rcu_read_unlock(); } EXPORT_SYMBOL(netpoll_poll_enable); static void refill_skbs(void) { struct sk_buff *skb; unsigned long flags; spin_lock_irqsave(&skb_pool.lock, flags); while (skb_pool.qlen < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; __skb_queue_tail(&skb_pool, skb); } spin_unlock_irqrestore(&skb_pool.lock, flags); } static void zap_completion_queue(void) { unsigned long flags; struct softnet_data *sd = &get_cpu_var(softnet_data); if (sd->completion_queue) { struct sk_buff *clist; local_irq_save(flags); clist = sd->completion_queue; sd->completion_queue = NULL; local_irq_restore(flags); while (clist != NULL) { struct sk_buff *skb = clist; clist = clist->next; if (!skb_irq_freeable(skb)) { refcount_set(&skb->users, 1); dev_kfree_skb_any(skb); /* put this one back */ } else { __kfree_skb(skb); } } } put_cpu_var(softnet_data); } static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve) { int count = 0; struct sk_buff *skb; zap_completion_queue(); refill_skbs(); repeat: skb = alloc_skb(len, GFP_ATOMIC); if (!skb) skb = skb_dequeue(&skb_pool); if (!skb) { if (++count < 10) { netpoll_poll_dev(np->dev); goto repeat; } return NULL; } refcount_set(&skb->users, 1); skb_reserve(skb, reserve); return skb; } static int netpoll_owner_active(struct net_device *dev) { struct napi_struct *napi; list_for_each_entry_rcu(napi, &dev->napi_list, dev_list) { if (READ_ONCE(napi->poll_owner) == smp_processor_id()) return 1; } return 0; } /* call with IRQ disabled */ static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { netdev_tx_t status = NETDEV_TX_BUSY; struct net_device *dev; unsigned long tries; /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; lockdep_assert_irqs_disabled(); dev = np->dev; npinfo = rcu_dereference_bh(dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { dev_kfree_skb_irq(skb); return NET_XMIT_DROP; } /* don't get messages out of order, and no recursion */ if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) { struct netdev_queue *txq; txq = netdev_core_pick_tx(dev, skb, NULL); /* try until next clock tick */ for (tries = jiffies_to_usecs(1)/USEC_PER_POLL; tries > 0; --tries) { if (HARD_TX_TRYLOCK(dev, txq)) { if (!netif_xmit_stopped(txq)) status = netpoll_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); if (dev_xmit_complete(status)) break; } /* tickle device maybe there is some cleanup */ netpoll_poll_dev(np->dev); udelay(USEC_PER_POLL); } WARN_ONCE(!irqs_disabled(), "netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pS)\n", dev->name, dev->netdev_ops->ndo_start_xmit); } if (!dev_xmit_complete(status)) { skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } return NETDEV_TX_OK; } netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb) { unsigned long flags; netdev_tx_t ret; if (unlikely(!np)) { dev_kfree_skb_irq(skb); ret = NET_XMIT_DROP; } else { local_irq_save(flags); ret = __netpoll_send_skb(np, skb); local_irq_restore(flags); } return ret; } EXPORT_SYMBOL(netpoll_send_skb); void netpoll_send_udp(struct netpoll *np, const char *msg, int len) { int total_len, ip_len, udp_len; struct sk_buff *skb; struct udphdr *udph; struct iphdr *iph; struct ethhdr *eth; static atomic_t ip_ident; struct ipv6hdr *ip6h; if (!IS_ENABLED(CONFIG_PREEMPT_RT)) WARN_ON_ONCE(!irqs_disabled()); udp_len = len + sizeof(*udph); if (np->ipv6) ip_len = udp_len + sizeof(*ip6h); else ip_len = udp_len + sizeof(*iph); total_len = ip_len + LL_RESERVED_SPACE(np->dev); skb = find_skb(np, total_len + np->dev->needed_tailroom, total_len - len); if (!skb) return; skb_copy_to_linear_data(skb, msg, len); skb_put(skb, len); skb_push(skb, sizeof(*udph)); skb_reset_transport_header(skb); udph = udp_hdr(skb); udph->source = htons(np->local_port); udph->dest = htons(np->remote_port); udph->len = htons(udp_len); if (np->ipv6) { udph->check = 0; udph->check = csum_ipv6_magic(&np->local_ip.in6, &np->remote_ip.in6, udp_len, IPPROTO_UDP, csum_partial(udph, udp_len, 0)); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); ip6h = ipv6_hdr(skb); /* ip6h->version = 6; ip6h->priority = 0; */ *(unsigned char *)ip6h = 0x60; ip6h->flow_lbl[0] = 0; ip6h->flow_lbl[1] = 0; ip6h->flow_lbl[2] = 0; ip6h->payload_len = htons(sizeof(struct udphdr) + len); ip6h->nexthdr = IPPROTO_UDP; ip6h->hop_limit = 32; ip6h->saddr = np->local_ip.in6; ip6h->daddr = np->remote_ip.in6; eth = skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth->h_proto = htons(ETH_P_IPV6); } else { udph->check = 0; udph->check = csum_tcpudp_magic(np->local_ip.ip, np->remote_ip.ip, udp_len, IPPROTO_UDP, csum_partial(udph, udp_len, 0)); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb_push(skb, sizeof(*iph)); skb_reset_network_header(skb); iph = ip_hdr(skb); /* iph->version = 4; iph->ihl = 5; */ *(unsigned char *)iph = 0x45; iph->tos = 0; put_unaligned(htons(ip_len), &(iph->tot_len)); iph->id = htons(atomic_inc_return(&ip_ident)); iph->frag_off = 0; iph->ttl = 64; iph->protocol = IPPROTO_UDP; iph->check = 0; put_unaligned(np->local_ip.ip, &(iph->saddr)); put_unaligned(np->remote_ip.ip, &(iph->daddr)); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); eth = skb_push(skb, ETH_HLEN); skb_reset_mac_header(skb); skb->protocol = eth->h_proto = htons(ETH_P_IP); } ether_addr_copy(eth->h_source, np->dev->dev_addr); ether_addr_copy(eth->h_dest, np->remote_mac); skb->dev = np->dev; netpoll_send_skb(np, skb); } EXPORT_SYMBOL(netpoll_send_udp); void netpoll_print_options(struct netpoll *np) { np_info(np, "local port %d\n", np->local_port); if (np->ipv6) np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6); else np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip); np_info(np, "interface '%s'\n", np->dev_name); np_info(np, "remote port %d\n", np->remote_port); if (np->ipv6) np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6); else np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip); np_info(np, "remote ethernet address %pM\n", np->remote_mac); } EXPORT_SYMBOL(netpoll_print_options); static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr) { const char *end; if (!strchr(str, ':') && in4_pton(str, -1, (void *)addr, -1, &end) > 0) { if (!*end) return 0; } if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) { #if IS_ENABLED(CONFIG_IPV6) if (!*end) return 1; #else return -1; #endif } return -1; } int netpoll_parse_options(struct netpoll *np, char *opt) { char *cur=opt, *delim; int ipv6; bool ipversion_set = false; if (*cur != '@') { if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; *delim = 0; if (kstrtou16(cur, 10, &np->local_port)) goto parse_failed; cur = delim; } cur++; if (*cur != '/') { ipversion_set = true; if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip); if (ipv6 < 0) goto parse_failed; else np->ipv6 = (bool)ipv6; cur = delim; } cur++; if (*cur != ',') { /* parse out dev name */ if ((delim = strchr(cur, ',')) == NULL) goto parse_failed; *delim = 0; strscpy(np->dev_name, cur, sizeof(np->dev_name)); cur = delim; } cur++; if (*cur != '@') { /* dst port */ if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; *delim = 0; if (*cur == ' ' || *cur == '\t') np_info(np, "warning: whitespace is not allowed\n"); if (kstrtou16(cur, 10, &np->remote_port)) goto parse_failed; cur = delim; } cur++; /* dst ip */ if ((delim = strchr(cur, '/')) == NULL) goto parse_failed; *delim = 0; ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip); if (ipv6 < 0) goto parse_failed; else if (ipversion_set && np->ipv6 != (bool)ipv6) goto parse_failed; else np->ipv6 = (bool)ipv6; cur = delim + 1; if (*cur != 0) { /* MAC address */ if (!mac_pton(cur, np->remote_mac)) goto parse_failed; } netpoll_print_options(np); return 0; parse_failed: np_info(np, "couldn't parse config at '%s'!\n", cur); return -1; } EXPORT_SYMBOL(netpoll_parse_options); int __netpoll_setup(struct netpoll *np, struct net_device *ndev) { struct netpoll_info *npinfo; const struct net_device_ops *ops; int err; np->dev = ndev; strscpy(np->dev_name, ndev->name, IFNAMSIZ); if (ndev->priv_flags & IFF_DISABLE_NETPOLL) { np_err(np, "%s doesn't support polling, aborting\n", np->dev_name); err = -ENOTSUPP; goto out; } if (!ndev->npinfo) { npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL); if (!npinfo) { err = -ENOMEM; goto out; } sema_init(&npinfo->dev_lock, 1); skb_queue_head_init(&npinfo->txq); INIT_DELAYED_WORK(&npinfo->tx_work, queue_process); refcount_set(&npinfo->refcnt, 1); ops = np->dev->netdev_ops; if (ops->ndo_netpoll_setup) { err = ops->ndo_netpoll_setup(ndev, npinfo); if (err) goto free_npinfo; } } else { npinfo = rtnl_dereference(ndev->npinfo); refcount_inc(&npinfo->refcnt); } npinfo->netpoll = np; /* last thing to do is link it to the net device structure */ rcu_assign_pointer(ndev->npinfo, npinfo); return 0; free_npinfo: kfree(npinfo); out: return err; } EXPORT_SYMBOL_GPL(__netpoll_setup); int netpoll_setup(struct netpoll *np) { struct net_device *ndev = NULL; struct in_device *in_dev; int err; rtnl_lock(); if (np->dev_name[0]) { struct net *net = current->nsproxy->net_ns; ndev = __dev_get_by_name(net, np->dev_name); } if (!ndev) { np_err(np, "%s doesn't exist, aborting\n", np->dev_name); err = -ENODEV; goto unlock; } netdev_hold(ndev, &np->dev_tracker, GFP_KERNEL); if (netdev_master_upper_dev_get(ndev)) { np_err(np, "%s is a slave device, aborting\n", np->dev_name); err = -EBUSY; goto put; } if (!netif_running(ndev)) { unsigned long atmost; np_info(np, "device %s not up yet, forcing it\n", np->dev_name); err = dev_open(ndev, NULL); if (err) { np_err(np, "failed to open %s\n", ndev->name); goto put; } rtnl_unlock(); atmost = jiffies + carrier_timeout * HZ; while (!netif_carrier_ok(ndev)) { if (time_after(jiffies, atmost)) { np_notice(np, "timeout waiting for carrier\n"); break; } msleep(1); } rtnl_lock(); } if (!np->local_ip.ip) { if (!np->ipv6) { const struct in_ifaddr *ifa; in_dev = __in_dev_get_rtnl(ndev); if (!in_dev) goto put_noaddr; ifa = rtnl_dereference(in_dev->ifa_list); if (!ifa) { put_noaddr: np_err(np, "no IP address for %s, aborting\n", np->dev_name); err = -EDESTADDRREQ; goto put; } np->local_ip.ip = ifa->ifa_local; np_info(np, "local IP %pI4\n", &np->local_ip.ip); } else { #if IS_ENABLED(CONFIG_IPV6) struct inet6_dev *idev; err = -EDESTADDRREQ; idev = __in6_dev_get(ndev); if (idev) { struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); list_for_each_entry(ifp, &idev->addr_list, if_list) { if (!!(ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL) != !!(ipv6_addr_type(&np->remote_ip.in6) & IPV6_ADDR_LINKLOCAL)) continue; np->local_ip.in6 = ifp->addr; err = 0; break; } read_unlock_bh(&idev->lock); } if (err) { np_err(np, "no IPv6 address for %s, aborting\n", np->dev_name); goto put; } else np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6); #else np_err(np, "IPv6 is not supported %s, aborting\n", np->dev_name); err = -EINVAL; goto put; #endif } } /* fill up the skb queue */ refill_skbs(); err = __netpoll_setup(np, ndev); if (err) goto put; rtnl_unlock(); return 0; put: netdev_put(ndev, &np->dev_tracker); unlock: rtnl_unlock(); return err; } EXPORT_SYMBOL(netpoll_setup); static int __init netpoll_init(void) { skb_queue_head_init(&skb_pool); return 0; } core_initcall(netpoll_init); static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head) { struct netpoll_info *npinfo = container_of(rcu_head, struct netpoll_info, rcu); skb_queue_purge(&npinfo->txq); /* we can't call cancel_delayed_work_sync here, as we are in softirq */ cancel_delayed_work(&npinfo->tx_work); /* clean after last, unfinished work */ __skb_queue_purge(&npinfo->txq); /* now cancel it again */ cancel_delayed_work(&npinfo->tx_work); kfree(npinfo); } void __netpoll_cleanup(struct netpoll *np) { struct netpoll_info *npinfo; npinfo = rtnl_dereference(np->dev->npinfo); if (!npinfo) return; synchronize_srcu(&netpoll_srcu); if (refcount_dec_and_test(&npinfo->refcnt)) { const struct net_device_ops *ops; ops = np->dev->netdev_ops; if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); RCU_INIT_POINTER(np->dev->npinfo, NULL); call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); } else RCU_INIT_POINTER(np->dev->npinfo, NULL); } EXPORT_SYMBOL_GPL(__netpoll_cleanup); void __netpoll_free(struct netpoll *np) { ASSERT_RTNL(); /* Wait for transmitting packets to finish before freeing. */ synchronize_rcu(); __netpoll_cleanup(np); kfree(np); } EXPORT_SYMBOL_GPL(__netpoll_free); void netpoll_cleanup(struct netpoll *np) { rtnl_lock(); if (!np->dev) goto out; __netpoll_cleanup(np); netdev_put(np->dev, &np->dev_tracker); np->dev = NULL; out: rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup);
277 129 123 118 118 8 105 219 207 198 193 15 180 347 129 219 87 87 87 87 328 6 6 170 169 169 101 100 57 57 57 41 21 57 20 231 410 101 101 101 93 12 563 565 563 187 565 72 1 72 72 72 54 49 49 72 72 17 80 21 13 8 59 15 20 2 18 19 15 8 9 1 5 4 8 17 55 55 11 11 6 55 55 40 40 18 18 18 14 14 71 72 72 53 19 7 13 13 5 72 72 10 71 72 72 4 5 69 115 20 44 22 23 3 632 434 123 20 2 62 38 344 71 2 2 37 3 21 21 31 20 28 31 60 72 75 73 1 27 3 32 10 22 18 18 34 18 71 75 4 72 10 40 35 22 72 75 75 22 72 30 21 25 8 3 13 37 28 8 21 6 9 21 8 3 5 18 12 10 1 6 3 4 3 1 8 1 22 10 6 14 25 13 2 22 22 22 5 11 10 14 25 25 25 1 14 14 26 26 26 23 3 18 7 13 15 12 26 21 19 3 1 1 8 63 305 305 305 305 305 18 305 28 27 28 284 283 124 71 191 3 284 284 191 553 553 552 552 554 1 1 549 284 110 191 553 554 16 541 110 110 110 3 26 85 110 121 121 120 121 57 14 44 44 44 56 57 13 13 49 49 49 6 43 49 2 1 48 3 3 3 3 3 3 3 3 24 41 112 112 44 24 24 41 41 19 72 41 112 112 41 72 75 16 59 75 339 196 156 128 28 112 112 41 108 7 69 239 170 42 34 75 75 4 31 2309 2312 1911 26 2 38 38 2246 239 77 4 75 43 32 83 85 84 56 2264 2262 2231 2227 129 200 320 454 2192 31 2144 99 31 18 2 19 83 193 174 34 49 162 252 245 38 96 628 256 376 244 444 394 92 92 328 327 328 327 326 328 327 119 9 321 327 237 237 231 108 10 6 110 6 110 108 104 6 20 2 18 92 31 32 86 86 86 85 75 74 22 22 4 1 1 1 1 12 3 3 14 24 7 19 3 6 4 5 3 3 4 24 24 1 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 // SPDX-License-Identifier: GPL-2.0-only /* * xfrm_policy.c * * Changes: * Mitsuru KANDA @USAGI * Kazunori MIYAZAWA @USAGI * Kunihiro Ishiguro <kunihiro@ipinfusion.com> * IPv6 support * Kazunori MIYAZAWA @USAGI * YOSHIFUJI Hideaki * Split up af-specific portion * Derek Atkins <derek@ihtfp.com> Add the post_input processor * */ #include <linux/err.h> #include <linux/slab.h> #include <linux/kmod.h> #include <linux/list.h> #include <linux/spinlock.h> #include <linux/workqueue.h> #include <linux/notifier.h> #include <linux/netdevice.h> #include <linux/netfilter.h> #include <linux/module.h> #include <linux/cache.h> #include <linux/cpu.h> #include <linux/audit.h> #include <linux/rhashtable.h> #include <linux/if_tunnel.h> #include <linux/icmp.h> #include <net/dst.h> #include <net/flow.h> #include <net/inet_ecn.h> #include <net/xfrm.h> #include <net/ip.h> #include <net/gre.h> #if IS_ENABLED(CONFIG_IPV6_MIP6) #include <net/mip6.h> #endif #ifdef CONFIG_XFRM_STATISTICS #include <net/snmp.h> #endif #ifdef CONFIG_XFRM_ESPINTCP #include <net/espintcp.h> #endif #include "xfrm_hash.h" #define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10)) #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) #define XFRM_MAX_QUEUE_LEN 100 struct xfrm_flo { struct dst_entry *dst_orig; u8 flags; }; /* prefixes smaller than this are stored in lists, not trees. */ #define INEXACT_PREFIXLEN_IPV4 16 #define INEXACT_PREFIXLEN_IPV6 48 struct xfrm_pol_inexact_node { struct rb_node node; union { xfrm_address_t addr; struct rcu_head rcu; }; u8 prefixlen; struct rb_root root; /* the policies matching this node, can be empty list */ struct hlist_head hhead; }; /* xfrm inexact policy search tree: * xfrm_pol_inexact_bin = hash(dir,type,family,if_id); * | * +---- root_d: sorted by daddr:prefix * | | * | xfrm_pol_inexact_node * | | * | +- root: sorted by saddr/prefix * | | | * | | xfrm_pol_inexact_node * | | | * | | + root: unused * | | | * | | + hhead: saddr:daddr policies * | | * | +- coarse policies and all any:daddr policies * | * +---- root_s: sorted by saddr:prefix * | | * | xfrm_pol_inexact_node * | | * | + root: unused * | | * | + hhead: saddr:any policies * | * +---- coarse policies and all any:any policies * * Lookups return four candidate lists: * 1. any:any list from top-level xfrm_pol_inexact_bin * 2. any:daddr list from daddr tree * 3. saddr:daddr list from 2nd level daddr tree * 4. saddr:any list from saddr tree * * This result set then needs to be searched for the policy with * the lowest priority. If two results have same prio, youngest one wins. */ struct xfrm_pol_inexact_key { possible_net_t net; u32 if_id; u16 family; u8 dir, type; }; struct xfrm_pol_inexact_bin { struct xfrm_pol_inexact_key k; struct rhash_head head; /* list containing '*:*' policies */ struct hlist_head hhead; seqcount_spinlock_t count; /* tree sorted by daddr/prefix */ struct rb_root root_d; /* tree sorted by saddr/prefix */ struct rb_root root_s; /* slow path below */ struct list_head inexact_bins; struct rcu_head rcu; }; enum xfrm_pol_inexact_candidate_type { XFRM_POL_CAND_BOTH, XFRM_POL_CAND_SADDR, XFRM_POL_CAND_DADDR, XFRM_POL_CAND_ANY, XFRM_POL_CAND_MAX, }; struct xfrm_pol_inexact_candidates { struct hlist_head *res[XFRM_POL_CAND_MAX]; }; struct xfrm_flow_keys { struct flow_dissector_key_basic basic; struct flow_dissector_key_control control; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; } addrs; struct flow_dissector_key_ip ip; struct flow_dissector_key_icmp icmp; struct flow_dissector_key_ports ports; struct flow_dissector_key_keyid gre; }; static struct flow_dissector xfrm_session_dissector __ro_after_init; static DEFINE_SPINLOCK(xfrm_if_cb_lock); static struct xfrm_if_cb const __rcu *xfrm_if_cb __read_mostly; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; static struct kmem_cache *xfrm_dst_cache __ro_after_init; static struct rhashtable xfrm_policy_inexact_table; static const struct rhashtable_params xfrm_pol_inexact_params; static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); static void xfrm_policy_queue_process(struct timer_list *t); static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); static struct xfrm_pol_inexact_bin * xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family, u8 dir, u32 if_id); static struct xfrm_pol_inexact_bin * xfrm_policy_inexact_lookup_rcu(struct net *net, u8 type, u16 family, u8 dir, u32 if_id); static struct xfrm_policy * xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl); static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, struct xfrm_policy *policy); static bool xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand, struct xfrm_pol_inexact_bin *b, const xfrm_address_t *saddr, const xfrm_address_t *daddr); static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy) { return refcount_inc_not_zero(&policy->refcnt); } static inline bool __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { const struct flowi4 *fl4 = &fl->u.ip4; return addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) && addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) && !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) && !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) && (fl4->flowi4_proto == sel->proto || !sel->proto) && (fl4->flowi4_oif == sel->ifindex || !sel->ifindex); } static inline bool __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { const struct flowi6 *fl6 = &fl->u.ip6; return addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) && addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) && !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) && !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) && (fl6->flowi6_proto == sel->proto || !sel->proto) && (fl6->flowi6_oif == sel->ifindex || !sel->ifindex); } bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl, unsigned short family) { switch (family) { case AF_INET: return __xfrm4_selector_match(sel, fl); case AF_INET6: return __xfrm6_selector_match(sel, fl); } return false; } static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { const struct xfrm_policy_afinfo *afinfo; if (unlikely(family >= ARRAY_SIZE(xfrm_policy_afinfo))) return NULL; rcu_read_lock(); afinfo = rcu_dereference(xfrm_policy_afinfo[family]); if (unlikely(!afinfo)) rcu_read_unlock(); return afinfo; } /* Called with rcu_read_lock(). */ static const struct xfrm_if_cb *xfrm_if_get_cb(void) { return rcu_dereference(xfrm_if_cb); } struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, int family, u32 mark) { const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark); rcu_read_unlock(); return dst; } EXPORT_SYMBOL(__xfrm_dst_lookup); static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, int oif, xfrm_address_t *prev_saddr, xfrm_address_t *prev_daddr, int family, u32 mark) { struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; xfrm_address_t *daddr = &x->id.daddr; struct dst_entry *dst; if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) { saddr = x->coaddr; daddr = prev_daddr; } if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) { saddr = prev_saddr; daddr = x->coaddr; } dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark); if (!IS_ERR(dst)) { if (prev_saddr != saddr) memcpy(prev_saddr, saddr, sizeof(*prev_saddr)); if (prev_daddr != daddr) memcpy(prev_daddr, daddr, sizeof(*prev_daddr)); } return dst; } static inline unsigned long make_jiffies(long secs) { if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ) return MAX_SCHEDULE_TIMEOUT-1; else return secs*HZ; } static void xfrm_policy_timer(struct timer_list *t) { struct xfrm_policy *xp = from_timer(xp, t, timer); time64_t now = ktime_get_real_seconds(); time64_t next = TIME64_MAX; int warn = 0; int dir; read_lock(&xp->lock); if (unlikely(xp->walk.dead)) goto out; dir = xfrm_policy_id2dir(xp->index); if (xp->lft.hard_add_expires_seconds) { time64_t tmo = xp->lft.hard_add_expires_seconds + xp->curlft.add_time - now; if (tmo <= 0) goto expired; if (tmo < next) next = tmo; } if (xp->lft.hard_use_expires_seconds) { time64_t tmo = xp->lft.hard_use_expires_seconds + (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now; if (tmo <= 0) goto expired; if (tmo < next) next = tmo; } if (xp->lft.soft_add_expires_seconds) { time64_t tmo = xp->lft.soft_add_expires_seconds + xp->curlft.add_time - now; if (tmo <= 0) { warn = 1; tmo = XFRM_KM_TIMEOUT; } if (tmo < next) next = tmo; } if (xp->lft.soft_use_expires_seconds) { time64_t tmo = xp->lft.soft_use_expires_seconds + (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now; if (tmo <= 0) { warn = 1; tmo = XFRM_KM_TIMEOUT; } if (tmo < next) next = tmo; } if (warn) km_policy_expired(xp, dir, 0, 0); if (next != TIME64_MAX && !mod_timer(&xp->timer, jiffies + make_jiffies(next))) xfrm_pol_hold(xp); out: read_unlock(&xp->lock); xfrm_pol_put(xp); return; expired: read_unlock(&xp->lock); if (!xfrm_policy_delete(xp, dir)) km_policy_expired(xp, dir, 1, 0); xfrm_pol_put(xp); } /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) { struct xfrm_policy *policy; policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { write_pnet(&policy->xp_net, net); INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst_inexact_list); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); refcount_set(&policy->refcnt, 1); skb_queue_head_init(&policy->polq.hold_queue); timer_setup(&policy->timer, xfrm_policy_timer, 0); timer_setup(&policy->polq.hold_timer, xfrm_policy_queue_process, 0); } return policy; } EXPORT_SYMBOL(xfrm_policy_alloc); static void xfrm_policy_destroy_rcu(struct rcu_head *head) { struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu); security_xfrm_policy_free(policy->security); kfree(policy); } /* Destroy xfrm_policy: descendant resources must be released to this moment. */ void xfrm_policy_destroy(struct xfrm_policy *policy) { BUG_ON(!policy->walk.dead); if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer)) BUG(); xfrm_dev_policy_free(policy); call_rcu(&policy->rcu, xfrm_policy_destroy_rcu); } EXPORT_SYMBOL(xfrm_policy_destroy); /* Rule must be locked. Release descendant resources, announce * entry dead. The rule must be unlinked from lists to the moment. */ static void xfrm_policy_kill(struct xfrm_policy *policy) { xfrm_dev_policy_delete(policy); write_lock_bh(&policy->lock); policy->walk.dead = 1; write_unlock_bh(&policy->lock); atomic_inc(&policy->genid); if (del_timer(&policy->polq.hold_timer)) xfrm_pol_put(policy); skb_queue_purge(&policy->polq.hold_queue); if (del_timer(&policy->timer)) xfrm_pol_put(policy); xfrm_pol_put(policy); } static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; static inline unsigned int idx_hash(struct net *net, u32 index) { return __idx_hash(index, net->xfrm.policy_idx_hmask); } /* calculate policy hash thresholds */ static void __get_hash_thresh(struct net *net, unsigned short family, int dir, u8 *dbits, u8 *sbits) { switch (family) { case AF_INET: *dbits = net->xfrm.policy_bydst[dir].dbits4; *sbits = net->xfrm.policy_bydst[dir].sbits4; break; case AF_INET6: *dbits = net->xfrm.policy_bydst[dir].dbits6; *sbits = net->xfrm.policy_bydst[dir].sbits6; break; default: *dbits = 0; *sbits = 0; } } static struct hlist_head *policy_hash_bysel(struct net *net, const struct xfrm_selector *sel, unsigned short family, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash; u8 dbits; u8 sbits; __get_hash_thresh(net, family, dir, &dbits, &sbits); hash = __sel_hash(sel, family, hmask, dbits, sbits); if (hash == hmask + 1) return NULL; return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; } static struct hlist_head *policy_hash_direct(struct net *net, const xfrm_address_t *daddr, const xfrm_address_t *saddr, unsigned short family, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int hash; u8 dbits; u8 sbits; __get_hash_thresh(net, family, dir, &dbits, &sbits); hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits); return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; } static void xfrm_dst_hash_transfer(struct net *net, struct hlist_head *list, struct hlist_head *ndsttable, unsigned int nhashmask, int dir) { struct hlist_node *tmp, *entry0 = NULL; struct xfrm_policy *pol; unsigned int h0 = 0; u8 dbits; u8 sbits; redo: hlist_for_each_entry_safe(pol, tmp, list, bydst) { unsigned int h; __get_hash_thresh(net, pol->family, dir, &dbits, &sbits); h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, pol->family, nhashmask, dbits, sbits); if (!entry0 || pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { hlist_del_rcu(&pol->bydst); hlist_add_head_rcu(&pol->bydst, ndsttable + h); h0 = h; } else { if (h != h0) continue; hlist_del_rcu(&pol->bydst); hlist_add_behind_rcu(&pol->bydst, entry0); } entry0 = &pol->bydst; } if (!hlist_empty(list)) { entry0 = NULL; goto redo; } } static void xfrm_idx_hash_transfer(struct hlist_head *list, struct hlist_head *nidxtable, unsigned int nhashmask) { struct hlist_node *tmp; struct xfrm_policy *pol; hlist_for_each_entry_safe(pol, tmp, list, byidx) { unsigned int h; h = __idx_hash(pol->index, nhashmask); hlist_add_head(&pol->byidx, nidxtable+h); } } static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) { return ((old_hmask + 1) << 1) - 1; } static void xfrm_bydst_resize(struct net *net, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *ndst = xfrm_hash_alloc(nsize); struct hlist_head *odst; int i; if (!ndst) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, lockdep_is_held(&net->xfrm.xfrm_policy_lock)); for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir); rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); synchronize_rcu(); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } static void xfrm_byidx_resize(struct net *net) { unsigned int hmask = net->xfrm.policy_idx_hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); struct hlist_head *oidx = net->xfrm.policy_byidx; struct hlist_head *nidx = xfrm_hash_alloc(nsize); int i; if (!nidx) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); net->xfrm.policy_byidx = nidx; net->xfrm.policy_idx_hmask = nhashmask; spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total) { unsigned int cnt = net->xfrm.policy_count[dir]; unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; if (total) *total += cnt; if ((hmask + 1) < xfrm_policy_hashmax && cnt > hmask) return 1; return 0; } static inline int xfrm_byidx_should_resize(struct net *net, int total) { unsigned int hmask = net->xfrm.policy_idx_hmask; if ((hmask + 1) < xfrm_policy_hashmax && total > hmask) return 1; return 0; } void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; } EXPORT_SYMBOL(xfrm_spd_getinfo); static DEFINE_MUTEX(hash_resize_mutex); static void xfrm_hash_resize(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.policy_hash_work); int dir, total; mutex_lock(&hash_resize_mutex); total = 0; for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { if (xfrm_bydst_should_resize(net, dir, &total)) xfrm_bydst_resize(net, dir); } if (xfrm_byidx_should_resize(net, total)) xfrm_byidx_resize(net); mutex_unlock(&hash_resize_mutex); } /* Make sure *pol can be inserted into fastbin. * Useful to check that later insert requests will be successful * (provided xfrm_policy_lock is held throughout). */ static struct xfrm_pol_inexact_bin * xfrm_policy_inexact_alloc_bin(const struct xfrm_policy *pol, u8 dir) { struct xfrm_pol_inexact_bin *bin, *prev; struct xfrm_pol_inexact_key k = { .family = pol->family, .type = pol->type, .dir = dir, .if_id = pol->if_id, }; struct net *net = xp_net(pol); lockdep_assert_held(&net->xfrm.xfrm_policy_lock); write_pnet(&k.net, net); bin = rhashtable_lookup_fast(&xfrm_policy_inexact_table, &k, xfrm_pol_inexact_params); if (bin) return bin; bin = kzalloc(sizeof(*bin), GFP_ATOMIC); if (!bin) return NULL; bin->k = k; INIT_HLIST_HEAD(&bin->hhead); bin->root_d = RB_ROOT; bin->root_s = RB_ROOT; seqcount_spinlock_init(&bin->count, &net->xfrm.xfrm_policy_lock); prev = rhashtable_lookup_get_insert_key(&xfrm_policy_inexact_table, &bin->k, &bin->head, xfrm_pol_inexact_params); if (!prev) { list_add(&bin->inexact_bins, &net->xfrm.inexact_bins); return bin; } kfree(bin); return IS_ERR(prev) ? NULL : prev; } static bool xfrm_pol_inexact_addr_use_any_list(const xfrm_address_t *addr, int family, u8 prefixlen) { if (xfrm_addr_any(addr, family)) return true; if (family == AF_INET6 && prefixlen < INEXACT_PREFIXLEN_IPV6) return true; if (family == AF_INET && prefixlen < INEXACT_PREFIXLEN_IPV4) return true; return false; } static bool xfrm_policy_inexact_insert_use_any_list(const struct xfrm_policy *policy) { const xfrm_address_t *addr; bool saddr_any, daddr_any; u8 prefixlen; addr = &policy->selector.saddr; prefixlen = policy->selector.prefixlen_s; saddr_any = xfrm_pol_inexact_addr_use_any_list(addr, policy->family, prefixlen); addr = &policy->selector.daddr; prefixlen = policy->selector.prefixlen_d; daddr_any = xfrm_pol_inexact_addr_use_any_list(addr, policy->family, prefixlen); return saddr_any && daddr_any; } static void xfrm_pol_inexact_node_init(struct xfrm_pol_inexact_node *node, const xfrm_address_t *addr, u8 prefixlen) { node->addr = *addr; node->prefixlen = prefixlen; } static struct xfrm_pol_inexact_node * xfrm_pol_inexact_node_alloc(const xfrm_address_t *addr, u8 prefixlen) { struct xfrm_pol_inexact_node *node; node = kzalloc(sizeof(*node), GFP_ATOMIC); if (node) xfrm_pol_inexact_node_init(node, addr, prefixlen); return node; } static int xfrm_policy_addr_delta(const xfrm_address_t *a, const xfrm_address_t *b, u8 prefixlen, u16 family) { u32 ma, mb, mask; unsigned int pdw, pbi; int delta = 0; switch (family) { case AF_INET: if (prefixlen == 0) return 0; mask = ~0U << (32 - prefixlen); ma = ntohl(a->a4) & mask; mb = ntohl(b->a4) & mask; if (ma < mb) delta = -1; else if (ma > mb) delta = 1; break; case AF_INET6: pdw = prefixlen >> 5; pbi = prefixlen & 0x1f; if (pdw) { delta = memcmp(a->a6, b->a6, pdw << 2); if (delta) return delta; } if (pbi) { mask = ~0U << (32 - pbi); ma = ntohl(a->a6[pdw]) & mask; mb = ntohl(b->a6[pdw]) & mask; if (ma < mb) delta = -1; else if (ma > mb) delta = 1; } break; default: break; } return delta; } static void xfrm_policy_inexact_list_reinsert(struct net *net, struct xfrm_pol_inexact_node *n, u16 family) { unsigned int matched_s, matched_d; struct xfrm_policy *policy, *p; matched_s = 0; matched_d = 0; list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { struct hlist_node *newpos = NULL; bool matches_s, matches_d; if (policy->walk.dead || !policy->bydst_reinsert) continue; WARN_ON_ONCE(policy->family != family); policy->bydst_reinsert = false; hlist_for_each_entry(p, &n->hhead, bydst) { if (policy->priority > p->priority) newpos = &p->bydst; else if (policy->priority == p->priority && policy->pos > p->pos) newpos = &p->bydst; else break; } if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) hlist_add_behind_rcu(&policy->bydst, newpos); else hlist_add_head_rcu(&policy->bydst, &n->hhead); /* paranoia checks follow. * Check that the reinserted policy matches at least * saddr or daddr for current node prefix. * * Matching both is fine, matching saddr in one policy * (but not daddr) and then matching only daddr in another * is a bug. */ matches_s = xfrm_policy_addr_delta(&policy->selector.saddr, &n->addr, n->prefixlen, family) == 0; matches_d = xfrm_policy_addr_delta(&policy->selector.daddr, &n->addr, n->prefixlen, family) == 0; if (matches_s && matches_d) continue; WARN_ON_ONCE(!matches_s && !matches_d); if (matches_s) matched_s++; if (matches_d) matched_d++; WARN_ON_ONCE(matched_s && matched_d); } } static void xfrm_policy_inexact_node_reinsert(struct net *net, struct xfrm_pol_inexact_node *n, struct rb_root *new, u16 family) { struct xfrm_pol_inexact_node *node; struct rb_node **p, *parent; /* we should not have another subtree here */ WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root)); restart: parent = NULL; p = &new->rb_node; while (*p) { u8 prefixlen; int delta; parent = *p; node = rb_entry(*p, struct xfrm_pol_inexact_node, node); prefixlen = min(node->prefixlen, n->prefixlen); delta = xfrm_policy_addr_delta(&n->addr, &node->addr, prefixlen, family); if (delta < 0) { p = &parent->rb_left; } else if (delta > 0) { p = &parent->rb_right; } else { bool same_prefixlen = node->prefixlen == n->prefixlen; struct xfrm_policy *tmp; hlist_for_each_entry(tmp, &n->hhead, bydst) { tmp->bydst_reinsert = true; hlist_del_rcu(&tmp->bydst); } node->prefixlen = prefixlen; xfrm_policy_inexact_list_reinsert(net, node, family); if (same_prefixlen) { kfree_rcu(n, rcu); return; } rb_erase(*p, new); kfree_rcu(n, rcu); n = node; goto restart; } } rb_link_node_rcu(&n->node, parent, p); rb_insert_color(&n->node, new); } /* merge nodes v and n */ static void xfrm_policy_inexact_node_merge(struct net *net, struct xfrm_pol_inexact_node *v, struct xfrm_pol_inexact_node *n, u16 family) { struct xfrm_pol_inexact_node *node; struct xfrm_policy *tmp; struct rb_node *rnode; /* To-be-merged node v has a subtree. * * Dismantle it and insert its nodes to n->root. */ while ((rnode = rb_first(&v->root)) != NULL) { node = rb_entry(rnode, struct xfrm_pol_inexact_node, node); rb_erase(&node->node, &v->root); xfrm_policy_inexact_node_reinsert(net, node, &n->root, family); } hlist_for_each_entry(tmp, &v->hhead, bydst) { tmp->bydst_reinsert = true; hlist_del_rcu(&tmp->bydst); } xfrm_policy_inexact_list_reinsert(net, n, family); } static struct xfrm_pol_inexact_node * xfrm_policy_inexact_insert_node(struct net *net, struct rb_root *root, xfrm_address_t *addr, u16 family, u8 prefixlen, u8 dir) { struct xfrm_pol_inexact_node *cached = NULL; struct rb_node **p, *parent = NULL; struct xfrm_pol_inexact_node *node; p = &root->rb_node; while (*p) { int delta; parent = *p; node = rb_entry(*p, struct xfrm_pol_inexact_node, node); delta = xfrm_policy_addr_delta(addr, &node->addr, node->prefixlen, family); if (delta == 0 && prefixlen >= node->prefixlen) { WARN_ON_ONCE(cached); /* ipsec policies got lost */ return node; } if (delta < 0) p = &parent->rb_left; else p = &parent->rb_right; if (prefixlen < node->prefixlen) { delta = xfrm_policy_addr_delta(addr, &node->addr, prefixlen, family); if (delta) continue; /* This node is a subnet of the new prefix. It needs * to be removed and re-inserted with the smaller * prefix and all nodes that are now also covered * by the reduced prefixlen. */ rb_erase(&node->node, root); if (!cached) { xfrm_pol_inexact_node_init(node, addr, prefixlen); cached = node; } else { /* This node also falls within the new * prefixlen. Merge the to-be-reinserted * node and this one. */ xfrm_policy_inexact_node_merge(net, node, cached, family); kfree_rcu(node, rcu); } /* restart */ p = &root->rb_node; parent = NULL; } } node = cached; if (!node) { node = xfrm_pol_inexact_node_alloc(addr, prefixlen); if (!node) return NULL; } rb_link_node_rcu(&node->node, parent, p); rb_insert_color(&node->node, root); return node; } static void xfrm_policy_inexact_gc_tree(struct rb_root *r, bool rm) { struct xfrm_pol_inexact_node *node; struct rb_node *rn = rb_first(r); while (rn) { node = rb_entry(rn, struct xfrm_pol_inexact_node, node); xfrm_policy_inexact_gc_tree(&node->root, rm); rn = rb_next(rn); if (!hlist_empty(&node->hhead) || !RB_EMPTY_ROOT(&node->root)) { WARN_ON_ONCE(rm); continue; } rb_erase(&node->node, r); kfree_rcu(node, rcu); } } static void __xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b, bool net_exit) { write_seqcount_begin(&b->count); xfrm_policy_inexact_gc_tree(&b->root_d, net_exit); xfrm_policy_inexact_gc_tree(&b->root_s, net_exit); write_seqcount_end(&b->count); if (!RB_EMPTY_ROOT(&b->root_d) || !RB_EMPTY_ROOT(&b->root_s) || !hlist_empty(&b->hhead)) { WARN_ON_ONCE(net_exit); return; } if (rhashtable_remove_fast(&xfrm_policy_inexact_table, &b->head, xfrm_pol_inexact_params) == 0) { list_del(&b->inexact_bins); kfree_rcu(b, rcu); } } static void xfrm_policy_inexact_prune_bin(struct xfrm_pol_inexact_bin *b) { struct net *net = read_pnet(&b->k.net); spin_lock_bh(&net->xfrm.xfrm_policy_lock); __xfrm_policy_inexact_prune_bin(b, false); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); } static void __xfrm_policy_inexact_flush(struct net *net) { struct xfrm_pol_inexact_bin *bin, *t; lockdep_assert_held(&net->xfrm.xfrm_policy_lock); list_for_each_entry_safe(bin, t, &net->xfrm.inexact_bins, inexact_bins) __xfrm_policy_inexact_prune_bin(bin, false); } static struct hlist_head * xfrm_policy_inexact_alloc_chain(struct xfrm_pol_inexact_bin *bin, struct xfrm_policy *policy, u8 dir) { struct xfrm_pol_inexact_node *n; struct net *net; net = xp_net(policy); lockdep_assert_held(&net->xfrm.xfrm_policy_lock); if (xfrm_policy_inexact_insert_use_any_list(policy)) return &bin->hhead; if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.daddr, policy->family, policy->selector.prefixlen_d)) { write_seqcount_begin(&bin->count); n = xfrm_policy_inexact_insert_node(net, &bin->root_s, &policy->selector.saddr, policy->family, policy->selector.prefixlen_s, dir); write_seqcount_end(&bin->count); if (!n) return NULL; return &n->hhead; } /* daddr is fixed */ write_seqcount_begin(&bin->count); n = xfrm_policy_inexact_insert_node(net, &bin->root_d, &policy->selector.daddr, policy->family, policy->selector.prefixlen_d, dir); write_seqcount_end(&bin->count); if (!n) return NULL; /* saddr is wildcard */ if (xfrm_pol_inexact_addr_use_any_list(&policy->selector.saddr, policy->family, policy->selector.prefixlen_s)) return &n->hhead; write_seqcount_begin(&bin->count); n = xfrm_policy_inexact_insert_node(net, &n->root, &policy->selector.saddr, policy->family, policy->selector.prefixlen_s, dir); write_seqcount_end(&bin->count); if (!n) return NULL; return &n->hhead; } static struct xfrm_policy * xfrm_policy_inexact_insert(struct xfrm_policy *policy, u8 dir, int excl) { struct xfrm_pol_inexact_bin *bin; struct xfrm_policy *delpol; struct hlist_head *chain; struct net *net; bin = xfrm_policy_inexact_alloc_bin(policy, dir); if (!bin) return ERR_PTR(-ENOMEM); net = xp_net(policy); lockdep_assert_held(&net->xfrm.xfrm_policy_lock); chain = xfrm_policy_inexact_alloc_chain(bin, policy, dir); if (!chain) { __xfrm_policy_inexact_prune_bin(bin, false); return ERR_PTR(-ENOMEM); } delpol = xfrm_policy_insert_list(chain, policy, excl); if (delpol && excl) { __xfrm_policy_inexact_prune_bin(bin, false); return ERR_PTR(-EEXIST); } chain = &net->xfrm.policy_inexact[dir]; xfrm_policy_insert_inexact_list(chain, policy); if (delpol) __xfrm_policy_inexact_prune_bin(bin, false); return delpol; } static void xfrm_hash_rebuild(struct work_struct *work) { struct net *net = container_of(work, struct net, xfrm.policy_hthresh.work); unsigned int hmask; struct xfrm_policy *pol; struct xfrm_policy *policy; struct hlist_head *chain; struct hlist_head *odst; struct hlist_node *newpos; int i; int dir; unsigned seq; u8 lbits4, rbits4, lbits6, rbits6; mutex_lock(&hash_resize_mutex); /* read selector prefixlen thresholds */ do { seq = read_seqbegin(&net->xfrm.policy_hthresh.lock); lbits4 = net->xfrm.policy_hthresh.lbits4; rbits4 = net->xfrm.policy_hthresh.rbits4; lbits6 = net->xfrm.policy_hthresh.lbits6; rbits6 = net->xfrm.policy_hthresh.rbits6; } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); write_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); /* make sure that we can insert the indirect policies again before * we start with destructive action. */ list_for_each_entry(policy, &net->xfrm.policy_all, walk.all) { struct xfrm_pol_inexact_bin *bin; u8 dbits, sbits; if (policy->walk.dead) continue; dir = xfrm_policy_id2dir(policy->index); if (dir >= XFRM_POLICY_MAX) continue; if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { if (policy->family == AF_INET) { dbits = rbits4; sbits = lbits4; } else { dbits = rbits6; sbits = lbits6; } } else { if (policy->family == AF_INET) { dbits = lbits4; sbits = rbits4; } else { dbits = lbits6; sbits = rbits6; } } if (policy->selector.prefixlen_d < dbits || policy->selector.prefixlen_s < sbits) continue; bin = xfrm_policy_inexact_alloc_bin(policy, dir); if (!bin) goto out_unlock; if (!xfrm_policy_inexact_alloc_chain(bin, policy, dir)) goto out_unlock; } /* reset the bydst and inexact table in all directions */ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct hlist_node *n; hlist_for_each_entry_safe(policy, n, &net->xfrm.policy_inexact[dir], bydst_inexact_list) { hlist_del_rcu(&policy->bydst); hlist_del_init(&policy->bydst_inexact_list); } hmask = net->xfrm.policy_bydst[dir].hmask; odst = net->xfrm.policy_bydst[dir].table; for (i = hmask; i >= 0; i--) { hlist_for_each_entry_safe(policy, n, odst + i, bydst) hlist_del_rcu(&policy->bydst); } if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { /* dir out => dst = remote, src = local */ net->xfrm.policy_bydst[dir].dbits4 = rbits4; net->xfrm.policy_bydst[dir].sbits4 = lbits4; net->xfrm.policy_bydst[dir].dbits6 = rbits6; net->xfrm.policy_bydst[dir].sbits6 = lbits6; } else { /* dir in/fwd => dst = local, src = remote */ net->xfrm.policy_bydst[dir].dbits4 = lbits4; net->xfrm.policy_bydst[dir].sbits4 = rbits4; net->xfrm.policy_bydst[dir].dbits6 = lbits6; net->xfrm.policy_bydst[dir].sbits6 = rbits6; } } /* re-insert all policies by order of creation */ list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { if (policy->walk.dead) continue; dir = xfrm_policy_id2dir(policy->index); if (dir >= XFRM_POLICY_MAX) { /* skip socket policies */ continue; } newpos = NULL; chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); if (!chain) { void *p = xfrm_policy_inexact_insert(policy, dir, 0); WARN_ONCE(IS_ERR(p), "reinsert: %ld\n", PTR_ERR(p)); continue; } hlist_for_each_entry(pol, chain, bydst) { if (policy->priority >= pol->priority) newpos = &pol->bydst; else break; } if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) hlist_add_behind_rcu(&policy->bydst, newpos); else hlist_add_head_rcu(&policy->bydst, chain); } out_unlock: __xfrm_policy_inexact_flush(net); write_seqcount_end(&net->xfrm.xfrm_policy_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); } void xfrm_policy_hash_rebuild(struct net *net) { schedule_work(&net->xfrm.policy_hthresh.work); } EXPORT_SYMBOL(xfrm_policy_hash_rebuild); /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { for (;;) { struct hlist_head *list; struct xfrm_policy *p; u32 idx; int found; if (!index) { idx = (net->xfrm.idx_generator | dir); net->xfrm.idx_generator += 8; } else { idx = index; index = 0; } if (idx == 0) idx = 8; list = net->xfrm.policy_byidx + idx_hash(net, idx); found = 0; hlist_for_each_entry(p, list, byidx) { if (p->index == idx) { found = 1; break; } } if (!found) return idx; } } static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) { u32 *p1 = (u32 *) s1; u32 *p2 = (u32 *) s2; int len = sizeof(struct xfrm_selector) / sizeof(u32); int i; for (i = 0; i < len; i++) { if (p1[i] != p2[i]) return 1; } return 0; } static void xfrm_policy_requeue(struct xfrm_policy *old, struct xfrm_policy *new) { struct xfrm_policy_queue *pq = &old->polq; struct sk_buff_head list; if (skb_queue_empty(&pq->hold_queue)) return; __skb_queue_head_init(&list); spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice_init(&pq->hold_queue, &list); if (del_timer(&pq->hold_timer)) xfrm_pol_put(old); spin_unlock_bh(&pq->hold_queue.lock); pq = &new->polq; spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice(&list, &pq->hold_queue); pq->timeout = XFRM_QUEUE_TMO_MIN; if (!mod_timer(&pq->hold_timer, jiffies)) xfrm_pol_hold(new); spin_unlock_bh(&pq->hold_queue.lock); } static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark, struct xfrm_policy *pol) { return mark->v == pol->mark.v && mark->m == pol->mark.m; } static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed) { const struct xfrm_pol_inexact_key *k = data; u32 a = k->type << 24 | k->dir << 16 | k->family; return jhash_3words(a, k->if_id, net_hash_mix(read_pnet(&k->net)), seed); } static u32 xfrm_pol_bin_obj(const void *data, u32 len, u32 seed) { const struct xfrm_pol_inexact_bin *b = data; return xfrm_pol_bin_key(&b->k, 0, seed); } static int xfrm_pol_bin_cmp(struct rhashtable_compare_arg *arg, const void *ptr) { const struct xfrm_pol_inexact_key *key = arg->key; const struct xfrm_pol_inexact_bin *b = ptr; int ret; if (!net_eq(read_pnet(&b->k.net), read_pnet(&key->net))) return -1; ret = b->k.dir ^ key->dir; if (ret) return ret; ret = b->k.type ^ key->type; if (ret) return ret; ret = b->k.family ^ key->family; if (ret) return ret; return b->k.if_id ^ key->if_id; } static const struct rhashtable_params xfrm_pol_inexact_params = { .head_offset = offsetof(struct xfrm_pol_inexact_bin, head), .hashfn = xfrm_pol_bin_key, .obj_hashfn = xfrm_pol_bin_obj, .obj_cmpfn = xfrm_pol_bin_cmp, .automatic_shrinking = true, }; static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, struct xfrm_policy *policy) { struct xfrm_policy *pol, *delpol = NULL; struct hlist_node *newpos = NULL; int i = 0; hlist_for_each_entry(pol, chain, bydst_inexact_list) { if (pol->type == policy->type && pol->if_id == policy->if_id && !selector_cmp(&pol->selector, &policy->selector) && xfrm_policy_mark_match(&policy->mark, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { delpol = pol; if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { newpos = &pol->bydst_inexact_list; continue; } if (delpol) break; } if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) hlist_add_behind_rcu(&policy->bydst_inexact_list, newpos); else hlist_add_head_rcu(&policy->bydst_inexact_list, chain); hlist_for_each_entry(pol, chain, bydst_inexact_list) { pol->pos = i; i++; } } static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain, struct xfrm_policy *policy, bool excl) { struct xfrm_policy *pol, *newpos = NULL, *delpol = NULL; hlist_for_each_entry(pol, chain, bydst) { if (pol->type == policy->type && pol->if_id == policy->if_id && !selector_cmp(&pol->selector, &policy->selector) && xfrm_policy_mark_match(&policy->mark, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) return ERR_PTR(-EEXIST); delpol = pol; if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { newpos = pol; continue; } if (delpol) break; } if (newpos && policy->xdo.type != XFRM_DEV_OFFLOAD_PACKET) hlist_add_behind_rcu(&policy->bydst, &newpos->bydst); else /* Packet offload policies enter to the head * to speed-up lookups. */ hlist_add_head_rcu(&policy->bydst, chain); return delpol; } int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { struct net *net = xp_net(policy); struct xfrm_policy *delpol; struct hlist_head *chain; spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); if (chain) delpol = xfrm_policy_insert_list(chain, policy, excl); else delpol = xfrm_policy_inexact_insert(policy, dir, excl); if (IS_ERR(delpol)) { spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return PTR_ERR(delpol); } __xfrm_policy_link(policy, dir); /* After previous checking, family can either be AF_INET or AF_INET6 */ if (policy->family == AF_INET) rt_genid_bump_ipv4(net); else rt_genid_bump_ipv6(net); if (delpol) { xfrm_policy_requeue(delpol, policy); __xfrm_policy_unlink(delpol, dir); } policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index); hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = ktime_get_real_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); else if (xfrm_bydst_should_resize(net, dir, NULL)) schedule_work(&net->xfrm.policy_hash_work); return 0; } EXPORT_SYMBOL(xfrm_policy_insert); static struct xfrm_policy * __xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark, u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx) { struct xfrm_policy *pol; if (!chain) return NULL; hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && pol->if_id == if_id && xfrm_policy_mark_match(mark, pol) && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) return pol; } return NULL; } struct xfrm_policy * xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) { struct xfrm_pol_inexact_bin *bin = NULL; struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; *err = 0; spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); if (!chain) { struct xfrm_pol_inexact_candidates cand; int i; bin = xfrm_policy_inexact_lookup(net, type, sel->family, dir, if_id); if (!bin) { spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return NULL; } if (!xfrm_policy_find_inexact_candidates(&cand, bin, &sel->saddr, &sel->daddr)) { spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return NULL; } pol = NULL; for (i = 0; i < ARRAY_SIZE(cand.res); i++) { struct xfrm_policy *tmp; tmp = __xfrm_policy_bysel_ctx(cand.res[i], mark, if_id, type, dir, sel, ctx); if (!tmp) continue; if (!pol || tmp->pos < pol->pos) pol = tmp; } } else { pol = __xfrm_policy_bysel_ctx(chain, mark, if_id, type, dir, sel, ctx); } if (pol) { xfrm_pol_hold(pol); if (delete) { *err = security_xfrm_policy_delete(pol->security); if (*err) { spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); } ret = pol; } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); if (bin && delete) xfrm_policy_inexact_prune_bin(bin); return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); struct xfrm_policy * xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id, u8 type, int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; *err = -ENOENT; if (xfrm_policy_id2dir(id) != dir) return NULL; *err = 0; spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, chain, byidx) { if (pol->type == type && pol->index == id && pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) { xfrm_pol_hold(pol); if (delete) { *err = security_xfrm_policy_delete( pol->security); if (*err) { spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); } ret = pol; break; } } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); return ret; } EXPORT_SYMBOL(xfrm_policy_byid); #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { struct xfrm_policy *pol; int err = 0; list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { if (pol->walk.dead || xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX || pol->type != type) continue; err = security_xfrm_policy_delete(pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, task_valid); return err; } } return err; } static inline int xfrm_dev_policy_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid) { struct xfrm_policy *pol; int err = 0; list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { if (pol->walk.dead || xfrm_policy_id2dir(pol->index) >= XFRM_POLICY_MAX || pol->xdo.dev != dev) continue; err = security_xfrm_policy_delete(pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, task_valid); return err; } } return err; } #else static inline int xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { return 0; } static inline int xfrm_dev_policy_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid) { return 0; } #endif int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) { int dir, err = 0, cnt = 0; struct xfrm_policy *pol; spin_lock_bh(&net->xfrm.xfrm_policy_lock); err = xfrm_policy_flush_secctx_check(net, type, task_valid); if (err) goto out; again: list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { if (pol->walk.dead) continue; dir = xfrm_policy_id2dir(pol->index); if (dir >= XFRM_POLICY_MAX || pol->type != type) continue; __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again; } if (cnt) __xfrm_policy_inexact_flush(net); else err = -ESRCH; out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, bool task_valid) { int dir, err = 0, cnt = 0; struct xfrm_policy *pol; spin_lock_bh(&net->xfrm.xfrm_policy_lock); err = xfrm_dev_policy_flush_secctx_check(net, dev, task_valid); if (err) goto out; again: list_for_each_entry(pol, &net->xfrm.policy_all, walk.all) { if (pol->walk.dead) continue; dir = xfrm_policy_id2dir(pol->index); if (dir >= XFRM_POLICY_MAX || pol->xdo.dev != dev) continue; __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again; } if (cnt) __xfrm_policy_inexact_flush(net); else err = -ESRCH; out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_dev_policy_flush); int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { struct xfrm_policy *pol; struct xfrm_policy_walk_entry *x; int error = 0; if (walk->type >= XFRM_POLICY_TYPE_MAX && walk->type != XFRM_POLICY_TYPE_ANY) return -EINVAL; if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; spin_lock_bh(&net->xfrm.xfrm_policy_lock); if (list_empty(&walk->walk.all)) x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else x = list_first_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); list_for_each_entry_from(x, &net->xfrm.policy_all, all) { if (x->dead) continue; pol = container_of(x, struct xfrm_policy, walk); if (walk->type != XFRM_POLICY_TYPE_ANY && walk->type != pol->type) continue; error = func(pol, xfrm_policy_id2dir(pol->index), walk->seq, data); if (error) { list_move_tail(&walk->walk.all, &x->all); goto out; } walk->seq++; } if (walk->seq == 0) { error = -ENOENT; goto out; } list_del_init(&walk->walk.all); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) { INIT_LIST_HEAD(&walk->walk.all); walk->walk.dead = 1; walk->type = type; walk->seq = 0; } EXPORT_SYMBOL(xfrm_policy_walk_init); void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) { if (list_empty(&walk->walk.all)) return; spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ list_del(&walk->walk.all); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_walk_done); /* * Find policy to apply to this flow. * * Returns 0 if policy found, else an -errno. */ static int xfrm_policy_match(const struct xfrm_policy *pol, const struct flowi *fl, u8 type, u16 family, u32 if_id) { const struct xfrm_selector *sel = &pol->selector; int ret = -ESRCH; bool match; if (pol->family != family || pol->if_id != if_id || (fl->flowi_mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; match = xfrm_selector_match(sel, fl, family); if (match) ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid); return ret; } static struct xfrm_pol_inexact_node * xfrm_policy_lookup_inexact_addr(const struct rb_root *r, seqcount_spinlock_t *count, const xfrm_address_t *addr, u16 family) { const struct rb_node *parent; int seq; again: seq = read_seqcount_begin(count); parent = rcu_dereference_raw(r->rb_node); while (parent) { struct xfrm_pol_inexact_node *node; int delta; node = rb_entry(parent, struct xfrm_pol_inexact_node, node); delta = xfrm_policy_addr_delta(addr, &node->addr, node->prefixlen, family); if (delta < 0) { parent = rcu_dereference_raw(parent->rb_left); continue; } else if (delta > 0) { parent = rcu_dereference_raw(parent->rb_right); continue; } return node; } if (read_seqcount_retry(count, seq)) goto again; return NULL; } static bool xfrm_policy_find_inexact_candidates(struct xfrm_pol_inexact_candidates *cand, struct xfrm_pol_inexact_bin *b, const xfrm_address_t *saddr, const xfrm_address_t *daddr) { struct xfrm_pol_inexact_node *n; u16 family; if (!b) return false; family = b->k.family; memset(cand, 0, sizeof(*cand)); cand->res[XFRM_POL_CAND_ANY] = &b->hhead; n = xfrm_policy_lookup_inexact_addr(&b->root_d, &b->count, daddr, family); if (n) { cand->res[XFRM_POL_CAND_DADDR] = &n->hhead; n = xfrm_policy_lookup_inexact_addr(&n->root, &b->count, saddr, family); if (n) cand->res[XFRM_POL_CAND_BOTH] = &n->hhead; } n = xfrm_policy_lookup_inexact_addr(&b->root_s, &b->count, saddr, family); if (n) cand->res[XFRM_POL_CAND_SADDR] = &n->hhead; return true; } static struct xfrm_pol_inexact_bin * xfrm_policy_inexact_lookup_rcu(struct net *net, u8 type, u16 family, u8 dir, u32 if_id) { struct xfrm_pol_inexact_key k = { .family = family, .type = type, .dir = dir, .if_id = if_id, }; write_pnet(&k.net, net); return rhashtable_lookup(&xfrm_policy_inexact_table, &k, xfrm_pol_inexact_params); } static struct xfrm_pol_inexact_bin * xfrm_policy_inexact_lookup(struct net *net, u8 type, u16 family, u8 dir, u32 if_id) { struct xfrm_pol_inexact_bin *bin; lockdep_assert_held(&net->xfrm.xfrm_policy_lock); rcu_read_lock(); bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id); rcu_read_unlock(); return bin; } static struct xfrm_policy * __xfrm_policy_eval_candidates(struct hlist_head *chain, struct xfrm_policy *prefer, const struct flowi *fl, u8 type, u16 family, u32 if_id) { u32 priority = prefer ? prefer->priority : ~0u; struct xfrm_policy *pol; if (!chain) return NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { int err; if (pol->priority > priority) break; err = xfrm_policy_match(pol, fl, type, family, if_id); if (err) { if (err != -ESRCH) return ERR_PTR(err); continue; } if (prefer) { /* matches. Is it older than *prefer? */ if (pol->priority == priority && prefer->pos < pol->pos) return prefer; } return pol; } return NULL; } static struct xfrm_policy * xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand, struct xfrm_policy *prefer, const struct flowi *fl, u8 type, u16 family, u32 if_id) { struct xfrm_policy *tmp; int i; for (i = 0; i < ARRAY_SIZE(cand->res); i++) { tmp = __xfrm_policy_eval_candidates(cand->res[i], prefer, fl, type, family, if_id); if (!tmp) continue; if (IS_ERR(tmp)) return tmp; prefer = tmp; } return prefer; } static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, const struct flowi *fl, u16 family, u8 dir, u32 if_id) { struct xfrm_pol_inexact_candidates cand; const xfrm_address_t *daddr, *saddr; struct xfrm_pol_inexact_bin *bin; struct xfrm_policy *pol, *ret; struct hlist_head *chain; unsigned int sequence; int err; daddr = xfrm_flowi_daddr(fl, family); saddr = xfrm_flowi_saddr(fl, family); if (unlikely(!daddr || !saddr)) return NULL; rcu_read_lock(); retry: do { sequence = read_seqcount_begin(&net->xfrm.xfrm_policy_hash_generation); chain = policy_hash_direct(net, daddr, saddr, family, dir); } while (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)); ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, if_id); if (err) { if (err == -ESRCH) continue; else { ret = ERR_PTR(err); goto fail; } } else { ret = pol; break; } } if (ret && ret->xdo.type == XFRM_DEV_OFFLOAD_PACKET) goto skip_inexact; bin = xfrm_policy_inexact_lookup_rcu(net, type, family, dir, if_id); if (!bin || !xfrm_policy_find_inexact_candidates(&cand, bin, saddr, daddr)) goto skip_inexact; pol = xfrm_policy_eval_candidates(&cand, ret, fl, type, family, if_id); if (pol) { ret = pol; if (IS_ERR(pol)) goto fail; } skip_inexact: if (read_seqcount_retry(&net->xfrm.xfrm_policy_hash_generation, sequence)) goto retry; if (ret && !xfrm_pol_hold_rcu(ret)) goto retry; fail: rcu_read_unlock(); return ret; } static struct xfrm_policy *xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, u32 if_id) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir, if_id); if (pol != NULL) return pol; #endif return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir, if_id); } static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl, u16 family, u32 if_id) { struct xfrm_policy *pol; rcu_read_lock(); again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { bool match; int err = 0; if (pol->family != family) { pol = NULL; goto out; } match = xfrm_selector_match(&pol->selector, fl, family); if (match) { if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v || pol->if_id != if_id) { pol = NULL; goto out; } err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid); if (!err) { if (!xfrm_pol_hold_rcu(pol)) goto again; } else if (err == -ESRCH) { pol = NULL; } else { pol = ERR_PTR(err); } } else pol = NULL; } out: rcu_read_unlock(); return pol; } static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); list_add(&pol->walk.all, &net->xfrm.policy_all); net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); if (list_empty(&pol->walk.all)) return NULL; /* Socket policies are not hashed. */ if (!hlist_unhashed(&pol->bydst)) { hlist_del_rcu(&pol->bydst); hlist_del_init(&pol->bydst_inexact_list); hlist_del(&pol->byidx); } list_del_init(&pol->walk.all); net->xfrm.policy_count[dir]--; return pol; } static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir) { __xfrm_policy_link(pol, XFRM_POLICY_MAX + dir); } static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir) { __xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir); } int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); spin_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { xfrm_policy_kill(pol); return 0; } return -ENOENT; } EXPORT_SYMBOL(xfrm_policy_delete); int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct net *net = sock_net(sk); struct xfrm_policy *old_pol; #ifdef CONFIG_XFRM_SUB_POLICY if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) return -EINVAL; #endif spin_lock_bh(&net->xfrm.xfrm_policy_lock); old_pol = rcu_dereference_protected(sk->sk_policy[dir], lockdep_is_held(&net->xfrm.xfrm_policy_lock)); if (pol) { pol->curlft.add_time = ktime_get_real_seconds(); pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); xfrm_sk_policy_link(pol, dir); } rcu_assign_pointer(sk->sk_policy[dir], pol); if (old_pol) { if (pol) xfrm_policy_requeue(old_pol, pol); /* Unlinking succeeds always. This is the only function * allowed to delete or replace socket policy. */ xfrm_sk_policy_unlink(old_pol, dir); } spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); } return 0; } static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) { struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); struct net *net = xp_net(old); if (newp) { newp->selector = old->selector; if (security_xfrm_policy_clone(old->security, &newp->security)) { kfree(newp); return NULL; /* ENOMEM */ } newp->lft = old->lft; newp->curlft = old->curlft; newp->mark = old->mark; newp->if_id = old->if_id; newp->action = old->action; newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; newp->type = old->type; newp->family = old->family; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); xfrm_sk_policy_link(newp, dir); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; } int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { const struct xfrm_policy *p; struct xfrm_policy *np; int i, ret = 0; rcu_read_lock(); for (i = 0; i < 2; i++) { p = rcu_dereference(osk->sk_policy[i]); if (p) { np = clone_policy(p, i); if (unlikely(!np)) { ret = -ENOMEM; break; } rcu_assign_pointer(sk->sk_policy[i], np); } } rcu_read_unlock(); return ret; } static int xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, xfrm_address_t *remote, unsigned short family, u32 mark) { int err; const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; err = afinfo->get_saddr(net, oif, local, remote, mark); rcu_read_unlock(); return err; } /* Resolve list of templates for the flow, given policy. */ static int xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { struct net *net = xp_net(policy); int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); xfrm_address_t tmp; for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; xfrm_address_t *remote = daddr; xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; if (tmpl->mode == XFRM_MODE_TUNNEL || tmpl->mode == XFRM_MODE_BEET) { remote = &tmpl->id.daddr; local = &tmpl->saddr; if (xfrm_addr_any(local, tmpl->encap_family)) { error = xfrm_get_saddr(net, fl->flowi_oif, &tmp, remote, tmpl->encap_family, 0); if (error) goto fail; local = &tmp; } } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family, policy->if_id); if (x && x->dir && x->dir != XFRM_SA_DIR_OUT) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEDIRERROR); xfrm_state_put(x); error = -EINVAL; goto fail; } if (x && x->km.state == XFRM_STATE_VALID) { xfrm[nx++] = x; daddr = remote; saddr = local; continue; } if (x) { error = (x->km.state == XFRM_STATE_ERROR ? -EINVAL : -EAGAIN); xfrm_state_put(x); } else if (error == -ESRCH) { error = -EAGAIN; } if (!tmpl->optional) goto fail; } return nx; fail: for (nx--; nx >= 0; nx--) xfrm_state_put(xfrm[nx]); return error; } static int xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { struct xfrm_state *tp[XFRM_MAX_DEPTH]; struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; int cnx = 0; int error; int ret; int i; for (i = 0; i < npols; i++) { if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { error = -ENOBUFS; goto fail; } ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); if (ret < 0) { error = ret; goto fail; } else cnx += ret; } /* found states are sorted for outbound processing */ if (npols > 1) xfrm_state_sort(xfrm, tpp, cnx, family); return cnx; fail: for (cnx--; cnx >= 0; cnx--) xfrm_state_put(tpp[cnx]); return error; } static int xfrm_get_tos(const struct flowi *fl, int family) { if (family == AF_INET) return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; return 0; } static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); struct dst_ops *dst_ops; struct xfrm_dst *xdst; if (!afinfo) return ERR_PTR(-EINVAL); switch (family) { case AF_INET: dst_ops = &net->xfrm.xfrm4_dst_ops; break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: dst_ops = &net->xfrm.xfrm6_dst_ops; break; #endif default: BUG(); } xdst = dst_alloc(dst_ops, NULL, DST_OBSOLETE_NONE, 0); if (likely(xdst)) { memset_after(xdst, 0, u.dst); } else xdst = ERR_PTR(-ENOBUFS); rcu_read_unlock(); return xdst; } static void xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { if (dst->ops->family == AF_INET6) { path->path_cookie = rt6_get_cookie(dst_rt6_info(dst)); path->u.rt6.rt6i_nfheader_len = nfheader_len; } } static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, const struct flowi *fl) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(xdst->u.dst.ops->family); int err; if (!afinfo) return -EINVAL; err = afinfo->fill_dst(xdst, dev, fl); rcu_read_unlock(); return err; } /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, struct xfrm_dst **bundle, int nx, const struct flowi *fl, struct dst_entry *dst) { const struct xfrm_state_afinfo *afinfo; const struct xfrm_mode *inner_mode; struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; struct xfrm_dst *xdst_prev = NULL; struct xfrm_dst *xdst0 = NULL; int i = 0; int err; int header_len = 0; int nfheader_len = 0; int trailer_len = 0; int tos; int family = policy->selector.family; xfrm_address_t saddr, daddr; xfrm_flowi_addr_get(fl, &saddr, &daddr, family); tos = xfrm_get_tos(fl, family); dst_hold(dst); for (; i < nx; i++) { struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); struct dst_entry *dst1 = &xdst->u.dst; err = PTR_ERR(xdst); if (IS_ERR(xdst)) { dst_release(dst); goto put_states; } bundle[i] = xdst; if (!xdst_prev) xdst0 = xdst; else /* Ref count is taken during xfrm_alloc_dst() * No need to do dst_clone() on dst1 */ xfrm_dst_set_child(xdst_prev, &xdst->u.dst); if (xfrm[i]->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(xfrm[i], xfrm_af2proto(family)); if (!inner_mode) { err = -EAFNOSUPPORT; dst_release(dst); goto put_states; } } else inner_mode = &xfrm[i]->inner_mode; xdst->route = dst; dst_copy_metrics(dst1, dst); if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { __u32 mark = 0; int oif; if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m) mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); if (xfrm[i]->xso.type != XFRM_DEV_OFFLOAD_PACKET) family = xfrm[i]->props.family; oif = fl->flowi_oif ? : fl->flowi_l3mdev; dst = xfrm_dst_lookup(xfrm[i], tos, oif, &saddr, &daddr, family, mark); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; } else dst_hold(dst); dst1->xfrm = xfrm[i]; xdst->xfrm_genid = xfrm[i]->genid; dst1->obsolete = DST_OBSOLETE_FORCE_CHK; dst1->lastuse = now; dst1->input = dst_discard; rcu_read_lock(); afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family); if (likely(afinfo)) dst1->output = afinfo->output; else dst1->output = dst_discard_out; rcu_read_unlock(); xdst_prev = xdst; header_len += xfrm[i]->props.header_len; if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT) nfheader_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; } xfrm_dst_set_child(xdst_prev, dst); xdst0->path = dst; err = -ENODEV; dev = dst->dev; if (!dev) goto free_dst; xfrm_init_path(xdst0, dst, nfheader_len); xfrm_init_pmtu(bundle, nx); for (xdst_prev = xdst0; xdst_prev != (struct xfrm_dst *)dst; xdst_prev = (struct xfrm_dst *) xfrm_dst_child(&xdst_prev->u.dst)) { err = xfrm_fill_dst(xdst_prev, dev, fl); if (err) goto free_dst; xdst_prev->u.dst.header_len = header_len; xdst_prev->u.dst.trailer_len = trailer_len; header_len -= xdst_prev->u.dst.xfrm->props.header_len; trailer_len -= xdst_prev->u.dst.xfrm->props.trailer_len; } return &xdst0->u.dst; put_states: for (; i < nx; i++) xfrm_state_put(xfrm[i]); free_dst: if (xdst0) dst_release_immediate(&xdst0->u.dst); return ERR_PTR(err); } static int xfrm_expand_policies(const struct flowi *fl, u16 family, struct xfrm_policy **pols, int *num_pols, int *num_xfrms) { int i; if (*num_pols == 0 || !pols[0]) { *num_pols = 0; *num_xfrms = 0; return 0; } if (IS_ERR(pols[0])) { *num_pols = 0; return PTR_ERR(pols[0]); } *num_xfrms = pols[0]->xfrm_nr; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->action == XFRM_POLICY_ALLOW && pols[0]->type != XFRM_POLICY_TYPE_MAIN) { pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]), XFRM_POLICY_TYPE_MAIN, fl, family, XFRM_POLICY_OUT, pols[0]->if_id); if (pols[1]) { if (IS_ERR(pols[1])) { xfrm_pols_put(pols, *num_pols); *num_pols = 0; return PTR_ERR(pols[1]); } (*num_pols)++; (*num_xfrms) += pols[1]->xfrm_nr; } } #endif for (i = 0; i < *num_pols; i++) { if (pols[i]->action != XFRM_POLICY_ALLOW) { *num_xfrms = -1; break; } } return 0; } static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, const struct flowi *fl, u16 family, struct dst_entry *dst_orig) { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct xfrm_dst *bundle[XFRM_MAX_DEPTH]; struct xfrm_dst *xdst; struct dst_entry *dst; int err; /* Try to instantiate a bundle */ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family); if (err <= 0) { if (err == 0) return NULL; if (err != -EAGAIN) XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); return ERR_PTR(err); } dst = xfrm_bundle_create(pols[0], xfrm, bundle, err, fl, dst_orig); if (IS_ERR(dst)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); return ERR_CAST(dst); } xdst = (struct xfrm_dst *)dst; xdst->num_xfrms = err; xdst->num_pols = num_pols; memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); return xdst; } static void xfrm_policy_queue_process(struct timer_list *t) { struct sk_buff *skb; struct sock *sk; struct dst_entry *dst; struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer); struct net *net = xp_net(pol); struct xfrm_policy_queue *pq = &pol->polq; struct flowi fl; struct sk_buff_head list; __u32 skb_mark; spin_lock(&pq->hold_queue.lock); skb = skb_peek(&pq->hold_queue); if (!skb) { spin_unlock(&pq->hold_queue.lock); goto out; } dst = skb_dst(skb); sk = skb->sk; /* Fixup the mark to support VTI. */ skb_mark = skb->mark; skb->mark = pol->mark.v; xfrm_decode_session(net, skb, &fl, dst->ops->family); skb->mark = skb_mark; spin_unlock(&pq->hold_queue.lock); dst_hold(xfrm_dst_path(dst)); dst = xfrm_lookup(net, xfrm_dst_path(dst), &fl, sk, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) goto purge_queue; if (dst->flags & DST_XFRM_QUEUE) { dst_release(dst); if (pq->timeout >= XFRM_QUEUE_TMO_MAX) goto purge_queue; pq->timeout = pq->timeout << 1; if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout)) xfrm_pol_hold(pol); goto out; } dst_release(dst); __skb_queue_head_init(&list); spin_lock(&pq->hold_queue.lock); pq->timeout = 0; skb_queue_splice_init(&pq->hold_queue, &list); spin_unlock(&pq->hold_queue.lock); while (!skb_queue_empty(&list)) { skb = __skb_dequeue(&list); /* Fixup the mark to support VTI. */ skb_mark = skb->mark; skb->mark = pol->mark.v; xfrm_decode_session(net, skb, &fl, skb_dst(skb)->ops->family); skb->mark = skb_mark; dst_hold(xfrm_dst_path(skb_dst(skb))); dst = xfrm_lookup(net, xfrm_dst_path(skb_dst(skb)), &fl, skb->sk, 0); if (IS_ERR(dst)) { kfree_skb(skb); continue; } nf_reset_ct(skb); skb_dst_drop(skb); skb_dst_set(skb, dst); dst_output(net, skb->sk, skb); } out: xfrm_pol_put(pol); return; purge_queue: pq->timeout = 0; skb_queue_purge(&pq->hold_queue); xfrm_pol_put(pol); } static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned long sched_next; struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *) dst; struct xfrm_policy *pol = xdst->pols[0]; struct xfrm_policy_queue *pq = &pol->polq; if (unlikely(skb_fclone_busy(sk, skb))) { kfree_skb(skb); return 0; } if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { kfree_skb(skb); return -EAGAIN; } skb_dst_force(skb); spin_lock_bh(&pq->hold_queue.lock); if (!pq->timeout) pq->timeout = XFRM_QUEUE_TMO_MIN; sched_next = jiffies + pq->timeout; if (del_timer(&pq->hold_timer)) { if (time_before(pq->hold_timer.expires, sched_next)) sched_next = pq->hold_timer.expires; xfrm_pol_put(pol); } __skb_queue_tail(&pq->hold_queue, skb); if (!mod_timer(&pq->hold_timer, sched_next)) xfrm_pol_hold(pol); spin_unlock_bh(&pq->hold_queue.lock); return 0; } static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, struct xfrm_flo *xflo, const struct flowi *fl, int num_xfrms, u16 family) { int err; struct net_device *dev; struct dst_entry *dst; struct dst_entry *dst1; struct xfrm_dst *xdst; xdst = xfrm_alloc_dst(net, family); if (IS_ERR(xdst)) return xdst; if (!(xflo->flags & XFRM_LOOKUP_QUEUE) || net->xfrm.sysctl_larval_drop || num_xfrms <= 0) return xdst; dst = xflo->dst_orig; dst1 = &xdst->u.dst; dst_hold(dst); xdst->route = dst; dst_copy_metrics(dst1, dst); dst1->obsolete = DST_OBSOLETE_FORCE_CHK; dst1->flags |= DST_XFRM_QUEUE; dst1->lastuse = jiffies; dst1->input = dst_discard; dst1->output = xdst_queue_output; dst_hold(dst); xfrm_dst_set_child(xdst, dst); xdst->path = dst; xfrm_init_path((struct xfrm_dst *)dst1, dst, 0); err = -ENODEV; dev = dst->dev; if (!dev) goto free_dst; err = xfrm_fill_dst(xdst, dev, fl); if (err) goto free_dst; out: return xdst; free_dst: dst_release(dst1); xdst = ERR_PTR(err); goto out; } static struct xfrm_dst *xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo, u32 if_id) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int num_pols = 0, num_xfrms = 0, err; struct xfrm_dst *xdst; /* Resolve policies to use if we couldn't get them from * previous cache entry */ num_pols = 1; pols[0] = xfrm_policy_lookup(net, fl, family, dir, if_id); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) goto inc_error; if (num_pols == 0) return NULL; if (num_xfrms <= 0) goto make_dummy_bundle; xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); if (IS_ERR(xdst)) { err = PTR_ERR(xdst); if (err == -EREMOTE) { xfrm_pols_put(pols, num_pols); return NULL; } if (err != -EAGAIN) goto error; goto make_dummy_bundle; } else if (xdst == NULL) { num_xfrms = 0; goto make_dummy_bundle; } return xdst; make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); } xdst->num_pols = num_pols; xdst->num_xfrms = num_xfrms; memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); return xdst; inc_error: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); error: xfrm_pols_put(pols, num_pols); return ERR_PTR(err); } static struct dst_entry *make_blackhole(struct net *net, u16 family, struct dst_entry *dst_orig) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); struct dst_entry *ret; if (!afinfo) { dst_release(dst_orig); return ERR_PTR(-EINVAL); } else { ret = afinfo->blackhole_route(net, dst_orig); } rcu_read_unlock(); return ret; } /* Finds/creates a bundle for given flow and if_id * * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. * * xfrm_lookup uses an if_id of 0 by default, and is provided for * compatibility */ struct dst_entry *xfrm_lookup_with_ifid(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags, u32 if_id) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_dst *xdst; struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; u8 dir = XFRM_POLICY_OUT; int i, err, num_pols, num_xfrms = 0, drop_pols = 0; dst = NULL; xdst = NULL; route = NULL; sk = sk_const_to_full_sk(sk); if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { num_pols = 1; pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family, if_id); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) goto dropdst; if (num_pols) { if (num_xfrms <= 0) { drop_pols = num_pols; goto no_transform; } xdst = xfrm_resolve_and_create_bundle( pols, num_pols, fl, family, dst_orig); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); err = PTR_ERR(xdst); if (err == -EREMOTE) goto nopol; goto dropdst; } else if (xdst == NULL) { num_xfrms = 0; drop_pols = num_pols; goto no_transform; } route = xdst->route; } } if (xdst == NULL) { struct xfrm_flo xflo; xflo.dst_orig = dst_orig; xflo.flags = flags; /* To accelerate a bit... */ if (!if_id && ((dst_orig->flags & DST_NOXFRM) || !net->xfrm.policy_count[XFRM_POLICY_OUT])) goto nopol; xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo, if_id); if (xdst == NULL) goto nopol; if (IS_ERR(xdst)) { err = PTR_ERR(xdst); goto dropdst; } num_pols = xdst->num_pols; num_xfrms = xdst->num_xfrms; memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols); route = xdst->route; } dst = &xdst->u.dst; if (route == NULL && num_xfrms > 0) { /* The only case when xfrm_bundle_lookup() returns a * bundle with null route, is when the template could * not be resolved. It means policies are there, but * bundle could not be created, since we don't yet * have the xfrm_state's. We need to wait for KM to * negotiate new SA's or bail out with error.*/ if (net->xfrm.sysctl_larval_drop) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); err = -EREMOTE; goto error; } err = -EAGAIN; XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); goto error; } no_transform: if (num_pols == 0) goto nopol; if ((flags & XFRM_LOOKUP_ICMP) && !(pols[0]->flags & XFRM_POLICY_ICMP)) { err = -ENOENT; goto error; } for (i = 0; i < num_pols; i++) WRITE_ONCE(pols[i]->curlft.use_time, ktime_get_real_seconds()); if (num_xfrms < 0) { /* Prohibit the flow */ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK); err = -EPERM; goto error; } else if (num_xfrms > 0) { /* Flow transformed */ dst_release(dst_orig); } else { /* Flow passes untransformed */ dst_release(dst); dst = dst_orig; } ok: xfrm_pols_put(pols, drop_pols); if (dst && dst->xfrm && dst->xfrm->props.mode == XFRM_MODE_TUNNEL) dst->flags |= DST_XFRM_TUNNEL; return dst; nopol: if ((!dst_orig->dev || !(dst_orig->dev->flags & IFF_LOOPBACK)) && net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { err = -EPERM; goto error; } if (!(flags & XFRM_LOOKUP_ICMP)) { dst = dst_orig; goto ok; } err = -ENOENT; error: dst_release(dst); dropdst: if (!(flags & XFRM_LOOKUP_KEEP_DST_REF)) dst_release(dst_orig); xfrm_pols_put(pols, drop_pols); return ERR_PTR(err); } EXPORT_SYMBOL(xfrm_lookup_with_ifid); /* Main function: finds/creates a bundle for given flow. * * At the moment we eat a raw IP route. Mostly to speed up lookups * on interfaces with disabled IPsec. */ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags) { return xfrm_lookup_with_ifid(net, dst_orig, fl, sk, flags, 0); } EXPORT_SYMBOL(xfrm_lookup); /* Callers of xfrm_lookup_route() must ensure a call to dst_output(). * Otherwise we may send out blackholed packets. */ struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, const struct sock *sk, int flags) { struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, flags | XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_KEEP_DST_REF); if (PTR_ERR(dst) == -EREMOTE) return make_blackhole(net, dst_orig->ops->family, dst_orig); if (IS_ERR(dst)) dst_release(dst_orig); return dst; } EXPORT_SYMBOL(xfrm_lookup_route); static inline int xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { struct sec_path *sp = skb_sec_path(skb); struct xfrm_state *x; if (!sp || idx < 0 || idx >= sp->len) return 0; x = sp->xvec[idx]; if (!x->type->reject) return 0; return x->type->reject(x, skb, fl); } /* When skb is transformed back to its "native" form, we have to * check policy restrictions. At the moment we make this in maximally * stupid way. Shame on me. :-) Of course, connected sockets must * have policy cached at them. */ static inline int xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, unsigned short family, u32 if_id) { if (xfrm_state_kern(x)) return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family); return x->id.proto == tmpl->id.proto && (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) || !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && !(x->props.mode != XFRM_MODE_TRANSPORT && xfrm_state_addr_cmp(tmpl, x, family)) && (if_id == 0 || if_id == x->if_id); } /* * 0 or more than 0 is returned when validation is succeeded (either bypass * because of optional transport mode, or next index of the matched secpath * state with the template. * -1 is returned when no matching template is found. * Otherwise "-2 - errored_index" is returned. */ static inline int xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start, unsigned short family, u32 if_id) { int idx = start; if (tmpl->optional) { if (tmpl->mode == XFRM_MODE_TRANSPORT) return start; } else start = -1; for (; idx < sp->len; idx++) { if (xfrm_state_ok(tmpl, sp->xvec[idx], family, if_id)) return ++idx; if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { if (idx < sp->verified_cnt) { /* Secpath entry previously verified, consider optional and * continue searching */ continue; } if (start == -1) start = -2-idx; break; } } return start; } static void decode_session4(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse) { struct flowi4 *fl4 = &fl->u.ip4; memset(fl4, 0, sizeof(struct flowi4)); if (reverse) { fl4->saddr = flkeys->addrs.ipv4.dst; fl4->daddr = flkeys->addrs.ipv4.src; fl4->fl4_sport = flkeys->ports.dst; fl4->fl4_dport = flkeys->ports.src; } else { fl4->saddr = flkeys->addrs.ipv4.src; fl4->daddr = flkeys->addrs.ipv4.dst; fl4->fl4_sport = flkeys->ports.src; fl4->fl4_dport = flkeys->ports.dst; } switch (flkeys->basic.ip_proto) { case IPPROTO_GRE: fl4->fl4_gre_key = flkeys->gre.keyid; break; case IPPROTO_ICMP: fl4->fl4_icmp_type = flkeys->icmp.type; fl4->fl4_icmp_code = flkeys->icmp.code; break; } fl4->flowi4_proto = flkeys->basic.ip_proto; fl4->flowi4_tos = flkeys->ip.tos & ~INET_ECN_MASK; } #if IS_ENABLED(CONFIG_IPV6) static void decode_session6(const struct xfrm_flow_keys *flkeys, struct flowi *fl, bool reverse) { struct flowi6 *fl6 = &fl->u.ip6; memset(fl6, 0, sizeof(struct flowi6)); if (reverse) { fl6->saddr = flkeys->addrs.ipv6.dst; fl6->daddr = flkeys->addrs.ipv6.src; fl6->fl6_sport = flkeys->ports.dst; fl6->fl6_dport = flkeys->ports.src; } else { fl6->saddr = flkeys->addrs.ipv6.src; fl6->daddr = flkeys->addrs.ipv6.dst; fl6->fl6_sport = flkeys->ports.src; fl6->fl6_dport = flkeys->ports.dst; } switch (flkeys->basic.ip_proto) { case IPPROTO_GRE: fl6->fl6_gre_key = flkeys->gre.keyid; break; case IPPROTO_ICMPV6: fl6->fl6_icmp_type = flkeys->icmp.type; fl6->fl6_icmp_code = flkeys->icmp.code; break; } fl6->flowi6_proto = flkeys->basic.ip_proto; } #endif int __xfrm_decode_session(struct net *net, struct sk_buff *skb, struct flowi *fl, unsigned int family, int reverse) { struct xfrm_flow_keys flkeys; memset(&flkeys, 0, sizeof(flkeys)); __skb_flow_dissect(net, skb, &xfrm_session_dissector, &flkeys, NULL, 0, 0, 0, FLOW_DISSECTOR_F_STOP_AT_ENCAP); switch (family) { case AF_INET: decode_session4(&flkeys, fl, reverse); break; #if IS_ENABLED(CONFIG_IPV6) case AF_INET6: decode_session6(&flkeys, fl, reverse); break; #endif default: return -EAFNOSUPPORT; } fl->flowi_mark = skb->mark; if (reverse) { fl->flowi_oif = skb->skb_iif; } else { int oif = 0; if (skb_dst(skb) && skb_dst(skb)->dev) oif = skb_dst(skb)->dev->ifindex; fl->flowi_oif = oif; } return security_xfrm_decode_session(skb, &fl->flowi_secid); } EXPORT_SYMBOL(__xfrm_decode_session); static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { *idxp = k; return 1; } } return 0; } static bool icmp_err_packet(const struct flowi *fl, unsigned short family) { const struct flowi4 *fl4 = &fl->u.ip4; if (family == AF_INET && fl4->flowi4_proto == IPPROTO_ICMP && (fl4->fl4_icmp_type == ICMP_DEST_UNREACH || fl4->fl4_icmp_type == ICMP_TIME_EXCEEDED)) return true; #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) { const struct flowi6 *fl6 = &fl->u.ip6; if (fl6->flowi6_proto == IPPROTO_ICMPV6 && (fl6->fl6_icmp_type == ICMPV6_DEST_UNREACH || fl6->fl6_icmp_type == ICMPV6_PKT_TOOBIG || fl6->fl6_icmp_type == ICMPV6_TIME_EXCEED)) return true; } #endif return false; } static bool xfrm_icmp_flow_decode(struct sk_buff *skb, unsigned short family, const struct flowi *fl, struct flowi *fl1) { bool ret = true; struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC); int hl = family == AF_INET ? (sizeof(struct iphdr) + sizeof(struct icmphdr)) : (sizeof(struct ipv6hdr) + sizeof(struct icmp6hdr)); if (!newskb) return true; if (!pskb_pull(newskb, hl)) goto out; skb_reset_network_header(newskb); if (xfrm_decode_session_reverse(dev_net(skb->dev), newskb, fl1, family) < 0) goto out; fl1->flowi_oif = fl->flowi_oif; fl1->flowi_mark = fl->flowi_mark; fl1->flowi_tos = fl->flowi_tos; nf_nat_decode_session(newskb, fl1, family); ret = false; out: consume_skb(newskb); return ret; } static bool xfrm_selector_inner_icmp_match(struct sk_buff *skb, unsigned short family, const struct xfrm_selector *sel, const struct flowi *fl) { bool ret = false; if (icmp_err_packet(fl, family)) { struct flowi fl1; if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) return ret; ret = xfrm_selector_match(sel, &fl1, family); } return ret; } static inline struct xfrm_policy *xfrm_in_fwd_icmp(struct sk_buff *skb, const struct flowi *fl, unsigned short family, u32 if_id) { struct xfrm_policy *pol = NULL; if (icmp_err_packet(fl, family)) { struct flowi fl1; struct net *net = dev_net(skb->dev); if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) return pol; pol = xfrm_policy_lookup(net, &fl1, family, XFRM_POLICY_FWD, if_id); if (IS_ERR(pol)) pol = NULL; } return pol; } static inline struct dst_entry *xfrm_out_fwd_icmp(struct sk_buff *skb, struct flowi *fl, unsigned short family, struct dst_entry *dst) { if (icmp_err_packet(fl, family)) { struct net *net = dev_net(skb->dev); struct dst_entry *dst2; struct flowi fl1; if (xfrm_icmp_flow_decode(skb, family, fl, &fl1)) return dst; dst_hold(dst); dst2 = xfrm_lookup(net, dst, &fl1, NULL, (XFRM_LOOKUP_QUEUE | XFRM_LOOKUP_ICMP)); if (IS_ERR(dst2)) return dst; if (dst2->xfrm) { dst_release(dst); dst = dst2; } else { dst_release(dst2); } } return dst; } int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); struct xfrm_policy *pol; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols = 0; int xfrm_nr; int pi; int reverse; struct flowi fl; int xerr_idx = -1; const struct xfrm_if_cb *ifcb; struct sec_path *sp; u32 if_id = 0; rcu_read_lock(); ifcb = xfrm_if_get_cb(); if (ifcb) { struct xfrm_if_decode_session_result r; if (ifcb->decode_session(skb, family, &r)) { if_id = r.if_id; net = r.net; } } rcu_read_unlock(); reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; if (__xfrm_decode_session(net, skb, &fl, family, reverse) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); return 0; } nf_nat_decode_session(skb, &fl, family); /* First, check used SA against their selectors. */ sp = skb_sec_path(skb); if (sp) { int i; for (i = sp->len - 1; i >= 0; i--) { struct xfrm_state *x = sp->xvec[i]; int ret = 0; if (!xfrm_selector_match(&x->sel, &fl, family)) { ret = 1; if (x->props.flags & XFRM_STATE_ICMP && xfrm_selector_inner_icmp_match(skb, family, &x->sel, &fl)) ret = 0; if (ret) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); return 0; } } } } pol = NULL; sk = sk_to_full_sk(sk); if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl, family, if_id); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } } if (!pol) pol = xfrm_policy_lookup(net, &fl, family, dir, if_id); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; } if (!pol && dir == XFRM_POLICY_FWD) pol = xfrm_in_fwd_icmp(skb, &fl, family, if_id); if (!pol) { const bool is_crypto_offload = sp && (xfrm_input_state(skb)->xso.type == XFRM_DEV_OFFLOAD_CRYPTO); if (net->xfrm.policy_default[dir] == XFRM_USERPOLICY_BLOCK) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } if (sp && secpath_has_nontransport(sp, 0, &xerr_idx) && !is_crypto_offload) { xfrm_secpath_reject(xerr_idx, skb, &fl); XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS); return 0; } return 1; } /* This lockless write can happen from different cpus. */ WRITE_ONCE(pol->curlft.use_time, ktime_get_real_seconds()); pols[0] = pol; npols++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN, if_id); if (pols[1]) { if (IS_ERR(pols[1])) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); xfrm_pol_put(pols[0]); return 0; } /* This write can happen from different cpus. */ WRITE_ONCE(pols[1]->curlft.use_time, ktime_get_real_seconds()); npols++; } } #endif if (pol->action == XFRM_POLICY_ALLOW) { static struct sec_path dummy; struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; struct xfrm_tmpl **tpp = tp; int ti = 0; int i, k; sp = skb_sec_path(skb); if (!sp) sp = &dummy; for (pi = 0; pi < npols; pi++) { if (pols[pi] != pol && pols[pi]->action != XFRM_POLICY_ALLOW) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); goto reject; } if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto reject_error; } for (i = 0; i < pols[pi]->xfrm_nr; i++) tpp[ti++] = &pols[pi]->xfrm_vec[i]; } xfrm_nr = ti; if (npols > 1) { xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); tpp = stp; } /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. * Upon success, marks secpath entries as having been * verified to allow them to be skipped in future policy * checks (e.g. nested tunnels). */ for (i = xfrm_nr-1, k = 0; i >= 0; i--) { k = xfrm_policy_ok(tpp[i], sp, k, family, if_id); if (k < 0) { if (k < -1) /* "-2 - errored_index" returned */ xerr_idx = -(2+k); XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } } if (secpath_has_nontransport(sp, k, &xerr_idx)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH); goto reject; } xfrm_pols_put(pols, npols); sp->verified_cnt = k; return 1; } XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK); reject: xfrm_secpath_reject(xerr_idx, skb, &fl); reject_error: xfrm_pols_put(pols, npols); return 0; } EXPORT_SYMBOL(__xfrm_policy_check); int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) { struct net *net = dev_net(skb->dev); struct flowi fl; struct dst_entry *dst; int res = 1; if (xfrm_decode_session(net, skb, &fl, family) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } skb_dst_force(skb); if (!skb_dst(skb)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { res = 0; dst = NULL; } if (dst && !dst->xfrm) dst = xfrm_out_fwd_icmp(skb, &fl, family, dst); skb_dst_set(skb, dst); return res; } EXPORT_SYMBOL(__xfrm_route_forward); /* Optimize later using cookies and generation ids. */ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to * get validated by dst_ops->check on every use. We do this * because when a normal route referenced by an XFRM dst is * obsoleted we do not go looking around for all parent * referencing XFRM dsts so that we can invalidate them. It * is just too much work. Instead we make the checks here on * every use. For example: * * XFRM dst A --> IPv4 dst X * * X is the "xdst->route" of A (X is also the "dst->path" of A * in this example). If X is marked obsolete, "A" will not * notice. That's what we are validating here via the * stale_bundle() check. * * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will * be marked on it. * This will force stale_bundle() to fail on any xdst bundle with * this dst linked in it. */ if (dst->obsolete < 0 && !stale_bundle(dst)) return dst; return NULL; } static int stale_bundle(struct dst_entry *dst) { return !xfrm_bundle_ok((struct xfrm_dst *)dst); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) { while ((dst = xfrm_dst_child(dst)) && dst->xfrm && dst->dev == dev) { dst->dev = blackhole_netdev; dev_hold(dst->dev); dev_put(dev); } } EXPORT_SYMBOL(xfrm_dst_ifdown); static void xfrm_link_failure(struct sk_buff *skb) { /* Impossible. Such dst must be popped before reaches point of failure. */ } static void xfrm_negative_advice(struct sock *sk, struct dst_entry *dst) { if (dst->obsolete) sk_dst_reset(sk); } static void xfrm_init_pmtu(struct xfrm_dst **bundle, int nr) { while (nr--) { struct xfrm_dst *xdst = bundle[nr]; u32 pmtu, route_mtu_cached; struct dst_entry *dst; dst = &xdst->u.dst; pmtu = dst_mtu(xfrm_dst_child(dst)); xdst->child_mtu_cached = pmtu; pmtu = xfrm_state_mtu(dst->xfrm, pmtu); route_mtu_cached = dst_mtu(xdst->route); xdst->route_mtu_cached = route_mtu_cached; if (pmtu > route_mtu_cached) pmtu = route_mtu_cached; dst_metric_set(dst, RTAX_MTU, pmtu); } } /* Check that the bundle accepts the flow and its components are * still valid. */ static int xfrm_bundle_ok(struct xfrm_dst *first) { struct xfrm_dst *bundle[XFRM_MAX_DEPTH]; struct dst_entry *dst = &first->u.dst; struct xfrm_dst *xdst; int start_from, nr; u32 mtu; if (!dst_check(xfrm_dst_path(dst), ((struct xfrm_dst *)dst)->path_cookie) || (dst->dev && !netif_running(dst->dev))) return 0; if (dst->flags & DST_XFRM_QUEUE) return 1; start_from = nr = 0; do { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; if (xdst->xfrm_genid != dst->xfrm->genid) return 0; if (xdst->num_pols > 0 && xdst->policy_genid != atomic_read(&xdst->pols[0]->genid)) return 0; bundle[nr++] = xdst; mtu = dst_mtu(xfrm_dst_child(dst)); if (xdst->child_mtu_cached != mtu) { start_from = nr; xdst->child_mtu_cached = mtu; } if (!dst_check(xdst->route, xdst->route_cookie)) return 0; mtu = dst_mtu(xdst->route); if (xdst->route_mtu_cached != mtu) { start_from = nr; xdst->route_mtu_cached = mtu; } dst = xfrm_dst_child(dst); } while (dst->xfrm); if (likely(!start_from)) return 1; xdst = bundle[start_from - 1]; mtu = xdst->child_mtu_cached; while (start_from--) { dst = &xdst->u.dst; mtu = xfrm_state_mtu(dst->xfrm, mtu); if (mtu > xdst->route_mtu_cached) mtu = xdst->route_mtu_cached; dst_metric_set(dst, RTAX_MTU, mtu); if (!start_from) break; xdst = bundle[start_from - 1]; xdst->child_mtu_cached = mtu; } return 1; } static unsigned int xfrm_default_advmss(const struct dst_entry *dst) { return dst_metric_advmss(xfrm_dst_path(dst)); } static unsigned int xfrm_mtu(const struct dst_entry *dst) { unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); return mtu ? : dst_mtu(xfrm_dst_path(dst)); } static const void *xfrm_get_dst_nexthop(const struct dst_entry *dst, const void *daddr) { while (dst->xfrm) { const struct xfrm_state *xfrm = dst->xfrm; dst = xfrm_dst_child(dst); if (xfrm->props.mode == XFRM_MODE_TRANSPORT) continue; if (xfrm->type->flags & XFRM_TYPE_REMOTE_COADDR) daddr = xfrm->coaddr; else if (!(xfrm->type->flags & XFRM_TYPE_LOCAL_COADDR)) daddr = &xfrm->id.daddr; } return daddr; } static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { const struct dst_entry *path = xfrm_dst_path(dst); if (!skb) daddr = xfrm_get_dst_nexthop(dst, daddr); return path->ops->neigh_lookup(path, skb, daddr); } static void xfrm_confirm_neigh(const struct dst_entry *dst, const void *daddr) { const struct dst_entry *path = xfrm_dst_path(dst); daddr = xfrm_get_dst_nexthop(dst, daddr); path->ops->confirm_neigh(path, daddr); } int xfrm_policy_register_afinfo(const struct xfrm_policy_afinfo *afinfo, int family) { int err = 0; if (WARN_ON(family >= ARRAY_SIZE(xfrm_policy_afinfo))) return -EAFNOSUPPORT; spin_lock(&xfrm_policy_afinfo_lock); if (unlikely(xfrm_policy_afinfo[family] != NULL)) err = -EEXIST; else { struct dst_ops *dst_ops = afinfo->dst_ops; if (likely(dst_ops->kmem_cachep == NULL)) dst_ops->kmem_cachep = xfrm_dst_cache; if (likely(dst_ops->check == NULL)) dst_ops->check = xfrm_dst_check; if (likely(dst_ops->default_advmss == NULL)) dst_ops->default_advmss = xfrm_default_advmss; if (likely(dst_ops->mtu == NULL)) dst_ops->mtu = xfrm_mtu; if (likely(dst_ops->negative_advice == NULL)) dst_ops->negative_advice = xfrm_negative_advice; if (likely(dst_ops->link_failure == NULL)) dst_ops->link_failure = xfrm_link_failure; if (likely(dst_ops->neigh_lookup == NULL)) dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(!dst_ops->confirm_neigh)) dst_ops->confirm_neigh = xfrm_confirm_neigh; rcu_assign_pointer(xfrm_policy_afinfo[family], afinfo); } spin_unlock(&xfrm_policy_afinfo_lock); return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); void xfrm_policy_unregister_afinfo(const struct xfrm_policy_afinfo *afinfo) { struct dst_ops *dst_ops = afinfo->dst_ops; int i; for (i = 0; i < ARRAY_SIZE(xfrm_policy_afinfo); i++) { if (xfrm_policy_afinfo[i] != afinfo) continue; RCU_INIT_POINTER(xfrm_policy_afinfo[i], NULL); break; } synchronize_rcu(); dst_ops->kmem_cachep = NULL; dst_ops->check = NULL; dst_ops->negative_advice = NULL; dst_ops->link_failure = NULL; } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); void xfrm_if_register_cb(const struct xfrm_if_cb *ifcb) { spin_lock(&xfrm_if_cb_lock); rcu_assign_pointer(xfrm_if_cb, ifcb); spin_unlock(&xfrm_if_cb_lock); } EXPORT_SYMBOL(xfrm_if_register_cb); void xfrm_if_unregister_cb(void) { RCU_INIT_POINTER(xfrm_if_cb, NULL); synchronize_rcu(); } EXPORT_SYMBOL(xfrm_if_unregister_cb); #ifdef CONFIG_XFRM_STATISTICS static int __net_init xfrm_statistics_init(struct net *net) { int rv; net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib); if (!net->mib.xfrm_statistics) return -ENOMEM; rv = xfrm_proc_init(net); if (rv < 0) free_percpu(net->mib.xfrm_statistics); return rv; } static void xfrm_statistics_fini(struct net *net) { xfrm_proc_fini(net); free_percpu(net->mib.xfrm_statistics); } #else static int __net_init xfrm_statistics_init(struct net *net) { return 0; } static void xfrm_statistics_fini(struct net *net) { } #endif static int __net_init xfrm_policy_init(struct net *net) { unsigned int hmask, sz; int dir, err; if (net_eq(net, &init_net)) { xfrm_dst_cache = KMEM_CACHE(xfrm_dst, SLAB_HWCACHE_ALIGN | SLAB_PANIC); err = rhashtable_init(&xfrm_policy_inexact_table, &xfrm_pol_inexact_params); BUG_ON(err); } hmask = 8 - 1; sz = (hmask+1) * sizeof(struct hlist_head); net->xfrm.policy_byidx = xfrm_hash_alloc(sz); if (!net->xfrm.policy_byidx) goto out_byidx; net->xfrm.policy_idx_hmask = hmask; for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy_hash *htab; net->xfrm.policy_count[dir] = 0; net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0; INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); htab = &net->xfrm.policy_bydst[dir]; htab->table = xfrm_hash_alloc(sz); if (!htab->table) goto out_bydst; htab->hmask = hmask; htab->dbits4 = 32; htab->sbits4 = 32; htab->dbits6 = 128; htab->sbits6 = 128; } net->xfrm.policy_hthresh.lbits4 = 32; net->xfrm.policy_hthresh.rbits4 = 32; net->xfrm.policy_hthresh.lbits6 = 128; net->xfrm.policy_hthresh.rbits6 = 128; seqlock_init(&net->xfrm.policy_hthresh.lock); INIT_LIST_HEAD(&net->xfrm.policy_all); INIT_LIST_HEAD(&net->xfrm.inexact_bins); INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild); return 0; out_bydst: for (dir--; dir >= 0; dir--) { struct xfrm_policy_hash *htab; htab = &net->xfrm.policy_bydst[dir]; xfrm_hash_free(htab->table, sz); } xfrm_hash_free(net->xfrm.policy_byidx, sz); out_byidx: return -ENOMEM; } static void xfrm_policy_fini(struct net *net) { struct xfrm_pol_inexact_bin *b, *t; unsigned int sz; int dir; flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); #endif xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); WARN_ON(!list_empty(&net->xfrm.policy_all)); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy_hash *htab; WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); htab = &net->xfrm.policy_bydst[dir]; sz = (htab->hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(htab->table)); xfrm_hash_free(htab->table, sz); } sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head); WARN_ON(!hlist_empty(net->xfrm.policy_byidx)); xfrm_hash_free(net->xfrm.policy_byidx, sz); spin_lock_bh(&net->xfrm.xfrm_policy_lock); list_for_each_entry_safe(b, t, &net->xfrm.inexact_bins, inexact_bins) __xfrm_policy_inexact_prune_bin(b, true); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); } static int __net_init xfrm_net_init(struct net *net) { int rv; /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); spin_lock_init(&net->xfrm.xfrm_policy_lock); seqcount_spinlock_init(&net->xfrm.xfrm_policy_hash_generation, &net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); net->xfrm.policy_default[XFRM_POLICY_IN] = XFRM_USERPOLICY_ACCEPT; net->xfrm.policy_default[XFRM_POLICY_FWD] = XFRM_USERPOLICY_ACCEPT; net->xfrm.policy_default[XFRM_POLICY_OUT] = XFRM_USERPOLICY_ACCEPT; rv = xfrm_statistics_init(net); if (rv < 0) goto out_statistics; rv = xfrm_state_init(net); if (rv < 0) goto out_state; rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; rv = xfrm_nat_keepalive_net_init(net); if (rv < 0) goto out_nat_keepalive; return 0; out_nat_keepalive: xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: xfrm_state_fini(net); out_state: xfrm_statistics_fini(net); out_statistics: return rv; } static void __net_exit xfrm_net_exit(struct net *net) { xfrm_nat_keepalive_net_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); xfrm_statistics_fini(net); } static struct pernet_operations __net_initdata xfrm_net_ops = { .init = xfrm_net_init, .exit = xfrm_net_exit, }; static const struct flow_dissector_key xfrm_flow_dissector_keys[] = { { .key_id = FLOW_DISSECTOR_KEY_CONTROL, .offset = offsetof(struct xfrm_flow_keys, control), }, { .key_id = FLOW_DISSECTOR_KEY_BASIC, .offset = offsetof(struct xfrm_flow_keys, basic), }, { .key_id = FLOW_DISSECTOR_KEY_IPV4_ADDRS, .offset = offsetof(struct xfrm_flow_keys, addrs.ipv4), }, { .key_id = FLOW_DISSECTOR_KEY_IPV6_ADDRS, .offset = offsetof(struct xfrm_flow_keys, addrs.ipv6), }, { .key_id = FLOW_DISSECTOR_KEY_PORTS, .offset = offsetof(struct xfrm_flow_keys, ports), }, { .key_id = FLOW_DISSECTOR_KEY_GRE_KEYID, .offset = offsetof(struct xfrm_flow_keys, gre), }, { .key_id = FLOW_DISSECTOR_KEY_IP, .offset = offsetof(struct xfrm_flow_keys, ip), }, { .key_id = FLOW_DISSECTOR_KEY_ICMP, .offset = offsetof(struct xfrm_flow_keys, icmp), }, }; void __init xfrm_init(void) { skb_flow_dissector_init(&xfrm_session_dissector, xfrm_flow_dissector_keys, ARRAY_SIZE(xfrm_flow_dissector_keys)); register_pernet_subsys(&xfrm_net_ops); xfrm_dev_init(); xfrm_input_init(); #ifdef CONFIG_XFRM_ESPINTCP espintcp_init(); #endif register_xfrm_state_bpf(); xfrm_nat_keepalive_init(AF_INET); } #ifdef CONFIG_AUDITSYSCALL static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp, struct audit_buffer *audit_buf) { struct xfrm_sec_ctx *ctx = xp->security; struct xfrm_selector *sel = &xp->selector; if (ctx) audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); switch (sel->family) { case AF_INET: audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); if (sel->prefixlen_s != 32) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4); if (sel->prefixlen_d != 32) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); break; case AF_INET6: audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6); if (sel->prefixlen_s != 128) audit_log_format(audit_buf, " src_prefixlen=%d", sel->prefixlen_s); audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6); if (sel->prefixlen_d != 128) audit_log_format(audit_buf, " dst_prefixlen=%d", sel->prefixlen_d); break; } } void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-add"); if (audit_buf == NULL) return; xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); } EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-delete"); if (audit_buf == NULL) return; xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); } EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete); #endif #ifdef CONFIG_XFRM_MIGRATE static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp, const struct xfrm_selector *sel_tgt) { if (sel_cmp->proto == IPSEC_ULPROTO_ANY) { if (sel_tgt->family == sel_cmp->family && xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr, sel_cmp->family) && xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr, sel_cmp->family) && sel_tgt->prefixlen_d == sel_cmp->prefixlen_d && sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) { return true; } } else { if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) { return true; } } return false; } static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, u8 dir, u8 type, struct net *net, u32 if_id) { struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; u32 priority = ~0U; spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { if ((if_id == 0 || pol->if_id == if_id) && xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; priority = ret->priority; break; } } chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, chain, bydst_inexact_list) { if ((pol->priority >= priority) && ret) break; if ((if_id == 0 || pol->if_id == if_id) && xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { ret = pol; break; } } xfrm_pol_hold(ret); spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t) { int match = 0; if (t->mode == m->mode && t->id.proto == m->proto && (m->reqid == 0 || t->reqid == m->reqid)) { switch (t->mode) { case XFRM_MODE_TUNNEL: case XFRM_MODE_BEET: if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr, m->old_family) && xfrm_addr_equal(&t->saddr, &m->old_saddr, m->old_family)) { match = 1;