8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2020 Facebook */ #include <linux/fs.h> #include <linux/anon_inodes.h> #include <linux/filter.h> #include <linux/bpf.h> #include <linux/rcupdate_trace.h> struct bpf_iter_target_info { struct list_head list; const struct bpf_iter_reg *reg_info; u32 btf_id; /* cached value */ }; struct bpf_iter_link { struct bpf_link link; struct bpf_iter_aux_info aux; struct bpf_iter_target_info *tinfo; }; struct bpf_iter_priv_data { struct bpf_iter_target_info *tinfo; const struct bpf_iter_seq_info *seq_info; struct bpf_prog *prog; u64 session_id; u64 seq_num; bool done_stop; u8 target_private[] __aligned(8); }; static struct list_head targets = LIST_HEAD_INIT(targets); static DEFINE_MUTEX(targets_mutex); /* protect bpf_iter_link changes */ static DEFINE_MUTEX(link_mutex); /* incremented on every opened seq_file */ static atomic64_t session_id; static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, const struct bpf_iter_seq_info *seq_info); static void bpf_iter_inc_seq_num(struct seq_file *seq) { struct bpf_iter_priv_data *iter_priv; iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); iter_priv->seq_num++; } static void bpf_iter_dec_seq_num(struct seq_file *seq) { struct bpf_iter_priv_data *iter_priv; iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); iter_priv->seq_num--; } static void bpf_iter_done_stop(struct seq_file *seq) { struct bpf_iter_priv_data *iter_priv; iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); iter_priv->done_stop = true; } static inline bool bpf_iter_target_support_resched(const struct bpf_iter_target_info *tinfo) { return tinfo->reg_info->feature & BPF_ITER_RESCHED; } static bool bpf_iter_support_resched(struct seq_file *seq) { struct bpf_iter_priv_data *iter_priv; iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); return bpf_iter_target_support_resched(iter_priv->tinfo); } /* maximum visited objects before bailing out */ #define MAX_ITER_OBJECTS 1000000 /* bpf_seq_read, a customized and simpler version for bpf iterator. * The following are differences from seq_read(): * . fixed buffer size (PAGE_SIZE) * . assuming NULL ->llseek() * . stop() may call bpf program, handling potential overflow there */ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct seq_file *seq = file->private_data; size_t n, offs, copied = 0; int err = 0, num_objs = 0; bool can_resched; void *p; mutex_lock(&seq->lock); if (!seq->buf) { seq->size = PAGE_SIZE << 3; seq->buf = kvmalloc(seq->size, GFP_KERNEL); if (!seq->buf) { err = -ENOMEM; goto done; } } if (seq->count) { n = min(seq->count, size); err = copy_to_user(buf, seq->buf + seq->from, n); if (err) { err = -EFAULT; goto done; } seq->count -= n; seq->from += n; copied = n; goto done; } seq->from = 0; p = seq->op->start(seq, &seq->index); if (!p) goto stop; if (IS_ERR(p)) { err = PTR_ERR(p); seq->op->stop(seq, p); seq->count = 0; goto done; } err = seq->op->show(seq, p); if (err > 0) { /* object is skipped, decrease seq_num, so next * valid object can reuse the same seq_num. */ bpf_iter_dec_seq_num(seq); seq->count = 0; } else if (err < 0 || seq_has_overflowed(seq)) { if (!err) err = -E2BIG; seq->op->stop(seq, p); seq->count = 0; goto done; } can_resched = bpf_iter_support_resched(seq); while (1) { loff_t pos = seq->index; num_objs++; offs = seq->count; p = seq->op->next(seq, p, &seq->index); if (pos == seq->index) { pr_info_ratelimited("buggy seq_file .next function %ps " "did not updated position index\n", seq->op->next); seq->index++; } if (IS_ERR_OR_NULL(p)) break; /* got a valid next object, increase seq_num */ bpf_iter_inc_seq_num(seq); if (seq->count >= size) break; if (num_objs >= MAX_ITER_OBJECTS) { if (offs == 0) { err = -EAGAIN; seq->op->stop(seq, p); goto done; } break; } err = seq->op->show(seq, p); if (err > 0) { bpf_iter_dec_seq_num(seq); seq->count = offs; } else if (err < 0 || seq_has_overflowed(seq)) { seq->count = offs; if (offs == 0) { if (!err) err = -E2BIG; seq->op->stop(seq, p); goto done; } break; } if (can_resched) cond_resched(); } stop: offs = seq->count; if (IS_ERR(p)) { seq->op->stop(seq, NULL); err = PTR_ERR(p); goto done; } /* bpf program called if !p */ seq->op->stop(seq, p); if (!p) { if (!seq_has_overflowed(seq)) { bpf_iter_done_stop(seq); } else { seq->count = offs; if (offs == 0) { err = -E2BIG; goto done; } } } n = min(seq->count, size); err = copy_to_user(buf, seq->buf, n); if (err) { err = -EFAULT; goto done; } copied = n; seq->count -= n; seq->from = n; done: if (!copied) copied = err; else *ppos += copied; mutex_unlock(&seq->lock); return copied; } static const struct bpf_iter_seq_info * __get_seq_info(struct bpf_iter_link *link) { const struct bpf_iter_seq_info *seq_info; if (link->aux.map) { seq_info = link->aux.map->ops->iter_seq_info; if (seq_info) return seq_info; } return link->tinfo->reg_info->seq_info; } static int iter_open(struct inode *inode, struct file *file) { struct bpf_iter_link *link = inode->i_private; return prepare_seq_file(file, link, __get_seq_info(link)); } static int iter_release(struct inode *inode, struct file *file) { struct bpf_iter_priv_data *iter_priv; struct seq_file *seq; seq = file->private_data; if (!seq) return 0; iter_priv = container_of(seq->private, struct bpf_iter_priv_data, target_private); if (iter_priv->seq_info->fini_seq_private) iter_priv->seq_info->fini_seq_private(seq->private); bpf_prog_put(iter_priv->prog); seq->private = iter_priv; return seq_release_private(inode, file); } const struct file_operations bpf_iter_fops = { .open = iter_open, .read = bpf_seq_read, .release = iter_release, }; /* The argument reg_info will be cached in bpf_iter_target_info. * The common practice is to declare target reg_info as * a const static variable and passed as an argument to * bpf_iter_reg_target(). */ int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info) { struct bpf_iter_target_info *tinfo; tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL); if (!tinfo) return -ENOMEM; tinfo->reg_info = reg_info; INIT_LIST_HEAD(&tinfo->list); mutex_lock(&targets_mutex); list_add(&tinfo->list, &targets); mutex_unlock(&targets_mutex); return 0; } void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info) { struct bpf_iter_target_info *tinfo; bool found = false; mutex_lock(&targets_mutex); list_for_each_entry(tinfo, &targets, list) { if (reg_info == tinfo->reg_info) { list_del(&tinfo->list); kfree(tinfo); found = true; break; } } mutex_unlock(&targets_mutex); WARN_ON(found == false); } static void cache_btf_id(struct bpf_iter_target_info *tinfo, struct bpf_prog *prog) { tinfo->btf_id = prog->aux->attach_btf_id; } bool bpf_iter_prog_supported(struct bpf_prog *prog) { const char *attach_fname = prog->aux->attach_func_name; struct bpf_iter_target_info *tinfo = NULL, *iter; u32 prog_btf_id = prog->aux->attach_btf_id; const char *prefix = BPF_ITER_FUNC_PREFIX; int prefix_len = strlen(prefix); if (strncmp(attach_fname, prefix, prefix_len)) return false; mutex_lock(&targets_mutex); list_for_each_entry(iter, &targets, list) { if (iter->btf_id && iter->btf_id == prog_btf_id) { tinfo = iter; break; } if (!strcmp(attach_fname + prefix_len, iter->reg_info->target)) { cache_btf_id(iter, prog); tinfo = iter; break; } } mutex_unlock(&targets_mutex); if (tinfo) { prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size; prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info; } return tinfo != NULL; } const struct bpf_func_proto * bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { const struct bpf_iter_target_info *tinfo; const struct bpf_func_proto *fn = NULL; mutex_lock(&targets_mutex); list_for_each_entry(tinfo, &targets, list) { if (tinfo->btf_id == prog->aux->attach_btf_id) { const struct bpf_iter_reg *reg_info; reg_info = tinfo->reg_info; if (reg_info->get_func_proto) fn = reg_info->get_func_proto(func_id, prog); break; } } mutex_unlock(&targets_mutex); return fn; } static void bpf_iter_link_release(struct bpf_link *link) { struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); if (iter_link->tinfo->reg_info->detach_target) iter_link->tinfo->reg_info->detach_target(&iter_link->aux); } static void bpf_iter_link_dealloc(struct bpf_link *link) { struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); kfree(iter_link); } static int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog) { int ret = 0; mutex_lock(&link_mutex); if (old_prog && link->prog != old_prog) { ret = -EPERM; goto out_unlock; } if (link->prog->type != new_prog->type || link->prog->expected_attach_type != new_prog->expected_attach_type || link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) { ret = -EINVAL; goto out_unlock; } old_prog = xchg(&link->prog, new_prog); bpf_prog_put(old_prog); out_unlock: mutex_unlock(&link_mutex); return ret; } static void bpf_iter_link_show_fdinfo(const struct bpf_link *link, struct seq_file *seq) { struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); bpf_iter_show_fdinfo_t show_fdinfo; seq_printf(seq, "target_name:\t%s\n", iter_link->tinfo->reg_info->target); show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo; if (show_fdinfo) show_fdinfo(&iter_link->aux, seq); } static int bpf_iter_link_fill_link_info(const struct bpf_link *link, struct bpf_link_info *info) { struct bpf_iter_link *iter_link = container_of(link, struct bpf_iter_link, link); char __user *ubuf = u64_to_user_ptr(info->iter.target_name); bpf_iter_fill_link_info_t fill_link_info; u32 ulen = info->iter.target_name_len; const char *target_name; u32 target_len; if (!ulen ^ !ubuf) return -EINVAL; target_name = iter_link->tinfo->reg_info->target; target_len = strlen(target_name); info->iter.target_name_len = target_len + 1; if (ubuf) { if (ulen >= target_len + 1) { if (copy_to_user(ubuf, target_name, target_len + 1)) return -EFAULT; } else { char zero = '\0'; if (copy_to_user(ubuf, target_name, ulen - 1)) return -EFAULT; if (put_user(zero, ubuf + ulen - 1)) return -EFAULT; return -ENOSPC; } } fill_link_info = iter_link->tinfo->reg_info->fill_link_info; if (fill_link_info) return fill_link_info(&iter_link->aux, info); return 0; } static const struct bpf_link_ops bpf_iter_link_lops = { .release = bpf_iter_link_release, .dealloc = bpf_iter_link_dealloc, .update_prog = bpf_iter_link_replace, .show_fdinfo = bpf_iter_link_show_fdinfo, .fill_link_info = bpf_iter_link_fill_link_info, }; bool bpf_link_is_iter(struct bpf_link *link) { return link->ops == &bpf_iter_link_lops; } int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, struct bpf_prog *prog) { struct bpf_iter_target_info *tinfo = NULL, *iter; struct bpf_link_primer link_primer; union bpf_iter_link_info linfo; struct bpf_iter_link *link; u32 prog_btf_id, linfo_len; bpfptr_t ulinfo; int err; if (attr->link_create.target_fd || attr->link_create.flags) return -EINVAL; memset(&linfo, 0, sizeof(union bpf_iter_link_info)); ulinfo = make_bpfptr(attr->link_create.iter_info, uattr.is_kernel); linfo_len = attr->link_create.iter_info_len; if (bpfptr_is_null(ulinfo) ^ !linfo_len) return -EINVAL; if (!bpfptr_is_null(ulinfo)) { err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), linfo_len); if (err) return err; linfo_len = min_t(u32, linfo_len, sizeof(linfo)); if (copy_from_bpfptr(&linfo, ulinfo, linfo_len)) return -EFAULT; } prog_btf_id = prog->aux->attach_btf_id; mutex_lock(&targets_mutex); list_for_each_entry(iter, &targets, list) { if (iter->btf_id == prog_btf_id) { tinfo = iter; break; } } mutex_unlock(&targets_mutex); if (!tinfo) return -ENOENT; /* Only allow sleepable program for resched-able iterator */ if (prog->sleepable && !bpf_iter_target_support_resched(tinfo)) return -EINVAL; link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); if (!link) return -ENOMEM; bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); link->tinfo = tinfo; err = bpf_link_prime(&link->link, &link_primer); if (err) { kfree(link); return err; } if (tinfo->reg_info->attach_target) { err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); if (err) { bpf_link_cleanup(&link_primer); return err; } } return bpf_link_settle(&link_primer); } static void init_seq_meta(struct bpf_iter_priv_data *priv_data, struct bpf_iter_target_info *tinfo, const struct bpf_iter_seq_info *seq_info, struct bpf_prog *prog) { priv_data->tinfo = tinfo; priv_data->seq_info = seq_info; priv_data->prog = prog; priv_data->session_id = atomic64_inc_return(&session_id); priv_data->seq_num = 0; priv_data->done_stop = false; } static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, const struct bpf_iter_seq_info *seq_info) { struct bpf_iter_priv_data *priv_data; struct bpf_iter_target_info *tinfo; struct bpf_prog *prog; u32 total_priv_dsize; struct seq_file *seq; int err = 0; mutex_lock(&link_mutex); prog = link->link.prog; bpf_prog_inc(prog); mutex_unlock(&link_mutex); tinfo = link->tinfo; total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + seq_info->seq_priv_size; priv_data = __seq_open_private(file, seq_info->seq_ops, total_priv_dsize); if (!priv_data) { err = -ENOMEM; goto release_prog; } if (seq_info->init_seq_private) { err = seq_info->init_seq_private(priv_data->target_private, &link->aux); if (err) goto release_seq_file; } init_seq_meta(priv_data, tinfo, seq_info, prog); seq = file->private_data; seq->private = priv_data->target_private; return 0; release_seq_file: seq_release_private(file->f_inode, file); file->private_data = NULL; release_prog: bpf_prog_put(prog); return err; } int bpf_iter_new_fd(struct bpf_link *link) { struct bpf_iter_link *iter_link; struct file *file; unsigned int flags; int err, fd; if (link->ops != &bpf_iter_link_lops) return -EINVAL; flags = O_RDONLY | O_CLOEXEC; fd = get_unused_fd_flags(flags); if (fd < 0) return fd; file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags); if (IS_ERR(file)) { err = PTR_ERR(file); goto free_fd; } iter_link = container_of(link, struct bpf_iter_link, link); err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link)); if (err) goto free_file; fd_install(fd, file); return fd; free_file: fput(file); free_fd: put_unused_fd(fd); return err; } struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop) { struct bpf_iter_priv_data *iter_priv; struct seq_file *seq; void *seq_priv; seq = meta->seq; if (seq->file->f_op != &bpf_iter_fops) return NULL; seq_priv = seq->private; iter_priv = container_of(seq_priv, struct bpf_iter_priv_data, target_private); if (in_stop && iter_priv->done_stop) return NULL; meta->session_id = iter_priv->session_id; meta->seq_num = iter_priv->seq_num; return iter_priv->prog; } int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) { struct bpf_run_ctx run_ctx, *old_run_ctx; int ret; if (prog->sleepable) { rcu_read_lock_trace(); migrate_disable(); might_fault(); old_run_ctx = bpf_set_run_ctx(&run_ctx); ret = bpf_prog_run(prog, ctx); bpf_reset_run_ctx(old_run_ctx); migrate_enable(); rcu_read_unlock_trace(); } else { rcu_read_lock(); migrate_disable(); old_run_ctx = bpf_set_run_ctx(&run_ctx); ret = bpf_prog_run(prog, ctx); bpf_reset_run_ctx(old_run_ctx); migrate_enable(); rcu_read_unlock(); } /* bpf program can only return 0 or 1: * 0 : okay * 1 : retry the same object * The bpf_iter_run_prog() return value * will be seq_ops->show() return value. */ return ret == 0 ? 0 : -EAGAIN; } BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn, void *, callback_ctx, u64, flags) { return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags); } const struct bpf_func_proto bpf_for_each_map_elem_proto = { .func = bpf_for_each_map_elem, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_FUNC, .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx, u64, flags) { bpf_callback_t callback = (bpf_callback_t)callback_fn; u64 ret; u32 i; /* Note: these safety checks are also verified when bpf_loop * is inlined, be careful to modify this code in sync. See * function verifier.c:inline_bpf_loop. */ if (flags) return -EINVAL; if (nr_loops > BPF_MAX_LOOPS) return -E2BIG; for (i = 0; i < nr_loops; i++) { ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0); /* return value: 0 - continue, 1 - stop and return */ if (ret) return i + 1; } return i; } const struct bpf_func_proto bpf_loop_proto = { .func = bpf_loop, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, .arg2_type = ARG_PTR_TO_FUNC, .arg3_type = ARG_PTR_TO_STACK_OR_NULL, .arg4_type = ARG_ANYTHING, }; struct bpf_iter_num_kern { int cur; /* current value, inclusive */ int end; /* final value, exclusive */ } __aligned(8); __bpf_kfunc_start_defs(); __bpf_kfunc int bpf_iter_num_new(struct bpf_iter_num *it, int start, int end) { struct bpf_iter_num_kern *s = (void *)it; BUILD_BUG_ON(sizeof(struct bpf_iter_num_kern) != sizeof(struct bpf_iter_num)); BUILD_BUG_ON(__alignof__(struct bpf_iter_num_kern) != __alignof__(struct bpf_iter_num)); /* start == end is legit, it's an empty range and we'll just get NULL * on first (and any subsequent) bpf_iter_num_next() call */ if (start > end) { s->cur = s->end = 0; return -EINVAL; } /* avoid overflows, e.g., if start == INT_MIN and end == INT_MAX */ if ((s64)end - (s64)start > BPF_MAX_LOOPS) { s->cur = s->end = 0; return -E2BIG; } /* user will call bpf_iter_num_next() first, * which will set s->cur to exactly start value; * underflow shouldn't matter */ s->cur = start - 1; s->end = end; return 0; } __bpf_kfunc int *bpf_iter_num_next(struct bpf_iter_num* it) { struct bpf_iter_num_kern *s = (void *)it; /* check failed initialization or if we are done (same behavior); * need to be careful about overflow, so convert to s64 for checks, * e.g., if s->cur == s->end == INT_MAX, we can't just do * s->cur + 1 >= s->end */ if ((s64)(s->cur + 1) >= s->end) { s->cur = s->end = 0; return NULL; } s->cur++; return &s->cur; } __bpf_kfunc void bpf_iter_num_destroy(struct bpf_iter_num *it) { struct bpf_iter_num_kern *s = (void *)it; s->cur = s->end = 0; } __bpf_kfunc_end_defs(); |
223 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PIPE_FS_I_H #define _LINUX_PIPE_FS_I_H #define PIPE_DEF_BUFFERS 16 #define PIPE_BUF_FLAG_LRU 0x01 /* page is on the LRU */ #define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ #define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ #define PIPE_BUF_FLAG_PACKET 0x08 /* read() as a packet */ #define PIPE_BUF_FLAG_CAN_MERGE 0x10 /* can merge buffers */ #define PIPE_BUF_FLAG_WHOLE 0x20 /* read() must return entire buffer or error */ #ifdef CONFIG_WATCH_QUEUE #define PIPE_BUF_FLAG_LOSS 0x40 /* Message loss happened after this buffer */ #endif /** * struct pipe_buffer - a linux kernel pipe buffer * @page: the page containing the data for the pipe buffer * @offset: offset of data inside the @page * @len: length of data inside the @page * @ops: operations associated with this buffer. See @pipe_buf_operations. * @flags: pipe buffer flags. See above. * @private: private data owned by the ops. **/ struct pipe_buffer { struct page *page; unsigned int offset, len; const struct pipe_buf_operations *ops; unsigned int flags; unsigned long private; }; /** * struct pipe_inode_info - a linux kernel pipe * @mutex: mutex protecting the whole thing * @rd_wait: reader wait point in case of empty pipe * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) * @nr_accounted: The amount this pipe accounts for in user->pipe_bufs * @tmp_page: cached released page * @readers: number of current readers of this pipe * @writers: number of current writers of this pipe * @files: number of struct file referring this pipe (protected by ->i_lock) * @r_counter: reader counter * @w_counter: writer counter * @poll_usage: is this pipe used for epoll, which has crazy wakeups? * @fasync_readers: reader side fasync * @fasync_writers: writer side fasync * @bufs: the circular array of pipe buffers * @user: the user who created this pipe * @watch_queue: If this pipe is a watch_queue, this is the stuff for that **/ struct pipe_inode_info { struct mutex mutex; wait_queue_head_t rd_wait, wr_wait; unsigned int head; unsigned int tail; unsigned int max_usage; unsigned int ring_size; unsigned int nr_accounted; unsigned int readers; unsigned int writers; unsigned int files; unsigned int r_counter; unsigned int w_counter; bool poll_usage; #ifdef CONFIG_WATCH_QUEUE bool note_loss; #endif struct page *tmp_page; struct fasync_struct *fasync_readers; struct fasync_struct *fasync_writers; struct pipe_buffer *bufs; struct user_struct *user; #ifdef CONFIG_WATCH_QUEUE struct watch_queue *watch_queue; #endif }; /* * Note on the nesting of these functions: * * ->confirm() * ->try_steal() * * That is, ->try_steal() must be called on a confirmed buffer. See below for * the meaning of each operation. Also see the kerneldoc in fs/pipe.c for the * pipe and generic variants of these hooks. */ struct pipe_buf_operations { /* * ->confirm() verifies that the data in the pipe buffer is there * and that the contents are good. If the pages in the pipe belong * to a file system, we may need to wait for IO completion in this * hook. Returns 0 for good, or a negative error value in case of * error. If not present all pages are considered good. */ int (*confirm)(struct pipe_inode_info *, struct pipe_buffer *); /* * When the contents of this pipe buffer has been completely * consumed by a reader, ->release() is called. */ void (*release)(struct pipe_inode_info *, struct pipe_buffer *); /* * Attempt to take ownership of the pipe buffer and its contents. * ->try_steal() returns %true for success, in which case the contents * of the pipe (the buf->page) is locked and now completely owned by the * caller. The page may then be transferred to a different mapping, the * most often used case is insertion into different file address space * cache. */ bool (*try_steal)(struct pipe_inode_info *, struct pipe_buffer *); /* * Get a reference to the pipe buffer. */ bool (*get)(struct pipe_inode_info *, struct pipe_buffer *); }; /** * pipe_has_watch_queue - Check whether the pipe is a watch_queue, * i.e. it was created with O_NOTIFICATION_PIPE * @pipe: The pipe to check * * Return: true if pipe is a watch queue, false otherwise. */ static inline bool pipe_has_watch_queue(const struct pipe_inode_info *pipe) { #ifdef CONFIG_WATCH_QUEUE return pipe->watch_queue != NULL; #else return false; #endif } /** * pipe_empty - Return true if the pipe is empty * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ static inline bool pipe_empty(unsigned int head, unsigned int tail) { return head == tail; } /** * pipe_occupancy - Return number of slots used in the pipe * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer */ static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail) { return head - tail; } /** * pipe_full - Return true if the pipe is full * @head: The pipe ring head pointer * @tail: The pipe ring tail pointer * @limit: The maximum amount of slots available. */ static inline bool pipe_full(unsigned int head, unsigned int tail, unsigned int limit) { return pipe_occupancy(head, tail) >= limit; } /** * pipe_buf - Return the pipe buffer for the specified slot in the pipe ring * @pipe: The pipe to access * @slot: The slot of interest */ static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, unsigned int slot) { return &pipe->bufs[slot & (pipe->ring_size - 1)]; } /** * pipe_head_buf - Return the pipe buffer at the head of the pipe ring * @pipe: The pipe to access */ static inline struct pipe_buffer *pipe_head_buf(const struct pipe_inode_info *pipe) { return pipe_buf(pipe, pipe->head); } /** * pipe_buf_get - get a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to get a reference to * * Return: %true if the reference was successfully obtained. */ static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { return buf->ops->get(pipe, buf); } /** * pipe_buf_release - put a reference to a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to put a reference to */ static inline void pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { const struct pipe_buf_operations *ops = buf->ops; buf->ops = NULL; ops->release(pipe, buf); } /** * pipe_buf_confirm - verify contents of the pipe buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to confirm */ static inline int pipe_buf_confirm(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { if (!buf->ops->confirm) return 0; return buf->ops->confirm(pipe, buf); } /** * pipe_buf_try_steal - attempt to take ownership of a pipe_buffer * @pipe: the pipe that the buffer belongs to * @buf: the buffer to attempt to steal */ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { if (!buf->ops->try_steal) return false; return buf->ops->try_steal(pipe, buf); } static inline void pipe_discard_from(struct pipe_inode_info *pipe, unsigned int old_head) { unsigned int mask = pipe->ring_size - 1; while (pipe->head > old_head) pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]); } /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual memory allocation, whereas PIPE_BUF makes atomicity guarantees. */ #define PIPE_SIZE PAGE_SIZE /* Pipe lock and unlock operations */ void pipe_lock(struct pipe_inode_info *); void pipe_unlock(struct pipe_inode_info *); void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *); /* Wait for a pipe to be readable/writable while dropping the pipe lock */ void pipe_wait_readable(struct pipe_inode_info *); void pipe_wait_writable(struct pipe_inode_info *); struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); bool generic_pipe_buf_try_steal(struct pipe_inode_info *, struct pipe_buffer *); void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; unsigned long account_pipe_buffers(struct user_struct *user, unsigned long old, unsigned long new); bool too_many_pipe_buffers_soft(unsigned long user_bufs); bool too_many_pipe_buffers_hard(unsigned long user_bufs); bool pipe_is_unprivileged_user(void); /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); long pipe_fcntl(struct file *, unsigned int, unsigned int arg); struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); int create_pipe_files(struct file **, int); unsigned int round_pipe_size(unsigned int size); #endif |
1 2 2 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 | // SPDX-License-Identifier: GPL-2.0-or-later /* * HID driver for N-Trig touchscreens * * Copyright (c) 2008-2010 Rafi Rubin * Copyright (c) 2009-2010 Stephane Chatty */ /* */ #include <linux/device.h> #include <linux/hid.h> #include <linux/usb.h> #include "usbhid/usbhid.h" #include <linux/module.h> #include <linux/slab.h> #include "hid-ids.h" #define NTRIG_DUPLICATE_USAGES 0x001 static unsigned int min_width; module_param(min_width, uint, 0644); MODULE_PARM_DESC(min_width, "Minimum touch contact width to accept."); static unsigned int min_height; module_param(min_height, uint, 0644); MODULE_PARM_DESC(min_height, "Minimum touch contact height to accept."); static unsigned int activate_slack = 1; module_param(activate_slack, uint, 0644); MODULE_PARM_DESC(activate_slack, "Number of touch frames to ignore at " "the start of touch input."); static unsigned int deactivate_slack = 4; module_param(deactivate_slack, uint, 0644); MODULE_PARM_DESC(deactivate_slack, "Number of empty frames to ignore before " "deactivating touch."); static unsigned int activation_width = 64; module_param(activation_width, uint, 0644); MODULE_PARM_DESC(activation_width, "Width threshold to immediately start " "processing touch events."); static unsigned int activation_height = 32; module_param(activation_height, uint, 0644); MODULE_PARM_DESC(activation_height, "Height threshold to immediately start " "processing touch events."); struct ntrig_data { /* Incoming raw values for a single contact */ __u16 x, y, w, h; __u16 id; bool tipswitch; bool confidence; bool first_contact_touch; bool reading_mt; __u8 mt_footer[4]; __u8 mt_foot_count; /* The current activation state. */ __s8 act_state; /* Empty frames to ignore before recognizing the end of activity */ __s8 deactivate_slack; /* Frames to ignore before acknowledging the start of activity */ __s8 activate_slack; /* Minimum size contact to accept */ __u16 min_width; __u16 min_height; /* Threshold to override activation slack */ __u16 activation_width; __u16 activation_height; __u16 sensor_logical_width; __u16 sensor_logical_height; __u16 sensor_physical_width; __u16 sensor_physical_height; }; /* * This function converts the 4 byte raw firmware code into * a string containing 5 comma separated numbers. */ static int ntrig_version_string(unsigned char *raw, char *buf) { __u8 a = (raw[1] & 0x0e) >> 1; __u8 b = (raw[0] & 0x3c) >> 2; __u8 c = ((raw[0] & 0x03) << 3) | ((raw[3] & 0xe0) >> 5); __u8 d = ((raw[3] & 0x07) << 3) | ((raw[2] & 0xe0) >> 5); __u8 e = raw[2] & 0x07; /* * As yet unmapped bits: * 0b11000000 0b11110001 0b00011000 0b00011000 */ return sprintf(buf, "%u.%u.%u.%u.%u", a, b, c, d, e); } static inline int ntrig_get_mode(struct hid_device *hdev) { struct hid_report *report = hdev->report_enum[HID_FEATURE_REPORT]. report_id_hash[0x0d]; if (!report || report->maxfield < 1 || report->field[0]->report_count < 1) return -EINVAL; hid_hw_request(hdev, report, HID_REQ_GET_REPORT); hid_hw_wait(hdev); return (int)report->field[0]->value[0]; } static inline void ntrig_set_mode(struct hid_device *hdev, const int mode) { struct hid_report *report; __u8 mode_commands[4] = { 0xe, 0xf, 0x1b, 0x10 }; if (mode < 0 || mode > 3) return; report = hdev->report_enum[HID_FEATURE_REPORT]. report_id_hash[mode_commands[mode]]; if (!report) return; hid_hw_request(hdev, report, HID_REQ_GET_REPORT); } static void ntrig_report_version(struct hid_device *hdev) { int ret; char buf[20]; struct usb_device *usb_dev = hid_to_usb_dev(hdev); unsigned char *data = kmalloc(8, GFP_KERNEL); if (!data) goto err_free; ret = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), USB_REQ_CLEAR_FEATURE, USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN, 0x30c, 1, data, 8, USB_CTRL_SET_TIMEOUT); if (ret == 8) { ret = ntrig_version_string(&data[2], buf); hid_info(hdev, "Firmware version: %s (%02x%02x %02x%02x)\n", buf, data[2], data[3], data[4], data[5]); } err_free: kfree(data); } static ssize_t show_phys_width(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->sensor_physical_width); } static DEVICE_ATTR(sensor_physical_width, S_IRUGO, show_phys_width, NULL); static ssize_t show_phys_height(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->sensor_physical_height); } static DEVICE_ATTR(sensor_physical_height, S_IRUGO, show_phys_height, NULL); static ssize_t show_log_width(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->sensor_logical_width); } static DEVICE_ATTR(sensor_logical_width, S_IRUGO, show_log_width, NULL); static ssize_t show_log_height(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->sensor_logical_height); } static DEVICE_ATTR(sensor_logical_height, S_IRUGO, show_log_height, NULL); static ssize_t show_min_width(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->min_width * nd->sensor_physical_width / nd->sensor_logical_width); } static ssize_t set_min_width(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; if (val > nd->sensor_physical_width) return -EINVAL; nd->min_width = val * nd->sensor_logical_width / nd->sensor_physical_width; return count; } static DEVICE_ATTR(min_width, S_IWUSR | S_IRUGO, show_min_width, set_min_width); static ssize_t show_min_height(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->min_height * nd->sensor_physical_height / nd->sensor_logical_height); } static ssize_t set_min_height(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; if (val > nd->sensor_physical_height) return -EINVAL; nd->min_height = val * nd->sensor_logical_height / nd->sensor_physical_height; return count; } static DEVICE_ATTR(min_height, S_IWUSR | S_IRUGO, show_min_height, set_min_height); static ssize_t show_activate_slack(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->activate_slack); } static ssize_t set_activate_slack(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; if (val > 0x7f) return -EINVAL; nd->activate_slack = val; return count; } static DEVICE_ATTR(activate_slack, S_IWUSR | S_IRUGO, show_activate_slack, set_activate_slack); static ssize_t show_activation_width(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->activation_width * nd->sensor_physical_width / nd->sensor_logical_width); } static ssize_t set_activation_width(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; if (val > nd->sensor_physical_width) return -EINVAL; nd->activation_width = val * nd->sensor_logical_width / nd->sensor_physical_width; return count; } static DEVICE_ATTR(activation_width, S_IWUSR | S_IRUGO, show_activation_width, set_activation_width); static ssize_t show_activation_height(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", nd->activation_height * nd->sensor_physical_height / nd->sensor_logical_height); } static ssize_t set_activation_height(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; if (val > nd->sensor_physical_height) return -EINVAL; nd->activation_height = val * nd->sensor_logical_height / nd->sensor_physical_height; return count; } static DEVICE_ATTR(activation_height, S_IWUSR | S_IRUGO, show_activation_height, set_activation_height); static ssize_t show_deactivate_slack(struct device *dev, struct device_attribute *attr, char *buf) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); return sprintf(buf, "%d\n", -nd->deactivate_slack); } static ssize_t set_deactivate_slack(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct hid_device *hdev = to_hid_device(dev); struct ntrig_data *nd = hid_get_drvdata(hdev); unsigned long val; if (kstrtoul(buf, 0, &val)) return -EINVAL; /* * No more than 8 terminal frames have been observed so far * and higher slack is highly likely to leave the single * touch emulation stuck down. */ if (val > 7) return -EINVAL; nd->deactivate_slack = -val; return count; } static DEVICE_ATTR(deactivate_slack, S_IWUSR | S_IRUGO, show_deactivate_slack, set_deactivate_slack); static struct attribute *sysfs_attrs[] = { &dev_attr_sensor_physical_width.attr, &dev_attr_sensor_physical_height.attr, &dev_attr_sensor_logical_width.attr, &dev_attr_sensor_logical_height.attr, &dev_attr_min_height.attr, &dev_attr_min_width.attr, &dev_attr_activate_slack.attr, &dev_attr_activation_width.attr, &dev_attr_activation_height.attr, &dev_attr_deactivate_slack.attr, NULL }; static const struct attribute_group ntrig_attribute_group = { .attrs = sysfs_attrs }; /* * this driver is aimed at two firmware versions in circulation: * - dual pen/finger single touch * - finger multitouch, pen not working */ static int ntrig_input_mapping(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { struct ntrig_data *nd = hid_get_drvdata(hdev); /* No special mappings needed for the pen and single touch */ if (field->physical) return 0; switch (usage->hid & HID_USAGE_PAGE) { case HID_UP_GENDESK: switch (usage->hid) { case HID_GD_X: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_POSITION_X); input_set_abs_params(hi->input, ABS_X, field->logical_minimum, field->logical_maximum, 0, 0); if (!nd->sensor_logical_width) { nd->sensor_logical_width = field->logical_maximum - field->logical_minimum; nd->sensor_physical_width = field->physical_maximum - field->physical_minimum; nd->activation_width = activation_width * nd->sensor_logical_width / nd->sensor_physical_width; nd->min_width = min_width * nd->sensor_logical_width / nd->sensor_physical_width; } return 1; case HID_GD_Y: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_POSITION_Y); input_set_abs_params(hi->input, ABS_Y, field->logical_minimum, field->logical_maximum, 0, 0); if (!nd->sensor_logical_height) { nd->sensor_logical_height = field->logical_maximum - field->logical_minimum; nd->sensor_physical_height = field->physical_maximum - field->physical_minimum; nd->activation_height = activation_height * nd->sensor_logical_height / nd->sensor_physical_height; nd->min_height = min_height * nd->sensor_logical_height / nd->sensor_physical_height; } return 1; } return 0; case HID_UP_DIGITIZER: switch (usage->hid) { /* we do not want to map these for now */ case HID_DG_CONTACTID: /* Not trustworthy, squelch for now */ case HID_DG_INPUTMODE: case HID_DG_DEVICEINDEX: case HID_DG_CONTACTMAX: return -1; /* width/height mapped on TouchMajor/TouchMinor/Orientation */ case HID_DG_WIDTH: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_TOUCH_MAJOR); return 1; case HID_DG_HEIGHT: hid_map_usage(hi, usage, bit, max, EV_ABS, ABS_MT_TOUCH_MINOR); input_set_abs_params(hi->input, ABS_MT_ORIENTATION, 0, 1, 0, 0); return 1; } return 0; case 0xff000000: /* we do not want to map these: no input-oriented meaning */ return -1; } return 0; } static int ntrig_input_mapped(struct hid_device *hdev, struct hid_input *hi, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max) { /* No special mappings needed for the pen and single touch */ if (field->physical) return 0; if (usage->type == EV_KEY || usage->type == EV_REL || usage->type == EV_ABS) clear_bit(usage->code, *bit); return 0; } /* * this function is called upon all reports * so that we can filter contact point information, * decide whether we are in multi or single touch mode * and call input_mt_sync after each point if necessary */ static int ntrig_event (struct hid_device *hid, struct hid_field *field, struct hid_usage *usage, __s32 value) { struct ntrig_data *nd = hid_get_drvdata(hid); struct input_dev *input; /* Skip processing if not a claimed input */ if (!(hid->claimed & HID_CLAIMED_INPUT)) goto not_claimed_input; /* This function is being called before the structures are fully * initialized */ if(!(field->hidinput && field->hidinput->input)) return -EINVAL; input = field->hidinput->input; /* No special handling needed for the pen */ if (field->application == HID_DG_PEN) return 0; switch (usage->hid) { case 0xff000001: /* Tag indicating the start of a multitouch group */ nd->reading_mt = true; nd->first_contact_touch = false; break; case HID_DG_TIPSWITCH: nd->tipswitch = value; /* Prevent emission of touch until validated */ return 1; case HID_DG_CONFIDENCE: nd->confidence = value; break; case HID_GD_X: nd->x = value; /* Clear the contact footer */ nd->mt_foot_count = 0; break; case HID_GD_Y: nd->y = value; break; case HID_DG_CONTACTID: nd->id = value; break; case HID_DG_WIDTH: nd->w = value; break; case HID_DG_HEIGHT: nd->h = value; /* * when in single touch mode, this is the last * report received in a finger event. We want * to emit a normal (X, Y) position */ if (!nd->reading_mt) { /* * TipSwitch indicates the presence of a * finger in single touch mode. */ input_report_key(input, BTN_TOUCH, nd->tipswitch); input_report_key(input, BTN_TOOL_DOUBLETAP, nd->tipswitch); input_event(input, EV_ABS, ABS_X, nd->x); input_event(input, EV_ABS, ABS_Y, nd->y); } break; case 0xff000002: /* * we receive this when the device is in multitouch * mode. The first of the three values tagged with * this usage tells if the contact point is real * or a placeholder */ /* Shouldn't get more than 4 footer packets, so skip */ if (nd->mt_foot_count >= 4) break; nd->mt_footer[nd->mt_foot_count++] = value; /* if the footer isn't complete break */ if (nd->mt_foot_count != 4) break; /* Pen activity signal. */ if (nd->mt_footer[2]) { /* * When the pen deactivates touch, we see a * bogus frame with ContactCount > 0. * We can * save a bit of work by ensuring act_state < 0 * even if deactivation slack is turned off. */ nd->act_state = deactivate_slack - 1; nd->confidence = false; break; } /* * The first footer value indicates the presence of a * finger. */ if (nd->mt_footer[0]) { /* * We do not want to process contacts under * the size threshold, but do not want to * ignore them for activation state */ if (nd->w < nd->min_width || nd->h < nd->min_height) nd->confidence = false; } else break; if (nd->act_state > 0) { /* * Contact meets the activation size threshold */ if (nd->w >= nd->activation_width && nd->h >= nd->activation_height) { if (nd->id) /* * first contact, activate now */ nd->act_state = 0; else { /* * avoid corrupting this frame * but ensure next frame will * be active */ nd->act_state = 1; break; } } else /* * Defer adjusting the activation state * until the end of the frame. */ break; } /* Discarding this contact */ if (!nd->confidence) break; /* emit a normal (X, Y) for the first point only */ if (nd->id == 0) { /* * TipSwitch is superfluous in multitouch * mode. The footer events tell us * if there is a finger on the screen or * not. */ nd->first_contact_touch = nd->confidence; input_event(input, EV_ABS, ABS_X, nd->x); input_event(input, EV_ABS, ABS_Y, nd->y); } /* Emit MT events */ input_event(input, EV_ABS, ABS_MT_POSITION_X, nd->x); input_event(input, EV_ABS, ABS_MT_POSITION_Y, nd->y); /* * Translate from height and width to size * and orientation. */ if (nd->w > nd->h) { input_event(input, EV_ABS, ABS_MT_ORIENTATION, 1); input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, nd->w); input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, nd->h); } else { input_event(input, EV_ABS, ABS_MT_ORIENTATION, 0); input_event(input, EV_ABS, ABS_MT_TOUCH_MAJOR, nd->h); input_event(input, EV_ABS, ABS_MT_TOUCH_MINOR, nd->w); } input_mt_sync(field->hidinput->input); break; case HID_DG_CONTACTCOUNT: /* End of a multitouch group */ if (!nd->reading_mt) /* Just to be sure */ break; nd->reading_mt = false; /* * Activation state machine logic: * * Fundamental states: * state > 0: Inactive * state <= 0: Active * state < -deactivate_slack: * Pen termination of touch * * Specific values of interest * state == activate_slack * no valid input since the last reset * * state == 0 * general operational state * * state == -deactivate_slack * read sufficient empty frames to accept * the end of input and reset */ if (nd->act_state > 0) { /* Currently inactive */ if (value) /* * Consider each live contact as * evidence of intentional activity. */ nd->act_state = (nd->act_state > value) ? nd->act_state - value : 0; else /* * Empty frame before we hit the * activity threshold, reset. */ nd->act_state = nd->activate_slack; /* * Entered this block inactive and no * coordinates sent this frame, so hold off * on button state. */ break; } else { /* Currently active */ if (value && nd->act_state >= nd->deactivate_slack) /* * Live point: clear accumulated * deactivation count. */ nd->act_state = 0; else if (nd->act_state <= nd->deactivate_slack) /* * We've consumed the deactivation * slack, time to deactivate and reset. */ nd->act_state = nd->activate_slack; else { /* Move towards deactivation */ nd->act_state--; break; } } if (nd->first_contact_touch && nd->act_state <= 0) { /* * Check to see if we're ready to start * emitting touch events. * * Note: activation slack will decrease over * the course of the frame, and it will be * inconsistent from the start to the end of * the frame. However if the frame starts * with slack, first_contact_touch will still * be 0 and we will not get to this point. */ input_report_key(input, BTN_TOOL_DOUBLETAP, 1); input_report_key(input, BTN_TOUCH, 1); } else { input_report_key(input, BTN_TOOL_DOUBLETAP, 0); input_report_key(input, BTN_TOUCH, 0); } break; default: /* fall-back to the generic hidinput handling */ return 0; } not_claimed_input: /* we have handled the hidinput part, now remains hiddev */ if ((hid->claimed & HID_CLAIMED_HIDDEV) && hid->hiddev_hid_event) hid->hiddev_hid_event(hid, field, usage, value); return 1; } static int ntrig_input_configured(struct hid_device *hid, struct hid_input *hidinput) { struct input_dev *input = hidinput->input; if (hidinput->report->maxfield < 1) return 0; switch (hidinput->report->field[0]->application) { case HID_DG_PEN: input->name = "N-Trig Pen"; break; case HID_DG_TOUCHSCREEN: /* These keys are redundant for fingers, clear them * to prevent incorrect identification */ __clear_bit(BTN_TOOL_PEN, input->keybit); __clear_bit(BTN_TOOL_FINGER, input->keybit); __clear_bit(BTN_0, input->keybit); __set_bit(BTN_TOOL_DOUBLETAP, input->keybit); /* * The physical touchscreen (single touch) * input has a value for physical, whereas * the multitouch only has logical input * fields. */ input->name = (hidinput->report->field[0]->physical) ? "N-Trig Touchscreen" : "N-Trig MultiTouch"; break; } return 0; } static int ntrig_probe(struct hid_device *hdev, const struct hid_device_id *id) { int ret; struct ntrig_data *nd; struct hid_report *report; if (id->driver_data) hdev->quirks |= HID_QUIRK_MULTI_INPUT | HID_QUIRK_NO_INIT_REPORTS; nd = kmalloc(sizeof(struct ntrig_data), GFP_KERNEL); if (!nd) { hid_err(hdev, "cannot allocate N-Trig data\n"); return -ENOMEM; } nd->reading_mt = false; nd->min_width = 0; nd->min_height = 0; nd->activate_slack = activate_slack; nd->act_state = activate_slack; nd->deactivate_slack = -deactivate_slack; nd->sensor_logical_width = 1; nd->sensor_logical_height = 1; nd->sensor_physical_width = 1; nd->sensor_physical_height = 1; hid_set_drvdata(hdev, nd); ret = hid_parse(hdev); if (ret) { hid_err(hdev, "parse failed\n"); goto err_free; } ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT & ~HID_CONNECT_FF); if (ret) { hid_err(hdev, "hw start failed\n"); goto err_free; } /* This is needed for devices with more recent firmware versions */ report = hdev->report_enum[HID_FEATURE_REPORT].report_id_hash[0x0a]; if (report) { /* Let the device settle to ensure the wakeup message gets * through */ hid_hw_wait(hdev); hid_hw_request(hdev, report, HID_REQ_GET_REPORT); /* * Sanity check: if the current mode is invalid reset it to * something reasonable. */ if (ntrig_get_mode(hdev) >= 4) ntrig_set_mode(hdev, 3); } ntrig_report_version(hdev); ret = sysfs_create_group(&hdev->dev.kobj, &ntrig_attribute_group); if (ret) hid_err(hdev, "cannot create sysfs group\n"); return 0; err_free: kfree(nd); return ret; } static void ntrig_remove(struct hid_device *hdev) { sysfs_remove_group(&hdev->dev.kobj, &ntrig_attribute_group); hid_hw_stop(hdev); kfree(hid_get_drvdata(hdev)); } static const struct hid_device_id ntrig_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_1), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_2), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_3), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_4), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_5), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_6), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_7), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_8), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_9), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_10), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_11), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_12), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_13), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_14), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_15), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_16), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_17), .driver_data = NTRIG_DUPLICATE_USAGES }, { HID_USB_DEVICE(USB_VENDOR_ID_NTRIG, USB_DEVICE_ID_NTRIG_TOUCH_SCREEN_18), .driver_data = NTRIG_DUPLICATE_USAGES }, { } }; MODULE_DEVICE_TABLE(hid, ntrig_devices); static const struct hid_usage_id ntrig_grabbed_usages[] = { { HID_ANY_ID, HID_ANY_ID, HID_ANY_ID }, { HID_ANY_ID - 1, HID_ANY_ID - 1, HID_ANY_ID - 1 } }; static struct hid_driver ntrig_driver = { .name = "ntrig", .id_table = ntrig_devices, .probe = ntrig_probe, .remove = ntrig_remove, .input_mapping = ntrig_input_mapping, .input_mapped = ntrig_input_mapped, .input_configured = ntrig_input_configured, .usage_table = ntrig_grabbed_usages, .event = ntrig_event, }; module_hid_driver(ntrig_driver); MODULE_DESCRIPTION("HID driver for N-Trig touchscreens"); MODULE_LICENSE("GPL"); |
12 1 2 2 11 11 11 8 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 | // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) B.A.T.M.A.N. contributors: * * Martin Hundebøll, Jeppe Ledet-Pedersen */ #include "network-coding.h" #include "main.h" #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/byteorder/generic.h> #include <linux/compiler.h> #include <linux/container_of.h> #include <linux/errno.h> #include <linux/etherdevice.h> #include <linux/gfp.h> #include <linux/if_ether.h> #include <linux/if_packet.h> #include <linux/init.h> #include <linux/jhash.h> #include <linux/jiffies.h> #include <linux/kref.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/net.h> #include <linux/netdevice.h> #include <linux/printk.h> #include <linux/random.h> #include <linux/rculist.h> #include <linux/rcupdate.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/spinlock.h> #include <linux/stddef.h> #include <linux/string.h> #include <linux/workqueue.h> #include <uapi/linux/batadv_packet.h> #include "hash.h" #include "log.h" #include "originator.h" #include "routing.h" #include "send.h" #include "tvlv.h" static struct lock_class_key batadv_nc_coding_hash_lock_class_key; static struct lock_class_key batadv_nc_decoding_hash_lock_class_key; static void batadv_nc_worker(struct work_struct *work); static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); /** * batadv_nc_init() - one-time initialization for network coding * * Return: 0 on success or negative error number in case of failure */ int __init batadv_nc_init(void) { /* Register our packet type */ return batadv_recv_handler_register(BATADV_CODED, batadv_nc_recv_coded_packet); } /** * batadv_nc_start_timer() - initialise the nc periodic worker * @bat_priv: the bat priv with all the soft interface information */ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) { queue_delayed_work(batadv_event_workqueue, &bat_priv->nc.work, msecs_to_jiffies(10)); } /** * batadv_nc_tvlv_container_update() - update the network coding tvlv container * after network coding setting change * @bat_priv: the bat priv with all the soft interface information */ static void batadv_nc_tvlv_container_update(struct batadv_priv *bat_priv) { char nc_mode; nc_mode = atomic_read(&bat_priv->network_coding); switch (nc_mode) { case 0: batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_NC, 1); break; case 1: batadv_tvlv_container_register(bat_priv, BATADV_TVLV_NC, 1, NULL, 0); break; } } /** * batadv_nc_status_update() - update the network coding tvlv container after * network coding setting change * @net_dev: the soft interface net device */ void batadv_nc_status_update(struct net_device *net_dev) { struct batadv_priv *bat_priv = netdev_priv(net_dev); batadv_nc_tvlv_container_update(bat_priv); } /** * batadv_nc_tvlv_ogm_handler_v1() - process incoming nc tvlv container * @bat_priv: the bat priv with all the soft interface information * @orig: the orig_node of the ogm * @flags: flags indicating the tvlv state (see batadv_tvlv_handler_flags) * @tvlv_value: tvlv buffer containing the gateway data * @tvlv_value_len: tvlv buffer length */ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, struct batadv_orig_node *orig, u8 flags, void *tvlv_value, u16 tvlv_value_len) { if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) clear_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities); else set_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities); } /** * batadv_nc_mesh_init() - initialise coding hash table and start housekeeping * @bat_priv: the bat priv with all the soft interface information * * Return: 0 on success or negative error number in case of failure */ int batadv_nc_mesh_init(struct batadv_priv *bat_priv) { bat_priv->nc.timestamp_fwd_flush = jiffies; bat_priv->nc.timestamp_sniffed_purge = jiffies; if (bat_priv->nc.coding_hash || bat_priv->nc.decoding_hash) return 0; bat_priv->nc.coding_hash = batadv_hash_new(128); if (!bat_priv->nc.coding_hash) goto err; batadv_hash_set_lock_class(bat_priv->nc.coding_hash, &batadv_nc_coding_hash_lock_class_key); bat_priv->nc.decoding_hash = batadv_hash_new(128); if (!bat_priv->nc.decoding_hash) { batadv_hash_destroy(bat_priv->nc.coding_hash); goto err; } batadv_hash_set_lock_class(bat_priv->nc.decoding_hash, &batadv_nc_decoding_hash_lock_class_key); INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); batadv_nc_start_timer(bat_priv); batadv_tvlv_handler_register(bat_priv, batadv_nc_tvlv_ogm_handler_v1, NULL, NULL, BATADV_TVLV_NC, 1, BATADV_TVLV_HANDLER_OGM_CIFNOTFND); batadv_nc_tvlv_container_update(bat_priv); return 0; err: return -ENOMEM; } /** * batadv_nc_init_bat_priv() - initialise the nc specific bat_priv variables * @bat_priv: the bat priv with all the soft interface information */ void batadv_nc_init_bat_priv(struct batadv_priv *bat_priv) { atomic_set(&bat_priv->network_coding, 0); bat_priv->nc.min_tq = 200; bat_priv->nc.max_fwd_delay = 10; bat_priv->nc.max_buffer_time = 200; } /** * batadv_nc_init_orig() - initialise the nc fields of an orig_node * @orig_node: the orig_node which is going to be initialised */ void batadv_nc_init_orig(struct batadv_orig_node *orig_node) { INIT_LIST_HEAD(&orig_node->in_coding_list); INIT_LIST_HEAD(&orig_node->out_coding_list); spin_lock_init(&orig_node->in_coding_list_lock); spin_lock_init(&orig_node->out_coding_list_lock); } /** * batadv_nc_node_release() - release nc_node from lists and queue for free * after rcu grace period * @ref: kref pointer of the nc_node */ static void batadv_nc_node_release(struct kref *ref) { struct batadv_nc_node *nc_node; nc_node = container_of(ref, struct batadv_nc_node, refcount); batadv_orig_node_put(nc_node->orig_node); kfree_rcu(nc_node, rcu); } /** * batadv_nc_node_put() - decrement the nc_node refcounter and possibly * release it * @nc_node: nc_node to be free'd */ static void batadv_nc_node_put(struct batadv_nc_node *nc_node) { if (!nc_node) return; kref_put(&nc_node->refcount, batadv_nc_node_release); } /** * batadv_nc_path_release() - release nc_path from lists and queue for free * after rcu grace period * @ref: kref pointer of the nc_path */ static void batadv_nc_path_release(struct kref *ref) { struct batadv_nc_path *nc_path; nc_path = container_of(ref, struct batadv_nc_path, refcount); kfree_rcu(nc_path, rcu); } /** * batadv_nc_path_put() - decrement the nc_path refcounter and possibly * release it * @nc_path: nc_path to be free'd */ static void batadv_nc_path_put(struct batadv_nc_path *nc_path) { if (!nc_path) return; kref_put(&nc_path->refcount, batadv_nc_path_release); } /** * batadv_nc_packet_free() - frees nc packet * @nc_packet: the nc packet to free * @dropped: whether the packet is freed because is dropped */ static void batadv_nc_packet_free(struct batadv_nc_packet *nc_packet, bool dropped) { if (dropped) kfree_skb(nc_packet->skb); else consume_skb(nc_packet->skb); batadv_nc_path_put(nc_packet->nc_path); kfree(nc_packet); } /** * batadv_nc_to_purge_nc_node() - checks whether an nc node has to be purged * @bat_priv: the bat priv with all the soft interface information * @nc_node: the nc node to check * * Return: true if the entry has to be purged now, false otherwise */ static bool batadv_nc_to_purge_nc_node(struct batadv_priv *bat_priv, struct batadv_nc_node *nc_node) { if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) return true; return batadv_has_timed_out(nc_node->last_seen, BATADV_NC_NODE_TIMEOUT); } /** * batadv_nc_to_purge_nc_path_coding() - checks whether an nc path has timed out * @bat_priv: the bat priv with all the soft interface information * @nc_path: the nc path to check * * Return: true if the entry has to be purged now, false otherwise */ static bool batadv_nc_to_purge_nc_path_coding(struct batadv_priv *bat_priv, struct batadv_nc_path *nc_path) { if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) return true; /* purge the path when no packets has been added for 10 times the * max_fwd_delay time */ return batadv_has_timed_out(nc_path->last_valid, bat_priv->nc.max_fwd_delay * 10); } /** * batadv_nc_to_purge_nc_path_decoding() - checks whether an nc path has timed * out * @bat_priv: the bat priv with all the soft interface information * @nc_path: the nc path to check * * Return: true if the entry has to be purged now, false otherwise */ static bool batadv_nc_to_purge_nc_path_decoding(struct batadv_priv *bat_priv, struct batadv_nc_path *nc_path) { if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE) return true; /* purge the path when no packets has been added for 10 times the * max_buffer time */ return batadv_has_timed_out(nc_path->last_valid, bat_priv->nc.max_buffer_time * 10); } /** * batadv_nc_purge_orig_nc_nodes() - go through list of nc nodes and purge stale * entries * @bat_priv: the bat priv with all the soft interface information * @list: list of nc nodes * @lock: nc node list lock * @to_purge: function in charge to decide whether an entry has to be purged or * not. This function takes the nc node as argument and has to return * a boolean value: true if the entry has to be deleted, false * otherwise */ static void batadv_nc_purge_orig_nc_nodes(struct batadv_priv *bat_priv, struct list_head *list, spinlock_t *lock, bool (*to_purge)(struct batadv_priv *, struct batadv_nc_node *)) { struct batadv_nc_node *nc_node, *nc_node_tmp; /* For each nc_node in list */ spin_lock_bh(lock); list_for_each_entry_safe(nc_node, nc_node_tmp, list, list) { /* if an helper function has been passed as parameter, * ask it if the entry has to be purged or not */ if (to_purge && !to_purge(bat_priv, nc_node)) continue; batadv_dbg(BATADV_DBG_NC, bat_priv, "Removing nc_node %pM -> %pM\n", nc_node->addr, nc_node->orig_node->orig); list_del_rcu(&nc_node->list); batadv_nc_node_put(nc_node); } spin_unlock_bh(lock); } /** * batadv_nc_purge_orig() - purges all nc node data attached of the given * originator * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig_node with the nc node entries to be purged * @to_purge: function in charge to decide whether an entry has to be purged or * not. This function takes the nc node as argument and has to return * a boolean value: true is the entry has to be deleted, false * otherwise */ void batadv_nc_purge_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, bool (*to_purge)(struct batadv_priv *, struct batadv_nc_node *)) { /* Check ingoing nc_node's of this orig_node */ batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->in_coding_list, &orig_node->in_coding_list_lock, to_purge); /* Check outgoing nc_node's of this orig_node */ batadv_nc_purge_orig_nc_nodes(bat_priv, &orig_node->out_coding_list, &orig_node->out_coding_list_lock, to_purge); } /** * batadv_nc_purge_orig_hash() - traverse entire originator hash to check if * they have timed out nc nodes * @bat_priv: the bat priv with all the soft interface information */ static void batadv_nc_purge_orig_hash(struct batadv_priv *bat_priv) { struct batadv_hashtable *hash = bat_priv->orig_hash; struct hlist_head *head; struct batadv_orig_node *orig_node; u32 i; if (!hash) return; /* For each orig_node */ for (i = 0; i < hash->size; i++) { head = &hash->table[i]; rcu_read_lock(); hlist_for_each_entry_rcu(orig_node, head, hash_entry) batadv_nc_purge_orig(bat_priv, orig_node, batadv_nc_to_purge_nc_node); rcu_read_unlock(); } } /** * batadv_nc_purge_paths() - traverse all nc paths part of the hash and remove * unused ones * @bat_priv: the bat priv with all the soft interface information * @hash: hash table containing the nc paths to check * @to_purge: function in charge to decide whether an entry has to be purged or * not. This function takes the nc node as argument and has to return * a boolean value: true is the entry has to be deleted, false * otherwise */ static void batadv_nc_purge_paths(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, bool (*to_purge)(struct batadv_priv *, struct batadv_nc_path *)) { struct hlist_head *head; struct hlist_node *node_tmp; struct batadv_nc_path *nc_path; spinlock_t *lock; /* Protects lists in hash */ u32 i; for (i = 0; i < hash->size; i++) { head = &hash->table[i]; lock = &hash->list_locks[i]; /* For each nc_path in this bin */ spin_lock_bh(lock); hlist_for_each_entry_safe(nc_path, node_tmp, head, hash_entry) { /* if an helper function has been passed as parameter, * ask it if the entry has to be purged or not */ if (to_purge && !to_purge(bat_priv, nc_path)) continue; /* purging an non-empty nc_path should never happen, but * is observed under high CPU load. Delay the purging * until next iteration to allow the packet_list to be * emptied first. */ if (!unlikely(list_empty(&nc_path->packet_list))) { net_ratelimited_function(printk, KERN_WARNING "Skipping free of non-empty nc_path (%pM -> %pM)!\n", nc_path->prev_hop, nc_path->next_hop); continue; } /* nc_path is unused, so remove it */ batadv_dbg(BATADV_DBG_NC, bat_priv, "Remove nc_path %pM -> %pM\n", nc_path->prev_hop, nc_path->next_hop); hlist_del_rcu(&nc_path->hash_entry); batadv_nc_path_put(nc_path); } spin_unlock_bh(lock); } } /** * batadv_nc_hash_key_gen() - computes the nc_path hash key * @key: buffer to hold the final hash key * @src: source ethernet mac address going into the hash key * @dst: destination ethernet mac address going into the hash key */ static void batadv_nc_hash_key_gen(struct batadv_nc_path *key, const char *src, const char *dst) { memcpy(key->prev_hop, src, sizeof(key->prev_hop)); memcpy(key->next_hop, dst, sizeof(key->next_hop)); } /** * batadv_nc_hash_choose() - compute the hash value for an nc path * @data: data to hash * @size: size of the hash table * * Return: the selected index in the hash table for the given data. */ static u32 batadv_nc_hash_choose(const void *data, u32 size) { const struct batadv_nc_path *nc_path = data; u32 hash = 0; hash = jhash(&nc_path->prev_hop, sizeof(nc_path->prev_hop), hash); hash = jhash(&nc_path->next_hop, sizeof(nc_path->next_hop), hash); return hash % size; } /** * batadv_nc_hash_compare() - comparing function used in the network coding hash * tables * @node: node in the local table * @data2: second object to compare the node to * * Return: true if the two entry are the same, false otherwise */ static bool batadv_nc_hash_compare(const struct hlist_node *node, const void *data2) { const struct batadv_nc_path *nc_path1, *nc_path2; nc_path1 = container_of(node, struct batadv_nc_path, hash_entry); nc_path2 = data2; /* Return 1 if the two keys are identical */ if (!batadv_compare_eth(nc_path1->prev_hop, nc_path2->prev_hop)) return false; if (!batadv_compare_eth(nc_path1->next_hop, nc_path2->next_hop)) return false; return true; } /** * batadv_nc_hash_find() - search for an existing nc path and return it * @hash: hash table containing the nc path * @data: search key * * Return: the nc_path if found, NULL otherwise. */ static struct batadv_nc_path * batadv_nc_hash_find(struct batadv_hashtable *hash, void *data) { struct hlist_head *head; struct batadv_nc_path *nc_path, *nc_path_tmp = NULL; int index; if (!hash) return NULL; index = batadv_nc_hash_choose(data, hash->size); head = &hash->table[index]; rcu_read_lock(); hlist_for_each_entry_rcu(nc_path, head, hash_entry) { if (!batadv_nc_hash_compare(&nc_path->hash_entry, data)) continue; if (!kref_get_unless_zero(&nc_path->refcount)) continue; nc_path_tmp = nc_path; break; } rcu_read_unlock(); return nc_path_tmp; } /** * batadv_nc_send_packet() - send non-coded packet and free nc_packet struct * @nc_packet: the nc packet to send */ static void batadv_nc_send_packet(struct batadv_nc_packet *nc_packet) { batadv_send_unicast_skb(nc_packet->skb, nc_packet->neigh_node); nc_packet->skb = NULL; batadv_nc_packet_free(nc_packet, false); } /** * batadv_nc_sniffed_purge() - Checks timestamp of given sniffed nc_packet. * @bat_priv: the bat priv with all the soft interface information * @nc_path: the nc path the packet belongs to * @nc_packet: the nc packet to be checked * * Checks whether the given sniffed (overheard) nc_packet has hit its buffering * timeout. If so, the packet is no longer kept and the entry deleted from the * queue. Has to be called with the appropriate locks. * * Return: false as soon as the entry in the fifo queue has not been timed out * yet and true otherwise. */ static bool batadv_nc_sniffed_purge(struct batadv_priv *bat_priv, struct batadv_nc_path *nc_path, struct batadv_nc_packet *nc_packet) { unsigned long timeout = bat_priv->nc.max_buffer_time; bool res = false; lockdep_assert_held(&nc_path->packet_list_lock); /* Packets are added to tail, so the remaining packets did not time * out and we can stop processing the current queue */ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE && !batadv_has_timed_out(nc_packet->timestamp, timeout)) goto out; /* purge nc packet */ list_del(&nc_packet->list); batadv_nc_packet_free(nc_packet, true); res = true; out: return res; } /** * batadv_nc_fwd_flush() - Checks the timestamp of the given nc packet. * @bat_priv: the bat priv with all the soft interface information * @nc_path: the nc path the packet belongs to * @nc_packet: the nc packet to be checked * * Checks whether the given nc packet has hit its forward timeout. If so, the * packet is no longer delayed, immediately sent and the entry deleted from the * queue. Has to be called with the appropriate locks. * * Return: false as soon as the entry in the fifo queue has not been timed out * yet and true otherwise. */ static bool batadv_nc_fwd_flush(struct batadv_priv *bat_priv, struct batadv_nc_path *nc_path, struct batadv_nc_packet *nc_packet) { unsigned long timeout = bat_priv->nc.max_fwd_delay; lockdep_assert_held(&nc_path->packet_list_lock); /* Packets are added to tail, so the remaining packets did not time * out and we can stop processing the current queue */ if (atomic_read(&bat_priv->mesh_state) == BATADV_MESH_ACTIVE && !batadv_has_timed_out(nc_packet->timestamp, timeout)) return false; /* Send packet */ batadv_inc_counter(bat_priv, BATADV_CNT_FORWARD); batadv_add_counter(bat_priv, BATADV_CNT_FORWARD_BYTES, nc_packet->skb->len + ETH_HLEN); list_del(&nc_packet->list); batadv_nc_send_packet(nc_packet); return true; } /** * batadv_nc_process_nc_paths() - traverse given nc packet pool and free timed * out nc packets * @bat_priv: the bat priv with all the soft interface information * @hash: to be processed hash table * @process_fn: Function called to process given nc packet. Should return true * to encourage this function to proceed with the next packet. * Otherwise the rest of the current queue is skipped. */ static void batadv_nc_process_nc_paths(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, bool (*process_fn)(struct batadv_priv *, struct batadv_nc_path *, struct batadv_nc_packet *)) { struct hlist_head *head; struct batadv_nc_packet *nc_packet, *nc_packet_tmp; struct batadv_nc_path *nc_path; bool ret; int i; if (!hash) return; /* Loop hash table bins */ for (i = 0; i < hash->size; i++) { head = &hash->table[i]; /* Loop coding paths */ rcu_read_lock(); hlist_for_each_entry_rcu(nc_path, head, hash_entry) { /* Loop packets */ spin_lock_bh(&nc_path->packet_list_lock); list_for_each_entry_safe(nc_packet, nc_packet_tmp, &nc_path->packet_list, list) { ret = process_fn(bat_priv, nc_path, nc_packet); if (!ret) break; } spin_unlock_bh(&nc_path->packet_list_lock); } rcu_read_unlock(); } } /** * batadv_nc_worker() - periodic task for housekeeping related to network * coding * @work: kernel work struct */ static void batadv_nc_worker(struct work_struct *work) { struct delayed_work *delayed_work; struct batadv_priv_nc *priv_nc; struct batadv_priv *bat_priv; unsigned long timeout; delayed_work = to_delayed_work(work); priv_nc = container_of(delayed_work, struct batadv_priv_nc, work); bat_priv = container_of(priv_nc, struct batadv_priv, nc); batadv_nc_purge_orig_hash(bat_priv); batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, batadv_nc_to_purge_nc_path_coding); batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, batadv_nc_to_purge_nc_path_decoding); timeout = bat_priv->nc.max_fwd_delay; if (batadv_has_timed_out(bat_priv->nc.timestamp_fwd_flush, timeout)) { batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.coding_hash, batadv_nc_fwd_flush); bat_priv->nc.timestamp_fwd_flush = jiffies; } if (batadv_has_timed_out(bat_priv->nc.timestamp_sniffed_purge, bat_priv->nc.max_buffer_time)) { batadv_nc_process_nc_paths(bat_priv, bat_priv->nc.decoding_hash, batadv_nc_sniffed_purge); bat_priv->nc.timestamp_sniffed_purge = jiffies; } /* Schedule a new check */ batadv_nc_start_timer(bat_priv); } /** * batadv_can_nc_with_orig() - checks whether the given orig node is suitable * for coding or not * @bat_priv: the bat priv with all the soft interface information * @orig_node: neighboring orig node which may be used as nc candidate * @ogm_packet: incoming ogm packet also used for the checks * * Return: true if: * 1) The OGM must have the most recent sequence number. * 2) The TTL must be decremented by one and only one. * 3) The OGM must be received from the first hop from orig_node. * 4) The TQ value of the OGM must be above bat_priv->nc.min_tq. */ static bool batadv_can_nc_with_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_ogm_packet *ogm_packet) { struct batadv_orig_ifinfo *orig_ifinfo; u32 last_real_seqno; u8 last_ttl; orig_ifinfo = batadv_orig_ifinfo_get(orig_node, BATADV_IF_DEFAULT); if (!orig_ifinfo) return false; last_ttl = orig_ifinfo->last_ttl; last_real_seqno = orig_ifinfo->last_real_seqno; batadv_orig_ifinfo_put(orig_ifinfo); if (last_real_seqno != ntohl(ogm_packet->seqno)) return false; if (last_ttl != ogm_packet->ttl + 1) return false; if (!batadv_compare_eth(ogm_packet->orig, ogm_packet->prev_sender)) return false; if (ogm_packet->tq < bat_priv->nc.min_tq) return false; return true; } /** * batadv_nc_find_nc_node() - search for an existing nc node and return it * @orig_node: orig node originating the ogm packet * @orig_neigh_node: neighboring orig node from which we received the ogm packet * (can be equal to orig_node) * @in_coding: traverse incoming or outgoing network coding list * * Return: the nc_node if found, NULL otherwise. */ static struct batadv_nc_node * batadv_nc_find_nc_node(struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, bool in_coding) { struct batadv_nc_node *nc_node, *nc_node_out = NULL; struct list_head *list; if (in_coding) list = &orig_neigh_node->in_coding_list; else list = &orig_neigh_node->out_coding_list; /* Traverse list of nc_nodes to orig_node */ rcu_read_lock(); list_for_each_entry_rcu(nc_node, list, list) { if (!batadv_compare_eth(nc_node->addr, orig_node->orig)) continue; if (!kref_get_unless_zero(&nc_node->refcount)) continue; /* Found a match */ nc_node_out = nc_node; break; } rcu_read_unlock(); return nc_node_out; } /** * batadv_nc_get_nc_node() - retrieves an nc node or creates the entry if it was * not found * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node originating the ogm packet * @orig_neigh_node: neighboring orig node from which we received the ogm packet * (can be equal to orig_node) * @in_coding: traverse incoming or outgoing network coding list * * Return: the nc_node if found or created, NULL in case of an error. */ static struct batadv_nc_node * batadv_nc_get_nc_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, bool in_coding) { struct batadv_nc_node *nc_node; spinlock_t *lock; /* Used to lock list selected by "int in_coding" */ struct list_head *list; /* Select ingoing or outgoing coding node */ if (in_coding) { lock = &orig_neigh_node->in_coding_list_lock; list = &orig_neigh_node->in_coding_list; } else { lock = &orig_neigh_node->out_coding_list_lock; list = &orig_neigh_node->out_coding_list; } spin_lock_bh(lock); /* Check if nc_node is already added */ nc_node = batadv_nc_find_nc_node(orig_node, orig_neigh_node, in_coding); /* Node found */ if (nc_node) goto unlock; nc_node = kzalloc(sizeof(*nc_node), GFP_ATOMIC); if (!nc_node) goto unlock; /* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); kref_init(&nc_node->refcount); ether_addr_copy(nc_node->addr, orig_node->orig); kref_get(&orig_neigh_node->refcount); nc_node->orig_node = orig_neigh_node; batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_node %pM -> %pM\n", nc_node->addr, nc_node->orig_node->orig); /* Add nc_node to orig_node */ kref_get(&nc_node->refcount); list_add_tail_rcu(&nc_node->list, list); unlock: spin_unlock_bh(lock); return nc_node; } /** * batadv_nc_update_nc_node() - updates stored incoming and outgoing nc node * structs (best called on incoming OGMs) * @bat_priv: the bat priv with all the soft interface information * @orig_node: orig node originating the ogm packet * @orig_neigh_node: neighboring orig node from which we received the ogm packet * (can be equal to orig_node) * @ogm_packet: incoming ogm packet * @is_single_hop_neigh: orig_node is a single hop neighbor */ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, struct batadv_ogm_packet *ogm_packet, int is_single_hop_neigh) { struct batadv_nc_node *in_nc_node = NULL; struct batadv_nc_node *out_nc_node = NULL; /* Check if network coding is enabled */ if (!atomic_read(&bat_priv->network_coding)) goto out; /* check if orig node is network coding enabled */ if (!test_bit(BATADV_ORIG_CAPA_HAS_NC, &orig_node->capabilities)) goto out; /* accept ogms from 'good' neighbors and single hop neighbors */ if (!batadv_can_nc_with_orig(bat_priv, orig_node, ogm_packet) && !is_single_hop_neigh) goto out; /* Add orig_node as in_nc_node on hop */ in_nc_node = batadv_nc_get_nc_node(bat_priv, orig_node, orig_neigh_node, true); if (!in_nc_node) goto out; in_nc_node->last_seen = jiffies; /* Add hop as out_nc_node on orig_node */ out_nc_node = batadv_nc_get_nc_node(bat_priv, orig_neigh_node, orig_node, false); if (!out_nc_node) goto out; out_nc_node->last_seen = jiffies; out: batadv_nc_node_put(in_nc_node); batadv_nc_node_put(out_nc_node); } /** * batadv_nc_get_path() - get existing nc_path or allocate a new one * @bat_priv: the bat priv with all the soft interface information * @hash: hash table containing the nc path * @src: ethernet source address - first half of the nc path search key * @dst: ethernet destination address - second half of the nc path search key * * Return: pointer to nc_path if the path was found or created, returns NULL * on error. */ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, struct batadv_hashtable *hash, u8 *src, u8 *dst) { int hash_added; struct batadv_nc_path *nc_path, nc_path_key; batadv_nc_hash_key_gen(&nc_path_key, src, dst); /* Search for existing nc_path */ nc_path = batadv_nc_hash_find(hash, (void *)&nc_path_key); if (nc_path) { /* Set timestamp to delay removal of nc_path */ nc_path->last_valid = jiffies; return nc_path; } /* No existing nc_path was found; create a new */ nc_path = kzalloc(sizeof(*nc_path), GFP_ATOMIC); if (!nc_path) return NULL; /* Initialize nc_path */ INIT_LIST_HEAD(&nc_path->packet_list); spin_lock_init(&nc_path->packet_list_lock); kref_init(&nc_path->refcount); nc_path->last_valid = jiffies; ether_addr_copy(nc_path->next_hop, dst); ether_addr_copy(nc_path->prev_hop, src); batadv_dbg(BATADV_DBG_NC, bat_priv, "Adding nc_path %pM -> %pM\n", nc_path->prev_hop, nc_path->next_hop); /* Add nc_path to hash table */ kref_get(&nc_path->refcount); hash_added = batadv_hash_add(hash, batadv_nc_hash_compare, batadv_nc_hash_choose, &nc_path_key, &nc_path->hash_entry); if (hash_added < 0) { kfree(nc_path); return NULL; } return nc_path; } /** * batadv_nc_random_weight_tq() - scale the receivers TQ-value to avoid unfair * selection of a receiver with slightly lower TQ than the other * @tq: to be weighted tq value * * Return: scaled tq value */ static u8 batadv_nc_random_weight_tq(u8 tq) { /* randomize the estimated packet loss (max TQ - estimated TQ) */ u8 rand_tq = get_random_u32_below(BATADV_TQ_MAX_VALUE + 1 - tq); /* convert to (randomized) estimated tq again */ return BATADV_TQ_MAX_VALUE - rand_tq; } /** * batadv_nc_memxor() - XOR destination with source * @dst: byte array to XOR into * @src: byte array to XOR from * @len: length of destination array */ static void batadv_nc_memxor(char *dst, const char *src, unsigned int len) { unsigned int i; for (i = 0; i < len; ++i) dst[i] ^= src[i]; } /** * batadv_nc_code_packets() - code a received unicast_packet with an nc packet * into a coded_packet and send it * @bat_priv: the bat priv with all the soft interface information * @skb: data skb to forward * @ethhdr: pointer to the ethernet header inside the skb * @nc_packet: structure containing the packet to the skb can be coded with * @neigh_node: next hop to forward packet to * * Return: true if both packets are consumed, false otherwise. */ static bool batadv_nc_code_packets(struct batadv_priv *bat_priv, struct sk_buff *skb, struct ethhdr *ethhdr, struct batadv_nc_packet *nc_packet, struct batadv_neigh_node *neigh_node) { u8 tq_weighted_neigh, tq_weighted_coding, tq_tmp; struct sk_buff *skb_dest, *skb_src; struct batadv_unicast_packet *packet1; struct batadv_unicast_packet *packet2; struct batadv_coded_packet *coded_packet; struct batadv_neigh_node *neigh_tmp, *router_neigh, *first_dest; struct batadv_neigh_node *router_coding = NULL, *second_dest; struct batadv_neigh_ifinfo *router_neigh_ifinfo = NULL; struct batadv_neigh_ifinfo *router_coding_ifinfo = NULL; u8 *first_source, *second_source; __be32 packet_id1, packet_id2; size_t count; bool res = false; int coding_len; int unicast_size = sizeof(*packet1); int coded_size = sizeof(*coded_packet); int header_add = coded_size - unicast_size; /* TODO: do we need to consider the outgoing interface for * coded packets? */ router_neigh = batadv_orig_router_get(neigh_node->orig_node, BATADV_IF_DEFAULT); if (!router_neigh) goto out; router_neigh_ifinfo = batadv_neigh_ifinfo_get(router_neigh, BATADV_IF_DEFAULT); if (!router_neigh_ifinfo) goto out; neigh_tmp = nc_packet->neigh_node; router_coding = batadv_orig_router_get(neigh_tmp->orig_node, BATADV_IF_DEFAULT); if (!router_coding) goto out; router_coding_ifinfo = batadv_neigh_ifinfo_get(router_coding, BATADV_IF_DEFAULT); if (!router_coding_ifinfo) goto out; tq_tmp = router_neigh_ifinfo->bat_iv.tq_avg; tq_weighted_neigh = batadv_nc_random_weight_tq(tq_tmp); tq_tmp = router_coding_ifinfo->bat_iv.tq_avg; tq_weighted_coding = batadv_nc_random_weight_tq(tq_tmp); /* Select one destination for the MAC-header dst-field based on * weighted TQ-values. */ if (tq_weighted_neigh >= tq_weighted_coding) { /* Destination from nc_packet is selected for MAC-header */ first_dest = nc_packet->neigh_node; first_source = nc_packet->nc_path->prev_hop; second_dest = neigh_node; second_source = ethhdr->h_source; packet1 = (struct batadv_unicast_packet *)nc_packet->skb->data; packet2 = (struct batadv_unicast_packet *)skb->data; packet_id1 = nc_packet->packet_id; packet_id2 = batadv_skb_crc32(skb, skb->data + sizeof(*packet2)); } else { /* Destination for skb is selected for MAC-header */ first_dest = neigh_node; first_source = ethhdr->h_source; second_dest = nc_packet->neigh_node; second_source = nc_packet->nc_path->prev_hop; packet1 = (struct batadv_unicast_packet *)skb->data; packet2 = (struct batadv_unicast_packet *)nc_packet->skb->data; packet_id1 = batadv_skb_crc32(skb, skb->data + sizeof(*packet1)); packet_id2 = nc_packet->packet_id; } /* Instead of zero padding the smallest data buffer, we * code into the largest. */ if (skb->len <= nc_packet->skb->len) { skb_dest = nc_packet->skb; skb_src = skb; } else { skb_dest = skb; skb_src = nc_packet->skb; } /* coding_len is used when decoding the packet shorter packet */ coding_len = skb_src->len - unicast_size; if (skb_linearize(skb_dest) < 0 || skb_linearize(skb_src) < 0) goto out; skb_push(skb_dest, header_add); coded_packet = (struct batadv_coded_packet *)skb_dest->data; skb_reset_mac_header(skb_dest); coded_packet->packet_type = BATADV_CODED; coded_packet->version = BATADV_COMPAT_VERSION; coded_packet->ttl = packet1->ttl; /* Info about first unicast packet */ ether_addr_copy(coded_packet->first_source, first_source); ether_addr_copy(coded_packet->first_orig_dest, packet1->dest); coded_packet->first_crc = packet_id1; coded_packet->first_ttvn = packet1->ttvn; /* Info about second unicast packet */ ether_addr_copy(coded_packet->second_dest, second_dest->addr); ether_addr_copy(coded_packet->second_source, second_source); ether_addr_copy(coded_packet->second_orig_dest, packet2->dest); coded_packet->second_crc = packet_id2; coded_packet->second_ttl = packet2->ttl; coded_packet->second_ttvn = packet2->ttvn; coded_packet->coded_len = htons(coding_len); /* This is where the magic happens: Code skb_src into skb_dest */ batadv_nc_memxor(skb_dest->data + coded_size, skb_src->data + unicast_size, coding_len); /* Update counters accordingly */ if (BATADV_SKB_CB(skb_src)->decoded && BATADV_SKB_CB(skb_dest)->decoded) { /* Both packets are recoded */ count = skb_src->len + ETH_HLEN; count += skb_dest->len + ETH_HLEN; batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE, 2); batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, count); } else if (!BATADV_SKB_CB(skb_src)->decoded && !BATADV_SKB_CB(skb_dest)->decoded) { /* Both packets are newly coded */ count = skb_src->len + ETH_HLEN; count += skb_dest->len + ETH_HLEN; batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE, 2); batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, count); } else if (BATADV_SKB_CB(skb_src)->decoded && !BATADV_SKB_CB(skb_dest)->decoded) { /* skb_src recoded and skb_dest is newly coded */ batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE); batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, skb_src->len + ETH_HLEN); batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE); batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, skb_dest->len + ETH_HLEN); } else if (!BATADV_SKB_CB(skb_src)->decoded && BATADV_SKB_CB(skb_dest)->decoded) { /* skb_src is newly coded and skb_dest is recoded */ batadv_inc_counter(bat_priv, BATADV_CNT_NC_CODE); batadv_add_counter(bat_priv, BATADV_CNT_NC_CODE_BYTES, skb_src->len + ETH_HLEN); batadv_inc_counter(bat_priv, BATADV_CNT_NC_RECODE); batadv_add_counter(bat_priv, BATADV_CNT_NC_RECODE_BYTES, skb_dest->len + ETH_HLEN); } /* skb_src is now coded into skb_dest, so free it */ consume_skb(skb_src); /* avoid duplicate free of skb from nc_packet */ nc_packet->skb = NULL; batadv_nc_packet_free(nc_packet, false); /* Send the coded packet and return true */ batadv_send_unicast_skb(skb_dest, first_dest); res = true; out: batadv_neigh_node_put(router_neigh); batadv_neigh_node_put(router_coding); batadv_neigh_ifinfo_put(router_neigh_ifinfo); batadv_neigh_ifinfo_put(router_coding_ifinfo); return res; } /** * batadv_nc_skb_coding_possible() - true if a decoded skb is available at dst. * @skb: data skb to forward * @dst: destination mac address of the other skb to code with * @src: source mac address of skb * * Whenever we network code a packet we have to check whether we received it in * a network coded form. If so, we may not be able to use it for coding because * some neighbors may also have received (overheard) the packet in the network * coded form without being able to decode it. It is hard to know which of the * neighboring nodes was able to decode the packet, therefore we can only * re-code the packet if the source of the previous encoded packet is involved. * Since the source encoded the packet we can be certain it has all necessary * decode information. * * Return: true if coding of a decoded packet is allowed. */ static bool batadv_nc_skb_coding_possible(struct sk_buff *skb, u8 *dst, u8 *src) { if (BATADV_SKB_CB(skb)->decoded && !batadv_compare_eth(dst, src)) return false; return true; } /** * batadv_nc_path_search() - Find the coding path matching in_nc_node and * out_nc_node to retrieve a buffered packet that can be used for coding. * @bat_priv: the bat priv with all the soft interface information * @in_nc_node: pointer to skb next hop's neighbor nc node * @out_nc_node: pointer to skb source's neighbor nc node * @skb: data skb to forward * @eth_dst: next hop mac address of skb * * Return: true if coding of a decoded skb is allowed. */ static struct batadv_nc_packet * batadv_nc_path_search(struct batadv_priv *bat_priv, struct batadv_nc_node *in_nc_node, struct batadv_nc_node *out_nc_node, struct sk_buff *skb, u8 *eth_dst) { struct batadv_nc_path *nc_path, nc_path_key; struct batadv_nc_packet *nc_packet_out = NULL; struct batadv_nc_packet *nc_packet, *nc_packet_tmp; struct batadv_hashtable *hash = bat_priv->nc.coding_hash; int idx; if (!hash) return NULL; /* Create almost path key */ batadv_nc_hash_key_gen(&nc_path_key, in_nc_node->addr, out_nc_node->addr); idx = batadv_nc_hash_choose(&nc_path_key, hash->size); /* Check for coding opportunities in this nc_path */ rcu_read_lock(); hlist_for_each_entry_rcu(nc_path, &hash->table[idx], hash_entry) { if (!batadv_compare_eth(nc_path->prev_hop, in_nc_node->addr)) continue; if (!batadv_compare_eth(nc_path->next_hop, out_nc_node->addr)) continue; spin_lock_bh(&nc_path->packet_list_lock); if (list_empty(&nc_path->packet_list)) { spin_unlock_bh(&nc_path->packet_list_lock); continue; } list_for_each_entry_safe(nc_packet, nc_packet_tmp, &nc_path->packet_list, list) { if (!batadv_nc_skb_coding_possible(nc_packet->skb, eth_dst, in_nc_node->addr)) continue; /* Coding opportunity is found! */ list_del(&nc_packet->list); nc_packet_out = nc_packet; break; } spin_unlock_bh(&nc_path->packet_list_lock); break; } rcu_read_unlock(); return nc_packet_out; } /** * batadv_nc_skb_src_search() - Loops through the list of neighboring nodes of * the skb's sender (may be equal to the originator). * @bat_priv: the bat priv with all the soft interface information * @skb: data skb to forward * @eth_dst: next hop mac address of skb * @eth_src: source mac address of skb * @in_nc_node: pointer to skb next hop's neighbor nc node * * Return: an nc packet if a suitable coding packet was found, NULL otherwise. */ static struct batadv_nc_packet * batadv_nc_skb_src_search(struct batadv_priv *bat_priv, struct sk_buff *skb, u8 *eth_dst, u8 *eth_src, struct batadv_nc_node *in_nc_node) { struct batadv_orig_node *orig_node; struct batadv_nc_node *out_nc_node; struct batadv_nc_packet *nc_packet = NULL; orig_node = batadv_orig_hash_find(bat_priv, eth_src); if (!orig_node) return NULL; rcu_read_lock(); list_for_each_entry_rcu(out_nc_node, &orig_node->out_coding_list, list) { /* Check if the skb is decoded and if recoding is possible */ if (!batadv_nc_skb_coding_possible(skb, out_nc_node->addr, eth_src)) continue; /* Search for an opportunity in this nc_path */ nc_packet = batadv_nc_path_search(bat_priv, in_nc_node, out_nc_node, skb, eth_dst); if (nc_packet) break; } rcu_read_unlock(); batadv_orig_node_put(orig_node); return nc_packet; } /** * batadv_nc_skb_store_before_coding() - set the ethernet src and dst of the * unicast skb before it is stored for use in later decoding * @bat_priv: the bat priv with all the soft interface information * @skb: data skb to store * @eth_dst_new: new destination mac address of skb */ static void batadv_nc_skb_store_before_coding(struct batadv_priv *bat_priv, struct sk_buff *skb, u8 *eth_dst_new) { struct ethhdr *ethhdr; /* Copy skb header to change the mac header */ skb = pskb_copy_for_clone(skb, GFP_ATOMIC); if (!skb) return; /* Set the mac header as if we actually sent the packet uncoded */ ethhdr = eth_hdr(skb); ether_addr_copy(ethhdr->h_source, ethhdr->h_dest); ether_addr_copy(ethhdr->h_dest, eth_dst_new); /* Set data pointer to MAC header to mimic packets from our tx path */ skb_push(skb, ETH_HLEN); /* Add the packet to the decoding packet pool */ batadv_nc_skb_store_for_decoding(bat_priv, skb); /* batadv_nc_skb_store_for_decoding() clones the skb, so we must free * our ref */ consume_skb(skb); } /** * batadv_nc_skb_dst_search() - Loops through list of neighboring nodes to dst. * @skb: data skb to forward * @neigh_node: next hop to forward packet to * @ethhdr: pointer to the ethernet header inside the skb * * Loops through the list of neighboring nodes the next hop has a good * connection to (receives OGMs with a sufficient quality). We need to find a * neighbor of our next hop that potentially sent a packet which our next hop * also received (overheard) and has stored for later decoding. * * Return: true if the skb was consumed (encoded packet sent) or false otherwise */ static bool batadv_nc_skb_dst_search(struct sk_buff *skb, struct batadv_neigh_node *neigh_node, struct ethhdr *ethhdr) { struct net_device *netdev = neigh_node->if_incoming->soft_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_orig_node *orig_node = neigh_node->orig_node; struct batadv_nc_node *nc_node; struct batadv_nc_packet *nc_packet = NULL; rcu_read_lock(); list_for_each_entry_rcu(nc_node, &orig_node->in_coding_list, list) { /* Search for coding opportunity with this in_nc_node */ nc_packet = batadv_nc_skb_src_search(bat_priv, skb, neigh_node->addr, ethhdr->h_source, nc_node); /* Opportunity was found, so stop searching */ if (nc_packet) break; } rcu_read_unlock(); if (!nc_packet) return false; /* Save packets for later decoding */ batadv_nc_skb_store_before_coding(bat_priv, skb, neigh_node->addr); batadv_nc_skb_store_before_coding(bat_priv, nc_packet->skb, nc_packet->neigh_node->addr); /* Code and send packets */ if (batadv_nc_code_packets(bat_priv, skb, ethhdr, nc_packet, neigh_node)) return true; /* out of mem ? Coding failed - we have to free the buffered packet * to avoid memleaks. The skb passed as argument will be dealt with * by the calling function. */ batadv_nc_send_packet(nc_packet); return false; } /** * batadv_nc_skb_add_to_path() - buffer skb for later encoding / decoding * @skb: skb to add to path * @nc_path: path to add skb to * @neigh_node: next hop to forward packet to * @packet_id: checksum to identify packet * * Return: true if the packet was buffered or false in case of an error. */ static bool batadv_nc_skb_add_to_path(struct sk_buff *skb, struct batadv_nc_path *nc_path, struct batadv_neigh_node *neigh_node, __be32 packet_id) { struct batadv_nc_packet *nc_packet; nc_packet = kzalloc(sizeof(*nc_packet), GFP_ATOMIC); if (!nc_packet) return false; /* Initialize nc_packet */ nc_packet->timestamp = jiffies; nc_packet->packet_id = packet_id; nc_packet->skb = skb; nc_packet->neigh_node = neigh_node; nc_packet->nc_path = nc_path; /* Add coding packet to list */ spin_lock_bh(&nc_path->packet_list_lock); list_add_tail(&nc_packet->list, &nc_path->packet_list); spin_unlock_bh(&nc_path->packet_list_lock); return true; } /** * batadv_nc_skb_forward() - try to code a packet or add it to the coding packet * buffer * @skb: data skb to forward * @neigh_node: next hop to forward packet to * * Return: true if the skb was consumed (encoded packet sent) or false otherwise */ bool batadv_nc_skb_forward(struct sk_buff *skb, struct batadv_neigh_node *neigh_node) { const struct net_device *netdev = neigh_node->if_incoming->soft_iface; struct batadv_priv *bat_priv = netdev_priv(netdev); struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; /* Check if network coding is enabled */ if (!atomic_read(&bat_priv->network_coding)) goto out; /* We only handle unicast packets */ payload = skb_network_header(skb); packet = (struct batadv_unicast_packet *)payload; if (packet->packet_type != BATADV_UNICAST) goto out; /* Try to find a coding opportunity and send the skb if one is found */ if (batadv_nc_skb_dst_search(skb, neigh_node, ethhdr)) return true; /* Find or create a nc_path for this src-dst pair */ nc_path = batadv_nc_get_path(bat_priv, bat_priv->nc.coding_hash, ethhdr->h_source, neigh_node->addr); if (!nc_path) goto out; /* Add skb to nc_path */ packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet)); if (!batadv_nc_skb_add_to_path(skb, nc_path, neigh_node, packet_id)) goto free_nc_path; /* Packet is consumed */ return true; free_nc_path: batadv_nc_path_put(nc_path); out: /* Packet is not consumed */ return false; } /** * batadv_nc_skb_store_for_decoding() - save a clone of the skb which can be * used when decoding coded packets * @bat_priv: the bat priv with all the soft interface information * @skb: data skb to store */ void batadv_nc_skb_store_for_decoding(struct batadv_priv *bat_priv, struct sk_buff *skb) { struct batadv_unicast_packet *packet; struct batadv_nc_path *nc_path; struct ethhdr *ethhdr = eth_hdr(skb); __be32 packet_id; u8 *payload; /* Check if network coding is enabled */ if (!atomic_read(&bat_priv->network_coding)) goto out; /* Check for supported packet type */ payload = skb_network_header(skb); packet = (struct batadv_unicast_packet *)payload; if (packet->packet_type != BATADV_UNICAST) goto out; /* Find existing nc_path or create a new */ nc_path = batadv_nc_get_path(bat_priv, bat_priv->nc.decoding_hash, ethhdr->h_source, ethhdr->h_dest); if (!nc_path) goto out; /* Clone skb and adjust skb->data to point at batman header */ skb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!skb)) goto free_nc_path; if (unlikely(!pskb_may_pull(skb, ETH_HLEN))) goto free_skb; if (unlikely(!skb_pull_rcsum(skb, ETH_HLEN))) goto free_skb; /* Add skb to nc_path */ packet_id = batadv_skb_crc32(skb, payload + sizeof(*packet)); if (!batadv_nc_skb_add_to_path(skb, nc_path, NULL, packet_id)) goto free_skb; batadv_inc_counter(bat_priv, BATADV_CNT_NC_BUFFER); return; free_skb: kfree_skb(skb); free_nc_path: batadv_nc_path_put(nc_path); out: return; } /** * batadv_nc_skb_store_sniffed_unicast() - check if a received unicast packet * should be saved in the decoding buffer and, if so, store it there * @bat_priv: the bat priv with all the soft interface information * @skb: unicast skb to store */ void batadv_nc_skb_store_sniffed_unicast(struct batadv_priv *bat_priv, struct sk_buff *skb) { struct ethhdr *ethhdr = eth_hdr(skb); if (batadv_is_my_mac(bat_priv, ethhdr->h_dest)) return; /* Set data pointer to MAC header to mimic packets from our tx path */ skb_push(skb, ETH_HLEN); batadv_nc_skb_store_for_decoding(bat_priv, skb); } /** * batadv_nc_skb_decode_packet() - decode given skb using the decode data stored * in nc_packet * @bat_priv: the bat priv with all the soft interface information * @skb: unicast skb to decode * @nc_packet: decode data needed to decode the skb * * Return: pointer to decoded unicast packet if the packet was decoded or NULL * in case of an error. */ static struct batadv_unicast_packet * batadv_nc_skb_decode_packet(struct batadv_priv *bat_priv, struct sk_buff *skb, struct batadv_nc_packet *nc_packet) { const int h_size = sizeof(struct batadv_unicast_packet); const int h_diff = sizeof(struct batadv_coded_packet) - h_size; struct batadv_unicast_packet *unicast_packet; struct batadv_coded_packet coded_packet_tmp; struct ethhdr *ethhdr, ethhdr_tmp; u8 *orig_dest, ttl, ttvn; unsigned int coding_len; int err; /* Save headers temporarily */ memcpy(&coded_packet_tmp, skb->data, sizeof(coded_packet_tmp)); memcpy(ðhdr_tmp, skb_mac_header(skb), sizeof(ethhdr_tmp)); if (skb_cow(skb, 0) < 0) return NULL; if (unlikely(!skb_pull_rcsum(skb, h_diff))) return NULL; /* Data points to batman header, so set mac header 14 bytes before * and network to data */ skb_set_mac_header(skb, -ETH_HLEN); skb_reset_network_header(skb); /* Reconstruct original mac header */ ethhdr = eth_hdr(skb); *ethhdr = ethhdr_tmp; /* Select the correct unicast header information based on the location * of our mac address in the coded_packet header */ if (batadv_is_my_mac(bat_priv, coded_packet_tmp.second_dest)) { /* If we are the second destination the packet was overheard, * so the Ethernet address must be copied to h_dest and * pkt_type changed from PACKET_OTHERHOST to PACKET_HOST */ ether_addr_copy(ethhdr->h_dest, coded_packet_tmp.second_dest); skb->pkt_type = PACKET_HOST; orig_dest = coded_packet_tmp.second_orig_dest; ttl = coded_packet_tmp.second_ttl; ttvn = coded_packet_tmp.second_ttvn; } else { orig_dest = coded_packet_tmp.first_orig_dest; ttl = coded_packet_tmp.ttl; ttvn = coded_packet_tmp.first_ttvn; } coding_len = ntohs(coded_packet_tmp.coded_len); if (coding_len > skb->len) return NULL; /* Here the magic is reversed: * extract the missing packet from the received coded packet */ batadv_nc_memxor(skb->data + h_size, nc_packet->skb->data + h_size, coding_len); /* Resize decoded skb if decoded with larger packet */ if (nc_packet->skb->len > coding_len + h_size) { err = pskb_trim_rcsum(skb, coding_len + h_size); if (err) return NULL; } /* Create decoded unicast packet */ unicast_packet = (struct batadv_unicast_packet *)skb->data; unicast_packet->packet_type = BATADV_UNICAST; unicast_packet->version = BATADV_COMPAT_VERSION; unicast_packet->ttl = ttl; ether_addr_copy(unicast_packet->dest, orig_dest); unicast_packet->ttvn = ttvn; batadv_nc_packet_free(nc_packet, false); return unicast_packet; } /** * batadv_nc_find_decoding_packet() - search through buffered decoding data to * find the data needed to decode the coded packet * @bat_priv: the bat priv with all the soft interface information * @ethhdr: pointer to the ethernet header inside the coded packet * @coded: coded packet we try to find decode data for * * Return: pointer to nc packet if the needed data was found or NULL otherwise. */ static struct batadv_nc_packet * batadv_nc_find_decoding_packet(struct batadv_priv *bat_priv, struct ethhdr *ethhdr, struct batadv_coded_packet *coded) { struct batadv_hashtable *hash = bat_priv->nc.decoding_hash; struct batadv_nc_packet *tmp_nc_packet, *nc_packet = NULL; struct batadv_nc_path *nc_path, nc_path_key; u8 *dest, *source; __be32 packet_id; int index; if (!hash) return NULL; /* Select the correct packet id based on the location of our mac-addr */ dest = ethhdr->h_source; if (!batadv_is_my_mac(bat_priv, coded->second_dest)) { source = coded->second_source; packet_id = coded->second_crc; } else { source = coded->first_source; packet_id = coded->first_crc; } batadv_nc_hash_key_gen(&nc_path_key, source, dest); index = batadv_nc_hash_choose(&nc_path_key, hash->size); /* Search for matching coding path */ rcu_read_lock(); hlist_for_each_entry_rcu(nc_path, &hash->table[index], hash_entry) { /* Find matching nc_packet */ spin_lock_bh(&nc_path->packet_list_lock); list_for_each_entry(tmp_nc_packet, &nc_path->packet_list, list) { if (packet_id == tmp_nc_packet->packet_id) { list_del(&tmp_nc_packet->list); nc_packet = tmp_nc_packet; break; } } spin_unlock_bh(&nc_path->packet_list_lock); if (nc_packet) break; } rcu_read_unlock(); if (!nc_packet) batadv_dbg(BATADV_DBG_NC, bat_priv, "No decoding packet found for %u\n", packet_id); return nc_packet; } /** * batadv_nc_recv_coded_packet() - try to decode coded packet and enqueue the * resulting unicast packet * @skb: incoming coded packet * @recv_if: pointer to interface this packet was received on * * Return: NET_RX_SUCCESS if the packet has been consumed or NET_RX_DROP * otherwise. */ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if) { struct batadv_priv *bat_priv = netdev_priv(recv_if->soft_iface); struct batadv_unicast_packet *unicast_packet; struct batadv_coded_packet *coded_packet; struct batadv_nc_packet *nc_packet; struct ethhdr *ethhdr; int hdr_size = sizeof(*coded_packet); /* Check if network coding is enabled */ if (!atomic_read(&bat_priv->network_coding)) goto free_skb; /* Make sure we can access (and remove) header */ if (unlikely(!pskb_may_pull(skb, hdr_size))) goto free_skb; coded_packet = (struct batadv_coded_packet *)skb->data; ethhdr = eth_hdr(skb); /* Verify frame is destined for us */ if (!batadv_is_my_mac(bat_priv, ethhdr->h_dest) && !batadv_is_my_mac(bat_priv, coded_packet->second_dest)) goto free_skb; /* Update stat counter */ if (batadv_is_my_mac(bat_priv, coded_packet->second_dest)) batadv_inc_counter(bat_priv, BATADV_CNT_NC_SNIFFED); nc_packet = batadv_nc_find_decoding_packet(bat_priv, ethhdr, coded_packet); if (!nc_packet) { batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); goto free_skb; } /* Make skb's linear, because decoding accesses the entire buffer */ if (skb_linearize(skb) < 0) goto free_nc_packet; if (skb_linearize(nc_packet->skb) < 0) goto free_nc_packet; /* Decode the packet */ unicast_packet = batadv_nc_skb_decode_packet(bat_priv, skb, nc_packet); if (!unicast_packet) { batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE_FAILED); goto free_nc_packet; } /* Mark packet as decoded to do correct recoding when forwarding */ BATADV_SKB_CB(skb)->decoded = true; batadv_inc_counter(bat_priv, BATADV_CNT_NC_DECODE); batadv_add_counter(bat_priv, BATADV_CNT_NC_DECODE_BYTES, skb->len + ETH_HLEN); return batadv_recv_unicast_packet(skb, recv_if); free_nc_packet: batadv_nc_packet_free(nc_packet, true); free_skb: kfree_skb(skb); return NET_RX_DROP; } /** * batadv_nc_mesh_free() - clean up network coding memory * @bat_priv: the bat priv with all the soft interface information */ void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_NC, 1); batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_NC, 1); cancel_delayed_work_sync(&bat_priv->nc.work); batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); batadv_hash_destroy(bat_priv->nc.coding_hash); batadv_nc_purge_paths(bat_priv, bat_priv->nc.decoding_hash, NULL); batadv_hash_destroy(bat_priv->nc.decoding_hash); } |
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * Memory-to-memory device framework for Video for Linux 2. * * Helper functions for devices that use memory buffers for both source * and destination. * * Copyright (c) 2009 Samsung Electronics Co., Ltd. * Pawel Osciak, <pawel@osciak.com> * Marek Szyprowski, <m.szyprowski@samsung.com> */ #ifndef _MEDIA_V4L2_MEM2MEM_H #define _MEDIA_V4L2_MEM2MEM_H #include <media/videobuf2-v4l2.h> /** * struct v4l2_m2m_ops - mem-to-mem device driver callbacks * @device_run: required. Begin the actual job (transaction) inside this * callback. * The job does NOT have to end before this callback returns * (and it will be the usual case). When the job finishes, * v4l2_m2m_job_finish() or v4l2_m2m_buf_done_and_job_finish() * has to be called. * @job_ready: optional. Should return 0 if the driver does not have a job * fully prepared to run yet (i.e. it will not be able to finish a * transaction without sleeping). If not provided, it will be * assumed that one source and one destination buffer are all * that is required for the driver to perform one full transaction. * This method may not sleep. * @job_abort: optional. Informs the driver that it has to abort the currently * running transaction as soon as possible (i.e. as soon as it can * stop the device safely; e.g. in the next interrupt handler), * even if the transaction would not have been finished by then. * After the driver performs the necessary steps, it has to call * v4l2_m2m_job_finish() or v4l2_m2m_buf_done_and_job_finish() as * if the transaction ended normally. * This function does not have to (and will usually not) wait * until the device enters a state when it can be stopped. */ struct v4l2_m2m_ops { void (*device_run)(void *priv); int (*job_ready)(void *priv); void (*job_abort)(void *priv); }; struct video_device; struct v4l2_m2m_dev; /** * struct v4l2_m2m_queue_ctx - represents a queue for buffers ready to be * processed * * @q: pointer to struct &vb2_queue * @rdy_queue: List of V4L2 mem-to-mem queues * @rdy_spinlock: spin lock to protect the struct usage * @num_rdy: number of buffers ready to be processed * @buffered: is the queue buffered? * * Queue for buffers ready to be processed as soon as this * instance receives access to the device. */ struct v4l2_m2m_queue_ctx { struct vb2_queue q; struct list_head rdy_queue; spinlock_t rdy_spinlock; u8 num_rdy; bool buffered; }; /** * struct v4l2_m2m_ctx - Memory to memory context structure * * @q_lock: struct &mutex lock * @new_frame: valid in the device_run callback: if true, then this * starts a new frame; if false, then this is a new slice * for an existing frame. This is always true unless * V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF is set, which * indicates slicing support. * @is_draining: indicates device is in draining phase * @last_src_buf: indicate the last source buffer for draining * @next_buf_last: next capture queud buffer will be tagged as last * @has_stopped: indicate the device has been stopped * @ignore_cap_streaming: If true, job_ready can be called even if the CAPTURE * queue is not streaming. This allows firmware to * analyze the bitstream header which arrives on the * OUTPUT queue. The driver must implement the job_ready * callback correctly to make sure that the requirements * for actual decoding are met. * @m2m_dev: opaque pointer to the internal data to handle M2M context * @cap_q_ctx: Capture (output to memory) queue context * @out_q_ctx: Output (input from memory) queue context * @queue: List of memory to memory contexts * @job_flags: Job queue flags, used internally by v4l2-mem2mem.c: * %TRANS_QUEUED, %TRANS_RUNNING and %TRANS_ABORT. * @finished: Wait queue used to signalize when a job queue finished. * @priv: Instance private data * * The memory to memory context is specific to a file handle, NOT to e.g. * a device. */ struct v4l2_m2m_ctx { /* optional cap/out vb2 queues lock */ struct mutex *q_lock; bool new_frame; bool is_draining; struct vb2_v4l2_buffer *last_src_buf; bool next_buf_last; bool has_stopped; bool ignore_cap_streaming; /* internal use only */ struct v4l2_m2m_dev *m2m_dev; struct v4l2_m2m_queue_ctx cap_q_ctx; struct v4l2_m2m_queue_ctx out_q_ctx; /* For device job queue */ struct list_head queue; unsigned long job_flags; wait_queue_head_t finished; void *priv; }; /** * struct v4l2_m2m_buffer - Memory to memory buffer * * @vb: pointer to struct &vb2_v4l2_buffer * @list: list of m2m buffers */ struct v4l2_m2m_buffer { struct vb2_v4l2_buffer vb; struct list_head list; }; /** * v4l2_m2m_get_curr_priv() - return driver private data for the currently * running instance or NULL if no instance is running * * @m2m_dev: opaque pointer to the internal data to handle M2M context */ void *v4l2_m2m_get_curr_priv(struct v4l2_m2m_dev *m2m_dev); /** * v4l2_m2m_get_vq() - return vb2_queue for the given type * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @type: type of the V4L2 buffer, as defined by enum &v4l2_buf_type */ struct vb2_queue *v4l2_m2m_get_vq(struct v4l2_m2m_ctx *m2m_ctx, enum v4l2_buf_type type); /** * v4l2_m2m_try_schedule() - check whether an instance is ready to be added to * the pending job queue and add it if so. * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * * There are three basic requirements an instance has to meet to be able to run: * 1) at least one source buffer has to be queued, * 2) at least one destination buffer has to be queued, * 3) streaming has to be on. * * If a queue is buffered (for example a decoder hardware ringbuffer that has * to be drained before doing streamoff), allow scheduling without v4l2 buffers * on that queue. * * There may also be additional, custom requirements. In such case the driver * should supply a custom callback (job_ready in v4l2_m2m_ops) that should * return 1 if the instance is ready. * An example of the above could be an instance that requires more than one * src/dst buffer per transaction. */ void v4l2_m2m_try_schedule(struct v4l2_m2m_ctx *m2m_ctx); /** * v4l2_m2m_job_finish() - inform the framework that a job has been finished * and have it clean up * * @m2m_dev: opaque pointer to the internal data to handle M2M context * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * * Called by a driver to yield back the device after it has finished with it. * Should be called as soon as possible after reaching a state which allows * other instances to take control of the device. * * This function has to be called only after &v4l2_m2m_ops->device_run * callback has been called on the driver. To prevent recursion, it should * not be called directly from the &v4l2_m2m_ops->device_run callback though. */ void v4l2_m2m_job_finish(struct v4l2_m2m_dev *m2m_dev, struct v4l2_m2m_ctx *m2m_ctx); /** * v4l2_m2m_buf_done_and_job_finish() - return source/destination buffers with * state and inform the framework that a job has been finished and have it * clean up * * @m2m_dev: opaque pointer to the internal data to handle M2M context * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @state: vb2 buffer state passed to v4l2_m2m_buf_done(). * * Drivers that set V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF must use this * function instead of job_finish() to take held buffers into account. It is * optional for other drivers. * * This function removes the source buffer from the ready list and returns * it with the given state. The same is done for the destination buffer, unless * it is marked 'held'. In that case the buffer is kept on the ready list. * * After that the job is finished (see job_finish()). * * This allows for multiple output buffers to be used to fill in a single * capture buffer. This is typically used by stateless decoders where * multiple e.g. H.264 slices contribute to a single decoded frame. */ void v4l2_m2m_buf_done_and_job_finish(struct v4l2_m2m_dev *m2m_dev, struct v4l2_m2m_ctx *m2m_ctx, enum vb2_buffer_state state); static inline void v4l2_m2m_buf_done(struct vb2_v4l2_buffer *buf, enum vb2_buffer_state state) { vb2_buffer_done(&buf->vb2_buf, state); } /** * v4l2_m2m_clear_state() - clear encoding/decoding state * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline void v4l2_m2m_clear_state(struct v4l2_m2m_ctx *m2m_ctx) { m2m_ctx->next_buf_last = false; m2m_ctx->is_draining = false; m2m_ctx->has_stopped = false; } /** * v4l2_m2m_mark_stopped() - set current encoding/decoding state as stopped * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline void v4l2_m2m_mark_stopped(struct v4l2_m2m_ctx *m2m_ctx) { m2m_ctx->next_buf_last = false; m2m_ctx->is_draining = false; m2m_ctx->has_stopped = true; } /** * v4l2_m2m_dst_buf_is_last() - return the current encoding/decoding session * draining management state of next queued capture buffer * * This last capture buffer should be tagged with V4L2_BUF_FLAG_LAST to notify * the end of the capture session. * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline bool v4l2_m2m_dst_buf_is_last(struct v4l2_m2m_ctx *m2m_ctx) { return m2m_ctx->is_draining && m2m_ctx->next_buf_last; } /** * v4l2_m2m_has_stopped() - return the current encoding/decoding session * stopped state * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline bool v4l2_m2m_has_stopped(struct v4l2_m2m_ctx *m2m_ctx) { return m2m_ctx->has_stopped; } /** * v4l2_m2m_is_last_draining_src_buf() - return the output buffer draining * state in the current encoding/decoding session * * This will identify the last output buffer queued before a session stop * was required, leading to an actual encoding/decoding session stop state * in the encoding/decoding process after being processed. * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @vbuf: pointer to struct &v4l2_buffer */ static inline bool v4l2_m2m_is_last_draining_src_buf(struct v4l2_m2m_ctx *m2m_ctx, struct vb2_v4l2_buffer *vbuf) { return m2m_ctx->is_draining && vbuf == m2m_ctx->last_src_buf; } /** * v4l2_m2m_last_buffer_done() - marks the buffer with LAST flag and DONE * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @vbuf: pointer to struct &v4l2_buffer */ void v4l2_m2m_last_buffer_done(struct v4l2_m2m_ctx *m2m_ctx, struct vb2_v4l2_buffer *vbuf); /** * v4l2_m2m_suspend() - stop new jobs from being run and wait for current job * to finish * * @m2m_dev: opaque pointer to the internal data to handle M2M context * * Called by a driver in the suspend hook. Stop new jobs from being run, and * wait for current running job to finish. */ void v4l2_m2m_suspend(struct v4l2_m2m_dev *m2m_dev); /** * v4l2_m2m_resume() - resume job running and try to run a queued job * * @m2m_dev: opaque pointer to the internal data to handle M2M context * * Called by a driver in the resume hook. This reverts the operation of * v4l2_m2m_suspend() and allows job to be run. Also try to run a queued job if * there is any. */ void v4l2_m2m_resume(struct v4l2_m2m_dev *m2m_dev); /** * v4l2_m2m_reqbufs() - multi-queue-aware REQBUFS multiplexer * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @reqbufs: pointer to struct &v4l2_requestbuffers */ int v4l2_m2m_reqbufs(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_requestbuffers *reqbufs); /** * v4l2_m2m_querybuf() - multi-queue-aware QUERYBUF multiplexer * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @buf: pointer to struct &v4l2_buffer * * See v4l2_m2m_mmap() documentation for details. */ int v4l2_m2m_querybuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_buffer *buf); /** * v4l2_m2m_qbuf() - enqueue a source or destination buffer, depending on * the type * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @buf: pointer to struct &v4l2_buffer */ int v4l2_m2m_qbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_buffer *buf); /** * v4l2_m2m_dqbuf() - dequeue a source or destination buffer, depending on * the type * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @buf: pointer to struct &v4l2_buffer */ int v4l2_m2m_dqbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_buffer *buf); /** * v4l2_m2m_prepare_buf() - prepare a source or destination buffer, depending on * the type * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @buf: pointer to struct &v4l2_buffer */ int v4l2_m2m_prepare_buf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_buffer *buf); /** * v4l2_m2m_create_bufs() - create a source or destination buffer, depending * on the type * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @create: pointer to struct &v4l2_create_buffers */ int v4l2_m2m_create_bufs(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_create_buffers *create); /** * v4l2_m2m_expbuf() - export a source or destination buffer, depending on * the type * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @eb: pointer to struct &v4l2_exportbuffer */ int v4l2_m2m_expbuf(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_exportbuffer *eb); /** * v4l2_m2m_streamon() - turn on streaming for a video queue * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @type: type of the V4L2 buffer, as defined by enum &v4l2_buf_type */ int v4l2_m2m_streamon(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, enum v4l2_buf_type type); /** * v4l2_m2m_streamoff() - turn off streaming for a video queue * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @type: type of the V4L2 buffer, as defined by enum &v4l2_buf_type */ int v4l2_m2m_streamoff(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, enum v4l2_buf_type type); /** * v4l2_m2m_update_start_streaming_state() - update the encoding/decoding * session state when a start of streaming of a video queue is requested * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @q: queue */ void v4l2_m2m_update_start_streaming_state(struct v4l2_m2m_ctx *m2m_ctx, struct vb2_queue *q); /** * v4l2_m2m_update_stop_streaming_state() - update the encoding/decoding * session state when a stop of streaming of a video queue is requested * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @q: queue */ void v4l2_m2m_update_stop_streaming_state(struct v4l2_m2m_ctx *m2m_ctx, struct vb2_queue *q); /** * v4l2_m2m_encoder_cmd() - execute an encoder command * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @ec: pointer to the encoder command */ int v4l2_m2m_encoder_cmd(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_encoder_cmd *ec); /** * v4l2_m2m_decoder_cmd() - execute a decoder command * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @dc: pointer to the decoder command */ int v4l2_m2m_decoder_cmd(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct v4l2_decoder_cmd *dc); /** * v4l2_m2m_poll() - poll replacement, for destination buffers only * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @wait: pointer to struct &poll_table_struct * * Call from the driver's poll() function. Will poll both queues. If a buffer * is available to dequeue (with dqbuf) from the source queue, this will * indicate that a non-blocking write can be performed, while read will be * returned in case of the destination queue. */ __poll_t v4l2_m2m_poll(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct poll_table_struct *wait); /** * v4l2_m2m_mmap() - source and destination queues-aware mmap multiplexer * * @file: pointer to struct &file * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @vma: pointer to struct &vm_area_struct * * Call from driver's mmap() function. Will handle mmap() for both queues * seamlessly for the video buffer, which will receive normal per-queue offsets * and proper vb2 queue pointers. The differentiation is made outside * vb2 by adding a predefined offset to buffers from one of the queues * and subtracting it before passing it back to vb2. Only drivers (and * thus applications) receive modified offsets. */ int v4l2_m2m_mmap(struct file *file, struct v4l2_m2m_ctx *m2m_ctx, struct vm_area_struct *vma); #ifndef CONFIG_MMU unsigned long v4l2_m2m_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); #endif /** * v4l2_m2m_init() - initialize per-driver m2m data * * @m2m_ops: pointer to struct v4l2_m2m_ops * * Usually called from driver's ``probe()`` function. * * Return: returns an opaque pointer to the internal data to handle M2M context */ struct v4l2_m2m_dev *v4l2_m2m_init(const struct v4l2_m2m_ops *m2m_ops); #if defined(CONFIG_MEDIA_CONTROLLER) void v4l2_m2m_unregister_media_controller(struct v4l2_m2m_dev *m2m_dev); int v4l2_m2m_register_media_controller(struct v4l2_m2m_dev *m2m_dev, struct video_device *vdev, int function); #else static inline void v4l2_m2m_unregister_media_controller(struct v4l2_m2m_dev *m2m_dev) { } static inline int v4l2_m2m_register_media_controller(struct v4l2_m2m_dev *m2m_dev, struct video_device *vdev, int function) { return 0; } #endif /** * v4l2_m2m_release() - cleans up and frees a m2m_dev structure * * @m2m_dev: opaque pointer to the internal data to handle M2M context * * Usually called from driver's ``remove()`` function. */ void v4l2_m2m_release(struct v4l2_m2m_dev *m2m_dev); /** * v4l2_m2m_ctx_init() - allocate and initialize a m2m context * * @m2m_dev: opaque pointer to the internal data to handle M2M context * @drv_priv: driver's instance private data * @queue_init: a callback for queue type-specific initialization function * to be used for initializing vb2_queues * * Usually called from driver's ``open()`` function. */ struct v4l2_m2m_ctx *v4l2_m2m_ctx_init(struct v4l2_m2m_dev *m2m_dev, void *drv_priv, int (*queue_init)(void *priv, struct vb2_queue *src_vq, struct vb2_queue *dst_vq)); static inline void v4l2_m2m_set_src_buffered(struct v4l2_m2m_ctx *m2m_ctx, bool buffered) { m2m_ctx->out_q_ctx.buffered = buffered; } static inline void v4l2_m2m_set_dst_buffered(struct v4l2_m2m_ctx *m2m_ctx, bool buffered) { m2m_ctx->cap_q_ctx.buffered = buffered; } /** * v4l2_m2m_ctx_release() - release m2m context * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * * Usually called from driver's release() function. */ void v4l2_m2m_ctx_release(struct v4l2_m2m_ctx *m2m_ctx); /** * v4l2_m2m_buf_queue() - add a buffer to the proper ready buffers list. * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @vbuf: pointer to struct &vb2_v4l2_buffer * * Call from vb2_queue_ops->ops->buf_queue, vb2_queue_ops callback. */ void v4l2_m2m_buf_queue(struct v4l2_m2m_ctx *m2m_ctx, struct vb2_v4l2_buffer *vbuf); /** * v4l2_m2m_num_src_bufs_ready() - return the number of source buffers ready for * use * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline unsigned int v4l2_m2m_num_src_bufs_ready(struct v4l2_m2m_ctx *m2m_ctx) { unsigned int num_buf_rdy; unsigned long flags; spin_lock_irqsave(&m2m_ctx->out_q_ctx.rdy_spinlock, flags); num_buf_rdy = m2m_ctx->out_q_ctx.num_rdy; spin_unlock_irqrestore(&m2m_ctx->out_q_ctx.rdy_spinlock, flags); return num_buf_rdy; } /** * v4l2_m2m_num_dst_bufs_ready() - return the number of destination buffers * ready for use * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline unsigned int v4l2_m2m_num_dst_bufs_ready(struct v4l2_m2m_ctx *m2m_ctx) { unsigned int num_buf_rdy; unsigned long flags; spin_lock_irqsave(&m2m_ctx->cap_q_ctx.rdy_spinlock, flags); num_buf_rdy = m2m_ctx->cap_q_ctx.num_rdy; spin_unlock_irqrestore(&m2m_ctx->cap_q_ctx.rdy_spinlock, flags); return num_buf_rdy; } /** * v4l2_m2m_next_buf() - return next buffer from the list of ready buffers * * @q_ctx: pointer to struct @v4l2_m2m_queue_ctx */ struct vb2_v4l2_buffer *v4l2_m2m_next_buf(struct v4l2_m2m_queue_ctx *q_ctx); /** * v4l2_m2m_next_src_buf() - return next source buffer from the list of ready * buffers * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline struct vb2_v4l2_buffer * v4l2_m2m_next_src_buf(struct v4l2_m2m_ctx *m2m_ctx) { return v4l2_m2m_next_buf(&m2m_ctx->out_q_ctx); } /** * v4l2_m2m_next_dst_buf() - return next destination buffer from the list of * ready buffers * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline struct vb2_v4l2_buffer * v4l2_m2m_next_dst_buf(struct v4l2_m2m_ctx *m2m_ctx) { return v4l2_m2m_next_buf(&m2m_ctx->cap_q_ctx); } /** * v4l2_m2m_last_buf() - return last buffer from the list of ready buffers * * @q_ctx: pointer to struct @v4l2_m2m_queue_ctx */ struct vb2_v4l2_buffer *v4l2_m2m_last_buf(struct v4l2_m2m_queue_ctx *q_ctx); /** * v4l2_m2m_last_src_buf() - return last source buffer from the list of * ready buffers * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline struct vb2_v4l2_buffer * v4l2_m2m_last_src_buf(struct v4l2_m2m_ctx *m2m_ctx) { return v4l2_m2m_last_buf(&m2m_ctx->out_q_ctx); } /** * v4l2_m2m_last_dst_buf() - return last destination buffer from the list of * ready buffers * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline struct vb2_v4l2_buffer * v4l2_m2m_last_dst_buf(struct v4l2_m2m_ctx *m2m_ctx) { return v4l2_m2m_last_buf(&m2m_ctx->cap_q_ctx); } /** * v4l2_m2m_for_each_dst_buf() - iterate over a list of destination ready * buffers * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @b: current buffer of type struct v4l2_m2m_buffer */ #define v4l2_m2m_for_each_dst_buf(m2m_ctx, b) \ list_for_each_entry(b, &m2m_ctx->cap_q_ctx.rdy_queue, list) /** * v4l2_m2m_for_each_src_buf() - iterate over a list of source ready buffers * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @b: current buffer of type struct v4l2_m2m_buffer */ #define v4l2_m2m_for_each_src_buf(m2m_ctx, b) \ list_for_each_entry(b, &m2m_ctx->out_q_ctx.rdy_queue, list) /** * v4l2_m2m_for_each_dst_buf_safe() - iterate over a list of destination ready * buffers safely * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @b: current buffer of type struct v4l2_m2m_buffer * @n: used as temporary storage */ #define v4l2_m2m_for_each_dst_buf_safe(m2m_ctx, b, n) \ list_for_each_entry_safe(b, n, &m2m_ctx->cap_q_ctx.rdy_queue, list) /** * v4l2_m2m_for_each_src_buf_safe() - iterate over a list of source ready * buffers safely * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @b: current buffer of type struct v4l2_m2m_buffer * @n: used as temporary storage */ #define v4l2_m2m_for_each_src_buf_safe(m2m_ctx, b, n) \ list_for_each_entry_safe(b, n, &m2m_ctx->out_q_ctx.rdy_queue, list) /** * v4l2_m2m_get_src_vq() - return vb2_queue for source buffers * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline struct vb2_queue *v4l2_m2m_get_src_vq(struct v4l2_m2m_ctx *m2m_ctx) { return &m2m_ctx->out_q_ctx.q; } /** * v4l2_m2m_get_dst_vq() - return vb2_queue for destination buffers * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline struct vb2_queue *v4l2_m2m_get_dst_vq(struct v4l2_m2m_ctx *m2m_ctx) { return &m2m_ctx->cap_q_ctx.q; } /** * v4l2_m2m_buf_remove() - take off a buffer from the list of ready buffers and * return it * * @q_ctx: pointer to struct @v4l2_m2m_queue_ctx */ struct vb2_v4l2_buffer *v4l2_m2m_buf_remove(struct v4l2_m2m_queue_ctx *q_ctx); /** * v4l2_m2m_src_buf_remove() - take off a source buffer from the list of ready * buffers and return it * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline struct vb2_v4l2_buffer * v4l2_m2m_src_buf_remove(struct v4l2_m2m_ctx *m2m_ctx) { return v4l2_m2m_buf_remove(&m2m_ctx->out_q_ctx); } /** * v4l2_m2m_dst_buf_remove() - take off a destination buffer from the list of * ready buffers and return it * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx */ static inline struct vb2_v4l2_buffer * v4l2_m2m_dst_buf_remove(struct v4l2_m2m_ctx *m2m_ctx) { return v4l2_m2m_buf_remove(&m2m_ctx->cap_q_ctx); } /** * v4l2_m2m_buf_remove_by_buf() - take off exact buffer from the list of ready * buffers * * @q_ctx: pointer to struct @v4l2_m2m_queue_ctx * @vbuf: the buffer to be removed */ void v4l2_m2m_buf_remove_by_buf(struct v4l2_m2m_queue_ctx *q_ctx, struct vb2_v4l2_buffer *vbuf); /** * v4l2_m2m_src_buf_remove_by_buf() - take off exact source buffer from the list * of ready buffers * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @vbuf: the buffer to be removed */ static inline void v4l2_m2m_src_buf_remove_by_buf(struct v4l2_m2m_ctx *m2m_ctx, struct vb2_v4l2_buffer *vbuf) { v4l2_m2m_buf_remove_by_buf(&m2m_ctx->out_q_ctx, vbuf); } /** * v4l2_m2m_dst_buf_remove_by_buf() - take off exact destination buffer from the * list of ready buffers * * @m2m_ctx: m2m context assigned to the instance given by struct &v4l2_m2m_ctx * @vbuf: the buffer to be removed */ static inline void v4l2_m2m_dst_buf_remove_by_buf(struct v4l2_m2m_ctx *m2m_ctx, struct vb2_v4l2_buffer *vbuf) { v4l2_m2m_buf_remove_by_buf(&m2m_ctx->cap_q_ctx, vbuf); } struct vb2_v4l2_buffer * v4l2_m2m_buf_remove_by_idx(struct v4l2_m2m_queue_ctx *q_ctx, unsigned int idx); static inline struct vb2_v4l2_buffer * v4l2_m2m_src_buf_remove_by_idx(struct v4l2_m2m_ctx *m2m_ctx, unsigned int idx) { return v4l2_m2m_buf_remove_by_idx(&m2m_ctx->out_q_ctx, idx); } static inline struct vb2_v4l2_buffer * v4l2_m2m_dst_buf_remove_by_idx(struct v4l2_m2m_ctx *m2m_ctx, unsigned int idx) { return v4l2_m2m_buf_remove_by_idx(&m2m_ctx->cap_q_ctx, idx); } /** * v4l2_m2m_buf_copy_metadata() - copy buffer metadata from * the output buffer to the capture buffer * * @out_vb: the output buffer that is the source of the metadata. * @cap_vb: the capture buffer that will receive the metadata. * @copy_frame_flags: copy the KEY/B/PFRAME flags as well. * * This helper function copies the timestamp, timecode (if the TIMECODE * buffer flag was set), field and the TIMECODE, KEYFRAME, BFRAME, PFRAME * and TSTAMP_SRC_MASK flags from @out_vb to @cap_vb. * * If @copy_frame_flags is false, then the KEYFRAME, BFRAME and PFRAME * flags are not copied. This is typically needed for encoders that * set this bits explicitly. */ void v4l2_m2m_buf_copy_metadata(const struct vb2_v4l2_buffer *out_vb, struct vb2_v4l2_buffer *cap_vb, bool copy_frame_flags); /* v4l2 request helper */ void v4l2_m2m_request_queue(struct media_request *req); /* v4l2 ioctl helpers */ int v4l2_m2m_ioctl_reqbufs(struct file *file, void *priv, struct v4l2_requestbuffers *rb); int v4l2_m2m_ioctl_create_bufs(struct file *file, void *fh, struct v4l2_create_buffers *create); int v4l2_m2m_ioctl_remove_bufs(struct file *file, void *priv, struct v4l2_remove_buffers *d); int v4l2_m2m_ioctl_querybuf(struct file *file, void *fh, struct v4l2_buffer *buf); int v4l2_m2m_ioctl_expbuf(struct file *file, void *fh, struct v4l2_exportbuffer *eb); int v4l2_m2m_ioctl_qbuf(struct file *file, void *fh, struct v4l2_buffer *buf); int v4l2_m2m_ioctl_dqbuf(struct file *file, void *fh, struct v4l2_buffer *buf); int v4l2_m2m_ioctl_prepare_buf(struct file *file, void *fh, struct v4l2_buffer *buf); int v4l2_m2m_ioctl_streamon(struct file *file, void *fh, enum v4l2_buf_type type); int v4l2_m2m_ioctl_streamoff(struct file *file, void *fh, enum v4l2_buf_type type); int v4l2_m2m_ioctl_encoder_cmd(struct file *file, void *fh, struct v4l2_encoder_cmd *ec); int v4l2_m2m_ioctl_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc); int v4l2_m2m_ioctl_try_encoder_cmd(struct file *file, void *fh, struct v4l2_encoder_cmd *ec); int v4l2_m2m_ioctl_try_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc); int v4l2_m2m_ioctl_stateless_try_decoder_cmd(struct file *file, void *fh, struct v4l2_decoder_cmd *dc); int v4l2_m2m_ioctl_stateless_decoder_cmd(struct file *file, void *priv, struct v4l2_decoder_cmd *dc); int v4l2_m2m_fop_mmap(struct file *file, struct vm_area_struct *vma); __poll_t v4l2_m2m_fop_poll(struct file *file, poll_table *wait); #endif /* _MEDIA_V4L2_MEM2MEM_H */ |
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 | /* IPv6-specific defines for netfilter. * (C)1998 Rusty Russell -- This code is GPL. * (C)1999 David Jeffery * this header was blatantly ripped from netfilter_ipv4.h * it's amazing what adding a bunch of 6s can do =8^) */ #ifndef __LINUX_IP6_NETFILTER_H #define __LINUX_IP6_NETFILTER_H #include <uapi/linux/netfilter_ipv6.h> #include <net/tcp.h> /* Check for an extension */ static inline int nf_ip6_ext_hdr(u8 nexthdr) { return (nexthdr == IPPROTO_HOPOPTS) || (nexthdr == IPPROTO_ROUTING) || (nexthdr == IPPROTO_FRAGMENT) || (nexthdr == IPPROTO_ESP) || (nexthdr == IPPROTO_AH) || (nexthdr == IPPROTO_NONE) || (nexthdr == IPPROTO_DSTOPTS); } /* Extra routing may needed on local out, as the QUEUE target never returns * control to the table. */ struct ip6_rt_info { struct in6_addr daddr; struct in6_addr saddr; u_int32_t mark; }; struct nf_queue_entry; struct nf_bridge_frag_data; /* * Hook functions for ipv6 to allow xt_* modules to be built-in even * if IPv6 is a module. */ struct nf_ipv6_ops { #if IS_MODULE(CONFIG_IPV6) int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); int (*route_me_harder)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*dev_get_saddr)(struct net *net, const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); int (*route)(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); u32 (*cookie_init_sequence)(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp); int (*cookie_v6_check)(const struct ipv6hdr *iph, const struct tcphdr *th); #endif void (*route_input)(struct sk_buff *skb); int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, int (*output)(struct net *, struct sock *, struct sk_buff *)); int (*reroute)(struct sk_buff *skb, const struct nf_queue_entry *entry); #if IS_MODULE(CONFIG_IPV6) int (*br_fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, const struct nf_bridge_frag_data *data, struct sk_buff *)); #endif }; #ifdef CONFIG_NETFILTER #include <net/addrconf.h> extern const struct nf_ipv6_ops __rcu *nf_ipv6_ops; static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) { return rcu_dereference(nf_ipv6_ops); } static inline int nf_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (!v6_ops) return 1; return v6_ops->chk_addr(net, addr, dev, strict); #elif IS_BUILTIN(CONFIG_IPV6) return ipv6_chk_addr(net, addr, dev, strict); #else return 1; #endif } int __nf_ip6_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict); static inline int nf_ip6_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); if (v6ops) return v6ops->route(net, dst, fl, strict); return -EHOSTUNREACH; #endif #if IS_BUILTIN(CONFIG_IPV6) return __nf_ip6_route(net, dst, fl, strict); #else return -EHOSTUNREACH; #endif } #include <net/netfilter/ipv6/nf_defrag_ipv6.h> int br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, const struct nf_bridge_frag_data *data, struct sk_buff *)); static inline int nf_br_ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, struct nf_bridge_frag_data *data, int (*output)(struct net *, struct sock *sk, const struct nf_bridge_frag_data *data, struct sk_buff *)) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (!v6_ops) return 1; return v6_ops->br_fragment(net, sk, skb, data, output); #elif IS_BUILTIN(CONFIG_IPV6) return br_ip6_fragment(net, sk, skb, data, output); #else return 1; #endif } int ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb); static inline int nf_ip6_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb) { #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (!v6_ops) return -EHOSTUNREACH; return v6_ops->route_me_harder(net, sk, skb); #elif IS_BUILTIN(CONFIG_IPV6) return ip6_route_me_harder(net, sk, skb); #else return -EHOSTUNREACH; #endif } static inline u32 nf_ipv6_cookie_init_sequence(const struct ipv6hdr *iph, const struct tcphdr *th, u16 *mssp) { #if IS_ENABLED(CONFIG_SYN_COOKIES) #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (v6_ops) return v6_ops->cookie_init_sequence(iph, th, mssp); #elif IS_BUILTIN(CONFIG_IPV6) return __cookie_v6_init_sequence(iph, th, mssp); #endif #endif return 0; } static inline int nf_cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th) { #if IS_ENABLED(CONFIG_SYN_COOKIES) #if IS_MODULE(CONFIG_IPV6) const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); if (v6_ops) return v6_ops->cookie_v6_check(iph, th); #elif IS_BUILTIN(CONFIG_IPV6) return __cookie_v6_check(iph, th); #endif #endif return 0; } __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); int nf_ip6_check_hbh_len(struct sk_buff *skb, u32 *plen); int ipv6_netfilter_init(void); void ipv6_netfilter_fini(void); #else /* CONFIG_NETFILTER */ static inline int ipv6_netfilter_init(void) { return 0; } static inline void ipv6_netfilter_fini(void) { return; } static inline const struct nf_ipv6_ops *nf_get_ipv6_ops(void) { return NULL; } #endif /* CONFIG_NETFILTER */ #endif /*__LINUX_IP6_NETFILTER_H*/ |
28 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 | // SPDX-License-Identifier: GPL-2.0 /* RTT/RTO calculation. * * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com) * * https://tools.ietf.org/html/rfc6298 * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf */ #include <linux/net.h> #include "ar-internal.h" #define RXRPC_RTO_MAX (120 * USEC_PER_SEC) #define RXRPC_TIMEOUT_INIT ((unsigned int)(1 * MSEC_PER_SEC)) /* RFC6298 2.1 initial RTO value */ #define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) { return 200; } static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) { return (peer->srtt_us >> 3) + peer->rttvar_us; } static u32 rxrpc_bound_rto(u32 rto) { return min(rto, RXRPC_RTO_MAX); } /* * Called to compute a smoothed rtt estimate. The data fed to this * routine either comes from timestamps, or from segments that were * known _not_ to have been retransmitted [see Karn/Partridge * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 * piece by Van Jacobson. * NOTE: the next three routines used to be one big routine. * To save cycles in the RFC 1323 implementation it was better to break * it up into three procedures. -- erics */ static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) { long m = sample_rtt_us; /* RTT */ u32 srtt = peer->srtt_us; /* The following amusing code comes from Jacobson's * article in SIGCOMM '88. Note that rtt and mdev * are scaled versions of rtt and mean deviation. * This is designed to be as fast as possible * m stands for "measurement". * * On a 1990 paper the rto value is changed to: * RTO = rtt + 4 * mdev * * Funny. This algorithm seems to be very broken. * These formulae increase RTO, when it should be decreased, increase * too slowly, when it should be increased quickly, decrease too quickly * etc. I guess in BSD RTO takes ONE value, so that it is absolutely * does not matter how to _calculate_ it. Seems, it was trap * that VJ failed to avoid. 8) */ if (srtt != 0) { m -= (srtt >> 3); /* m is now error in rtt est */ srtt += m; /* rtt = 7/8 rtt + 1/8 new */ if (m < 0) { m = -m; /* m is now abs(error) */ m -= (peer->mdev_us >> 2); /* similar update on mdev */ /* This is similar to one of Eifel findings. * Eifel blocks mdev updates when rtt decreases. * This solution is a bit different: we use finer gain * for mdev in this case (alpha*beta). * Like Eifel it also prevents growth of rto, * but also it limits too fast rto decreases, * happening in pure Eifel. */ if (m > 0) m >>= 3; } else { m -= (peer->mdev_us >> 2); /* similar update on mdev */ } peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ if (peer->mdev_us > peer->mdev_max_us) { peer->mdev_max_us = peer->mdev_us; if (peer->mdev_max_us > peer->rttvar_us) peer->rttvar_us = peer->mdev_max_us; } } else { /* no previous measure. */ srtt = m << 3; /* take the measured time to be rtt */ peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); peer->mdev_max_us = peer->rttvar_us; } peer->srtt_us = max(1U, srtt); } /* * Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ static void rxrpc_set_rto(struct rxrpc_peer *peer) { u32 rto; /* 1. If rtt variance happened to be less 50msec, it is hallucination. * It cannot be less due to utterly erratic ACK generation made * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ * to do with delayed acks, because at cwnd>2 true delack timeout * is invisible. Actually, Linux-2.4 also generates erratic * ACKs in some circumstances. */ rto = __rxrpc_set_rto(peer); /* 2. Fixups made earlier cannot be right. * If we do not estimate RTO correctly without them, * all the algo is pure shit and should be replaced * with correct one. It is exactly, which we pretend to do. */ /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo * guarantees that rto is higher. */ peer->rto_us = rxrpc_bound_rto(rto); } static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) { if (rtt_us < 0) return; //rxrpc_update_rtt_min(peer, rtt_us); rxrpc_rtt_estimator(peer, rtt_us); rxrpc_set_rto(peer); /* RFC6298: only reset backoff on valid RTT measurement. */ peer->backoff = 0; } /* * Add RTT information to cache. This is called in softirq mode and has * exclusive access to the peer RTT data. */ void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, int rtt_slot, rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, ktime_t send_time, ktime_t resp_time) { struct rxrpc_peer *peer = call->peer; s64 rtt_us; rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); if (rtt_us < 0) return; spin_lock(&peer->rtt_input_lock); rxrpc_ack_update_rtt(peer, rtt_us); if (peer->rtt_count < 3) peer->rtt_count++; spin_unlock(&peer->rtt_input_lock); trace_rxrpc_rtt_rx(call, why, rtt_slot, send_serial, resp_serial, peer->srtt_us >> 3, peer->rto_us); } /* * Get the retransmission timeout to set in nanoseconds, backing it off each * time we retransmit. */ ktime_t rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) { u64 timo_us; u32 backoff = READ_ONCE(peer->backoff); timo_us = peer->rto_us; timo_us <<= backoff; if (retrans && timo_us * 2 <= RXRPC_RTO_MAX) WRITE_ONCE(peer->backoff, backoff + 1); if (timo_us < 1) timo_us = 1; return ns_to_ktime(timo_us * NSEC_PER_USEC); } void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) { peer->rto_us = RXRPC_TIMEOUT_INIT; peer->mdev_us = RXRPC_TIMEOUT_INIT; peer->backoff = 0; //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); } |
11 1263 1265 40 1266 1257 1251 1386 1366 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_DST_METADATA_H #define __NET_DST_METADATA_H 1 #include <linux/skbuff.h> #include <net/ip_tunnels.h> #include <net/macsec.h> #include <net/dst.h> enum metadata_type { METADATA_IP_TUNNEL, METADATA_HW_PORT_MUX, METADATA_MACSEC, METADATA_XFRM, }; struct hw_port_info { struct net_device *lower_dev; u32 port_id; }; struct macsec_info { sci_t sci; }; struct xfrm_md_info { u32 if_id; int link; struct dst_entry *dst_orig; }; struct metadata_dst { struct dst_entry dst; enum metadata_type type; union { struct ip_tunnel_info tun_info; struct hw_port_info port_info; struct macsec_info macsec_info; struct xfrm_md_info xfrm_info; } u; }; static inline struct metadata_dst *skb_metadata_dst(const struct sk_buff *skb) { struct metadata_dst *md_dst = (struct metadata_dst *) skb_dst(skb); if (md_dst && md_dst->dst.flags & DST_METADATA) return md_dst; return NULL; } static inline struct ip_tunnel_info * skb_tunnel_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_IP_TUNNEL) return &md_dst->u.tun_info; dst = skb_dst(skb); if (dst && dst->lwtstate && (dst->lwtstate->type == LWTUNNEL_ENCAP_IP || dst->lwtstate->type == LWTUNNEL_ENCAP_IP6)) return lwt_tun_info(dst->lwtstate); return NULL; } static inline struct xfrm_md_info *lwt_xfrm_info(struct lwtunnel_state *lwt) { return (struct xfrm_md_info *)lwt->data; } static inline struct xfrm_md_info *skb_xfrm_md_info(const struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); struct dst_entry *dst; if (md_dst && md_dst->type == METADATA_XFRM) return &md_dst->u.xfrm_info; dst = skb_dst(skb); if (dst && dst->lwtstate && dst->lwtstate->type == LWTUNNEL_ENCAP_XFRM) return lwt_xfrm_info(dst->lwtstate); return NULL; } static inline bool skb_valid_dst(const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); return dst && !(dst->flags & DST_METADATA); } static inline int skb_metadata_dst_cmp(const struct sk_buff *skb_a, const struct sk_buff *skb_b) { const struct metadata_dst *a, *b; if (!(skb_a->_skb_refdst | skb_b->_skb_refdst)) return 0; a = (const struct metadata_dst *) skb_dst(skb_a); b = (const struct metadata_dst *) skb_dst(skb_b); if (!a != !b || a->type != b->type) return 1; switch (a->type) { case METADATA_HW_PORT_MUX: return memcmp(&a->u.port_info, &b->u.port_info, sizeof(a->u.port_info)); case METADATA_IP_TUNNEL: return memcmp(&a->u.tun_info, &b->u.tun_info, sizeof(a->u.tun_info) + a->u.tun_info.options_len); case METADATA_MACSEC: return memcmp(&a->u.macsec_info, &b->u.macsec_info, sizeof(a->u.macsec_info)); case METADATA_XFRM: return memcmp(&a->u.xfrm_info, &b->u.xfrm_info, sizeof(a->u.xfrm_info)); default: return 1; } } void metadata_dst_free(struct metadata_dst *); struct metadata_dst *metadata_dst_alloc(u8 optslen, enum metadata_type type, gfp_t flags); void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst); struct metadata_dst __percpu * metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags); static inline struct metadata_dst *tun_rx_dst(int md_size) { struct metadata_dst *tun_dst; tun_dst = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!tun_dst) return NULL; tun_dst->u.tun_info.options_len = 0; tun_dst->u.tun_info.mode = 0; return tun_dst; } static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) { struct metadata_dst *md_dst = skb_metadata_dst(skb); int md_size; struct metadata_dst *new_md; if (!md_dst || md_dst->type != METADATA_IP_TUNNEL) return ERR_PTR(-EINVAL); md_size = md_dst->u.tun_info.options_len; new_md = metadata_dst_alloc(md_size, METADATA_IP_TUNNEL, GFP_ATOMIC); if (!new_md) return ERR_PTR(-ENOMEM); unsafe_memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size, /* metadata_dst_alloc() reserves room (md_size bytes) for * options right after the ip_tunnel_info struct. */); #ifdef CONFIG_DST_CACHE /* Unclone the dst cache if there is one */ if (new_md->u.tun_info.dst_cache.cache) { int ret; ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); if (ret) { metadata_dst_free(new_md); return ERR_PTR(ret); } } #endif skb_dst_drop(skb); skb_dst_set(skb, &new_md->dst); return new_md; } static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb) { struct metadata_dst *dst; dst = tun_dst_unclone(skb); if (IS_ERR(dst)) return NULL; return &dst->u.tun_info; } static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, __be32 daddr, __u8 tos, __u8 ttl, __be16 tp_dst, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, saddr, daddr, tos, ttl, 0, 0, tp_dst, tunnel_id, flags); return tun_dst; } static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, const unsigned long *flags, __be64 tunnel_id, int md_size) { const struct iphdr *iph = ip_hdr(skb); return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, 0, flags, tunnel_id, md_size); } static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 tos, __u8 ttl, __be16 tp_dst, __be32 label, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; struct ip_tunnel_info *info; tun_dst = tun_rx_dst(md_size); if (!tun_dst) return NULL; info = &tun_dst->u.tun_info; info->mode = IP_TUNNEL_INFO_IPV6; ip_tunnel_flags_copy(info->key.tun_flags, flags); info->key.tun_id = tunnel_id; info->key.tp_src = 0; info->key.tp_dst = tp_dst; info->key.u.ipv6.src = *saddr; info->key.u.ipv6.dst = *daddr; info->key.tos = tos; info->key.ttl = ttl; info->key.label = label; return tun_dst; } static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, const unsigned long *flags, __be64 tunnel_id, int md_size) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, ipv6_get_dsfield(ip6h), ip6h->hop_limit, 0, ip6_flowlabel(ip6h), flags, tunnel_id, md_size); } #endif /* __NET_DST_METADATA_H */ |
75 74 76 73 75 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 | // SPDX-License-Identifier: GPL-2.0-or-later /* mpihelp-lshift.c - MPI helper functions * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc. * * This file is part of GnuPG. * * Note: This code is heavily based on the GNU MP Library. * Actually it's the same code with only minor changes in the * way the data is stored; this is to support the abstraction * of an optional secure memory allocation which may be used * to avoid revealing of sensitive data due to paging etc. * The GNU MP Library itself is published under the LGPL; * however I decided to publish this code under the plain GPL. */ #include "mpi-internal.h" /* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left * and store the USIZE least significant digits of the result at WP. * Return the bits shifted out from the most significant digit. * * Argument constraints: * 1. 0 < CNT < BITS_PER_MP_LIMB * 2. If the result is to be written over the input, WP must be >= UP. */ mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt) { mpi_limb_t high_limb, low_limb; unsigned sh_1, sh_2; mpi_size_t i; mpi_limb_t retval; sh_1 = cnt; wp += 1; sh_2 = BITS_PER_MPI_LIMB - sh_1; i = usize - 1; low_limb = up[i]; retval = low_limb >> sh_2; high_limb = low_limb; while (--i >= 0) { low_limb = up[i]; wp[i] = (high_limb << sh_1) | (low_limb >> sh_2); high_limb = low_limb; } wp[i] = high_limb << sh_1; return retval; } |
15 16 16 16 15 3 13 22 1 19 2 2 1 3 4 6 9 7 1 7 7 7 7 21 15 1 14 1 1 1 1 4 5 9 2 1 2 2 2 2 13 7 2 1 2 5 3 3 4 6 2 3 1 4 11 10 1 2 2 5 2 5 7 6 1 7 5 1 3 1 4 10 2 2 1 5 2 3 2 8 2 2 1 5 2 3 2 5 1 4 4 4 1 4 4 171 8 7 4 12 5 10 2 10 1 1 1 9 7 3 7 1 10 12 12 9 2 1 12 7 5 5 4 1 1 1 7 2 5 28 1 9 1 16 9 13 3 13 17 4 9 13 18 3 9 2 6 1 4 1 17 2 2 23 10 1 11 20 21 1 19 2 20 18 16 4 20 22 1 21 21 16 17 17 17 21 4 11 8 9 9 1 1 16 15 11 2 4 2 2 1 12 1 2 1 1 1 24 21 1 2 1 1 1 1 1 1 1 11 2 17 3 25 26 26 1 1 1 2 1 1 2 1 1 3 245 27 3 1 1 1 1 1 1 1 1 3 3 3 1 28 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 24 1 1 2 1 1 1 1 1 1 1 1 2 1 1 2 1 2 1 12 2 2 2 2 2 1 6 15 5 2 2 2 32 1 3 1 9 6 4 9 1 1 1 1 1 1 1 1 5 5 5 5 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 | // SPDX-License-Identifier: GPL-2.0 /* * linux/kernel/sys.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/export.h> #include <linux/mm.h> #include <linux/mm_inline.h> #include <linux/utsname.h> #include <linux/mman.h> #include <linux/reboot.h> #include <linux/prctl.h> #include <linux/highuid.h> #include <linux/fs.h> #include <linux/kmod.h> #include <linux/ksm.h> #include <linux/perf_event.h> #include <linux/resource.h> #include <linux/kernel.h> #include <linux/workqueue.h> #include <linux/capability.h> #include <linux/device.h> #include <linux/key.h> #include <linux/times.h> #include <linux/posix-timers.h> #include <linux/security.h> #include <linux/random.h> #include <linux/suspend.h> #include <linux/tty.h> #include <linux/signal.h> #include <linux/cn_proc.h> #include <linux/getcpu.h> #include <linux/task_io_accounting_ops.h> #include <linux/seccomp.h> #include <linux/cpu.h> #include <linux/personality.h> #include <linux/ptrace.h> #include <linux/fs_struct.h> #include <linux/file.h> #include <linux/mount.h> #include <linux/gfp.h> #include <linux/syscore_ops.h> #include <linux/version.h> #include <linux/ctype.h> #include <linux/syscall_user_dispatch.h> #include <linux/compat.h> #include <linux/syscalls.h> #include <linux/kprobes.h> #include <linux/user_namespace.h> #include <linux/time_namespace.h> #include <linux/binfmts.h> #include <linux/sched.h> #include <linux/sched/autogroup.h> #include <linux/sched/loadavg.h> #include <linux/sched/stat.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> #include <linux/sched/cputime.h> #include <linux/rcupdate.h> #include <linux/uidgid.h> #include <linux/cred.h> #include <linux/nospec.h> #include <linux/kmsg_dump.h> /* Move somewhere else to avoid recompiling? */ #include <generated/utsrelease.h> #include <linux/uaccess.h> #include <asm/io.h> #include <asm/unistd.h> #include "uid16.h" #ifndef SET_UNALIGN_CTL # define SET_UNALIGN_CTL(a, b) (-EINVAL) #endif #ifndef GET_UNALIGN_CTL # define GET_UNALIGN_CTL(a, b) (-EINVAL) #endif #ifndef SET_FPEMU_CTL # define SET_FPEMU_CTL(a, b) (-EINVAL) #endif #ifndef GET_FPEMU_CTL # define GET_FPEMU_CTL(a, b) (-EINVAL) #endif #ifndef SET_FPEXC_CTL # define SET_FPEXC_CTL(a, b) (-EINVAL) #endif #ifndef GET_FPEXC_CTL # define GET_FPEXC_CTL(a, b) (-EINVAL) #endif #ifndef GET_ENDIAN # define GET_ENDIAN(a, b) (-EINVAL) #endif #ifndef SET_ENDIAN # define SET_ENDIAN(a, b) (-EINVAL) #endif #ifndef GET_TSC_CTL # define GET_TSC_CTL(a) (-EINVAL) #endif #ifndef SET_TSC_CTL # define SET_TSC_CTL(a) (-EINVAL) #endif #ifndef GET_FP_MODE # define GET_FP_MODE(a) (-EINVAL) #endif #ifndef SET_FP_MODE # define SET_FP_MODE(a,b) (-EINVAL) #endif #ifndef SVE_SET_VL # define SVE_SET_VL(a) (-EINVAL) #endif #ifndef SVE_GET_VL # define SVE_GET_VL() (-EINVAL) #endif #ifndef SME_SET_VL # define SME_SET_VL(a) (-EINVAL) #endif #ifndef SME_GET_VL # define SME_GET_VL() (-EINVAL) #endif #ifndef PAC_RESET_KEYS # define PAC_RESET_KEYS(a, b) (-EINVAL) #endif #ifndef PAC_SET_ENABLED_KEYS # define PAC_SET_ENABLED_KEYS(a, b, c) (-EINVAL) #endif #ifndef PAC_GET_ENABLED_KEYS # define PAC_GET_ENABLED_KEYS(a) (-EINVAL) #endif #ifndef SET_TAGGED_ADDR_CTRL # define SET_TAGGED_ADDR_CTRL(a) (-EINVAL) #endif #ifndef GET_TAGGED_ADDR_CTRL # define GET_TAGGED_ADDR_CTRL() (-EINVAL) #endif #ifndef RISCV_V_SET_CONTROL # define RISCV_V_SET_CONTROL(a) (-EINVAL) #endif #ifndef RISCV_V_GET_CONTROL # define RISCV_V_GET_CONTROL() (-EINVAL) #endif #ifndef RISCV_SET_ICACHE_FLUSH_CTX # define RISCV_SET_ICACHE_FLUSH_CTX(a, b) (-EINVAL) #endif #ifndef PPC_GET_DEXCR_ASPECT # define PPC_GET_DEXCR_ASPECT(a, b) (-EINVAL) #endif #ifndef PPC_SET_DEXCR_ASPECT # define PPC_SET_DEXCR_ASPECT(a, b, c) (-EINVAL) #endif /* * this is where the system-wide overflow UID and GID are defined, for * architectures that now have 32-bit UID/GID but didn't in the past */ int overflowuid = DEFAULT_OVERFLOWUID; int overflowgid = DEFAULT_OVERFLOWGID; EXPORT_SYMBOL(overflowuid); EXPORT_SYMBOL(overflowgid); /* * the same as above, but for filesystems which can only store a 16-bit * UID and GID. as such, this is needed on all architectures */ int fs_overflowuid = DEFAULT_FS_OVERFLOWUID; int fs_overflowgid = DEFAULT_FS_OVERFLOWGID; EXPORT_SYMBOL(fs_overflowuid); EXPORT_SYMBOL(fs_overflowgid); /* * Returns true if current's euid is same as p's uid or euid, * or has CAP_SYS_NICE to p's user_ns. * * Called with rcu_read_lock, creds are safe */ static bool set_one_prio_perm(struct task_struct *p) { const struct cred *cred = current_cred(), *pcred = __task_cred(p); if (uid_eq(pcred->uid, cred->euid) || uid_eq(pcred->euid, cred->euid)) return true; if (ns_capable(pcred->user_ns, CAP_SYS_NICE)) return true; return false; } /* * set the priority of a task * - the caller must hold the RCU read lock */ static int set_one_prio(struct task_struct *p, int niceval, int error) { int no_nice; if (!set_one_prio_perm(p)) { error = -EPERM; goto out; } if (niceval < task_nice(p) && !can_nice(p, niceval)) { error = -EACCES; goto out; } no_nice = security_task_setnice(p, niceval); if (no_nice) { error = no_nice; goto out; } if (error == -ESRCH) error = 0; set_user_nice(p, niceval); out: return error; } SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval) { struct task_struct *g, *p; struct user_struct *user; const struct cred *cred = current_cred(); int error = -EINVAL; struct pid *pgrp; kuid_t uid; if (which > PRIO_USER || which < PRIO_PROCESS) goto out; /* normalize: avoid signed division (rounding problems) */ error = -ESRCH; if (niceval < MIN_NICE) niceval = MIN_NICE; if (niceval > MAX_NICE) niceval = MAX_NICE; rcu_read_lock(); switch (which) { case PRIO_PROCESS: if (who) p = find_task_by_vpid(who); else p = current; if (p) error = set_one_prio(p, niceval, error); break; case PRIO_PGRP: if (who) pgrp = find_vpid(who); else pgrp = task_pgrp(current); read_lock(&tasklist_lock); do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { error = set_one_prio(p, niceval, error); } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); read_unlock(&tasklist_lock); break; case PRIO_USER: uid = make_kuid(cred->user_ns, who); user = cred->user; if (!who) uid = cred->uid; else if (!uid_eq(uid, cred->uid)) { user = find_user(uid); if (!user) goto out_unlock; /* No processes for this user */ } for_each_process_thread(g, p) { if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) error = set_one_prio(p, niceval, error); } if (!uid_eq(uid, cred->uid)) free_uid(user); /* For find_user() */ break; } out_unlock: rcu_read_unlock(); out: return error; } /* * Ugh. To avoid negative return values, "getpriority()" will * not return the normal nice-value, but a negated value that * has been offset by 20 (ie it returns 40..1 instead of -20..19) * to stay compatible. */ SYSCALL_DEFINE2(getpriority, int, which, int, who) { struct task_struct *g, *p; struct user_struct *user; const struct cred *cred = current_cred(); long niceval, retval = -ESRCH; struct pid *pgrp; kuid_t uid; if (which > PRIO_USER || which < PRIO_PROCESS) return -EINVAL; rcu_read_lock(); switch (which) { case PRIO_PROCESS: if (who) p = find_task_by_vpid(who); else p = current; if (p) { niceval = nice_to_rlimit(task_nice(p)); if (niceval > retval) retval = niceval; } break; case PRIO_PGRP: if (who) pgrp = find_vpid(who); else pgrp = task_pgrp(current); read_lock(&tasklist_lock); do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { niceval = nice_to_rlimit(task_nice(p)); if (niceval > retval) retval = niceval; } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); read_unlock(&tasklist_lock); break; case PRIO_USER: uid = make_kuid(cred->user_ns, who); user = cred->user; if (!who) uid = cred->uid; else if (!uid_eq(uid, cred->uid)) { user = find_user(uid); if (!user) goto out_unlock; /* No processes for this user */ } for_each_process_thread(g, p) { if (uid_eq(task_uid(p), uid) && task_pid_vnr(p)) { niceval = nice_to_rlimit(task_nice(p)); if (niceval > retval) retval = niceval; } } if (!uid_eq(uid, cred->uid)) free_uid(user); /* for find_user() */ break; } out_unlock: rcu_read_unlock(); return retval; } /* * Unprivileged users may change the real gid to the effective gid * or vice versa. (BSD-style) * * If you set the real gid at all, or set the effective gid to a value not * equal to the real gid, then the saved gid is set to the new effective gid. * * This makes it possible for a setgid program to completely drop its * privileges, which is often a useful assertion to make when you are doing * a security audit over a program. * * The general idea is that a program which uses just setregid() will be * 100% compatible with BSD. A program which uses just setgid() will be * 100% compatible with POSIX with saved IDs. * * SMP: There are not races, the GIDs are checked only by filesystem * operations (as far as semantic preservation is concerned). */ #ifdef CONFIG_MULTIUSER long __sys_setregid(gid_t rgid, gid_t egid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; kgid_t krgid, kegid; krgid = make_kgid(ns, rgid); kegid = make_kgid(ns, egid); if ((rgid != (gid_t) -1) && !gid_valid(krgid)) return -EINVAL; if ((egid != (gid_t) -1) && !gid_valid(kegid)) return -EINVAL; new = prepare_creds(); if (!new) return -ENOMEM; old = current_cred(); retval = -EPERM; if (rgid != (gid_t) -1) { if (gid_eq(old->gid, krgid) || gid_eq(old->egid, krgid) || ns_capable_setid(old->user_ns, CAP_SETGID)) new->gid = krgid; else goto error; } if (egid != (gid_t) -1) { if (gid_eq(old->gid, kegid) || gid_eq(old->egid, kegid) || gid_eq(old->sgid, kegid) || ns_capable_setid(old->user_ns, CAP_SETGID)) new->egid = kegid; else goto error; } if (rgid != (gid_t) -1 || (egid != (gid_t) -1 && !gid_eq(kegid, old->gid))) new->sgid = new->egid; new->fsgid = new->egid; retval = security_task_fix_setgid(new, old, LSM_SETID_RE); if (retval < 0) goto error; return commit_creds(new); error: abort_creds(new); return retval; } SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid) { return __sys_setregid(rgid, egid); } /* * setgid() is implemented like SysV w/ SAVED_IDS * * SMP: Same implicit races as above. */ long __sys_setgid(gid_t gid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; kgid_t kgid; kgid = make_kgid(ns, gid); if (!gid_valid(kgid)) return -EINVAL; new = prepare_creds(); if (!new) return -ENOMEM; old = current_cred(); retval = -EPERM; if (ns_capable_setid(old->user_ns, CAP_SETGID)) new->gid = new->egid = new->sgid = new->fsgid = kgid; else if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->sgid)) new->egid = new->fsgid = kgid; else goto error; retval = security_task_fix_setgid(new, old, LSM_SETID_ID); if (retval < 0) goto error; return commit_creds(new); error: abort_creds(new); return retval; } SYSCALL_DEFINE1(setgid, gid_t, gid) { return __sys_setgid(gid); } /* * change the user struct in a credentials set to match the new UID */ static int set_user(struct cred *new) { struct user_struct *new_user; new_user = alloc_uid(new->uid); if (!new_user) return -EAGAIN; free_uid(new->user); new->user = new_user; return 0; } static void flag_nproc_exceeded(struct cred *new) { if (new->ucounts == current_ucounts()) return; /* * We don't fail in case of NPROC limit excess here because too many * poorly written programs don't check set*uid() return code, assuming * it never fails if called by root. We may still enforce NPROC limit * for programs doing set*uid()+execve() by harmlessly deferring the * failure to the execve() stage. */ if (is_rlimit_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) && new->user != INIT_USER) current->flags |= PF_NPROC_EXCEEDED; else current->flags &= ~PF_NPROC_EXCEEDED; } /* * Unprivileged users may change the real uid to the effective uid * or vice versa. (BSD-style) * * If you set the real uid at all, or set the effective uid to a value not * equal to the real uid, then the saved uid is set to the new effective uid. * * This makes it possible for a setuid program to completely drop its * privileges, which is often a useful assertion to make when you are doing * a security audit over a program. * * The general idea is that a program which uses just setreuid() will be * 100% compatible with BSD. A program which uses just setuid() will be * 100% compatible with POSIX with saved IDs. */ long __sys_setreuid(uid_t ruid, uid_t euid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; kuid_t kruid, keuid; kruid = make_kuid(ns, ruid); keuid = make_kuid(ns, euid); if ((ruid != (uid_t) -1) && !uid_valid(kruid)) return -EINVAL; if ((euid != (uid_t) -1) && !uid_valid(keuid)) return -EINVAL; new = prepare_creds(); if (!new) return -ENOMEM; old = current_cred(); retval = -EPERM; if (ruid != (uid_t) -1) { new->uid = kruid; if (!uid_eq(old->uid, kruid) && !uid_eq(old->euid, kruid) && !ns_capable_setid(old->user_ns, CAP_SETUID)) goto error; } if (euid != (uid_t) -1) { new->euid = keuid; if (!uid_eq(old->uid, keuid) && !uid_eq(old->euid, keuid) && !uid_eq(old->suid, keuid) && !ns_capable_setid(old->user_ns, CAP_SETUID)) goto error; } if (!uid_eq(new->uid, old->uid)) { retval = set_user(new); if (retval < 0) goto error; } if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && !uid_eq(keuid, old->uid))) new->suid = new->euid; new->fsuid = new->euid; retval = security_task_fix_setuid(new, old, LSM_SETID_RE); if (retval < 0) goto error; retval = set_cred_ucounts(new); if (retval < 0) goto error; flag_nproc_exceeded(new); return commit_creds(new); error: abort_creds(new); return retval; } SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) { return __sys_setreuid(ruid, euid); } /* * setuid() is implemented like SysV with SAVED_IDS * * Note that SAVED_ID's is deficient in that a setuid root program * like sendmail, for example, cannot set its uid to be a normal * user and then switch back, because if you're root, setuid() sets * the saved uid too. If you don't like this, blame the bright people * in the POSIX committee and/or USG. Note that the BSD-style setreuid() * will allow a root program to temporarily drop privileges and be able to * regain them by swapping the real and effective uid. */ long __sys_setuid(uid_t uid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; kuid_t kuid; kuid = make_kuid(ns, uid); if (!uid_valid(kuid)) return -EINVAL; new = prepare_creds(); if (!new) return -ENOMEM; old = current_cred(); retval = -EPERM; if (ns_capable_setid(old->user_ns, CAP_SETUID)) { new->suid = new->uid = kuid; if (!uid_eq(kuid, old->uid)) { retval = set_user(new); if (retval < 0) goto error; } } else if (!uid_eq(kuid, old->uid) && !uid_eq(kuid, new->suid)) { goto error; } new->fsuid = new->euid = kuid; retval = security_task_fix_setuid(new, old, LSM_SETID_ID); if (retval < 0) goto error; retval = set_cred_ucounts(new); if (retval < 0) goto error; flag_nproc_exceeded(new); return commit_creds(new); error: abort_creds(new); return retval; } SYSCALL_DEFINE1(setuid, uid_t, uid) { return __sys_setuid(uid); } /* * This function implements a generic ability to update ruid, euid, * and suid. This allows you to implement the 4.4 compatible seteuid(). */ long __sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; kuid_t kruid, keuid, ksuid; bool ruid_new, euid_new, suid_new; kruid = make_kuid(ns, ruid); keuid = make_kuid(ns, euid); ksuid = make_kuid(ns, suid); if ((ruid != (uid_t) -1) && !uid_valid(kruid)) return -EINVAL; if ((euid != (uid_t) -1) && !uid_valid(keuid)) return -EINVAL; if ((suid != (uid_t) -1) && !uid_valid(ksuid)) return -EINVAL; old = current_cred(); /* check for no-op */ if ((ruid == (uid_t) -1 || uid_eq(kruid, old->uid)) && (euid == (uid_t) -1 || (uid_eq(keuid, old->euid) && uid_eq(keuid, old->fsuid))) && (suid == (uid_t) -1 || uid_eq(ksuid, old->suid))) return 0; ruid_new = ruid != (uid_t) -1 && !uid_eq(kruid, old->uid) && !uid_eq(kruid, old->euid) && !uid_eq(kruid, old->suid); euid_new = euid != (uid_t) -1 && !uid_eq(keuid, old->uid) && !uid_eq(keuid, old->euid) && !uid_eq(keuid, old->suid); suid_new = suid != (uid_t) -1 && !uid_eq(ksuid, old->uid) && !uid_eq(ksuid, old->euid) && !uid_eq(ksuid, old->suid); if ((ruid_new || euid_new || suid_new) && !ns_capable_setid(old->user_ns, CAP_SETUID)) return -EPERM; new = prepare_creds(); if (!new) return -ENOMEM; if (ruid != (uid_t) -1) { new->uid = kruid; if (!uid_eq(kruid, old->uid)) { retval = set_user(new); if (retval < 0) goto error; } } if (euid != (uid_t) -1) new->euid = keuid; if (suid != (uid_t) -1) new->suid = ksuid; new->fsuid = new->euid; retval = security_task_fix_setuid(new, old, LSM_SETID_RES); if (retval < 0) goto error; retval = set_cred_ucounts(new); if (retval < 0) goto error; flag_nproc_exceeded(new); return commit_creds(new); error: abort_creds(new); return retval; } SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) { return __sys_setresuid(ruid, euid, suid); } SYSCALL_DEFINE3(getresuid, uid_t __user *, ruidp, uid_t __user *, euidp, uid_t __user *, suidp) { const struct cred *cred = current_cred(); int retval; uid_t ruid, euid, suid; ruid = from_kuid_munged(cred->user_ns, cred->uid); euid = from_kuid_munged(cred->user_ns, cred->euid); suid = from_kuid_munged(cred->user_ns, cred->suid); retval = put_user(ruid, ruidp); if (!retval) { retval = put_user(euid, euidp); if (!retval) return put_user(suid, suidp); } return retval; } /* * Same as above, but for rgid, egid, sgid. */ long __sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { struct user_namespace *ns = current_user_ns(); const struct cred *old; struct cred *new; int retval; kgid_t krgid, kegid, ksgid; bool rgid_new, egid_new, sgid_new; krgid = make_kgid(ns, rgid); kegid = make_kgid(ns, egid); ksgid = make_kgid(ns, sgid); if ((rgid != (gid_t) -1) && !gid_valid(krgid)) return -EINVAL; if ((egid != (gid_t) -1) && !gid_valid(kegid)) return -EINVAL; if ((sgid != (gid_t) -1) && !gid_valid(ksgid)) return -EINVAL; old = current_cred(); /* check for no-op */ if ((rgid == (gid_t) -1 || gid_eq(krgid, old->gid)) && (egid == (gid_t) -1 || (gid_eq(kegid, old->egid) && gid_eq(kegid, old->fsgid))) && (sgid == (gid_t) -1 || gid_eq(ksgid, old->sgid))) return 0; rgid_new = rgid != (gid_t) -1 && !gid_eq(krgid, old->gid) && !gid_eq(krgid, old->egid) && !gid_eq(krgid, old->sgid); egid_new = egid != (gid_t) -1 && !gid_eq(kegid, old->gid) && !gid_eq(kegid, old->egid) && !gid_eq(kegid, old->sgid); sgid_new = sgid != (gid_t) -1 && !gid_eq(ksgid, old->gid) && !gid_eq(ksgid, old->egid) && !gid_eq(ksgid, old->sgid); if ((rgid_new || egid_new || sgid_new) && !ns_capable_setid(old->user_ns, CAP_SETGID)) return -EPERM; new = prepare_creds(); if (!new) return -ENOMEM; if (rgid != (gid_t) -1) new->gid = krgid; if (egid != (gid_t) -1) new->egid = kegid; if (sgid != (gid_t) -1) new->sgid = ksgid; new->fsgid = new->egid; retval = security_task_fix_setgid(new, old, LSM_SETID_RES); if (retval < 0) goto error; return commit_creds(new); error: abort_creds(new); return retval; } SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid) { return __sys_setresgid(rgid, egid, sgid); } SYSCALL_DEFINE3(getresgid, gid_t __user *, rgidp, gid_t __user *, egidp, gid_t __user *, sgidp) { const struct cred *cred = current_cred(); int retval; gid_t rgid, egid, sgid; rgid = from_kgid_munged(cred->user_ns, cred->gid); egid = from_kgid_munged(cred->user_ns, cred->egid); sgid = from_kgid_munged(cred->user_ns, cred->sgid); retval = put_user(rgid, rgidp); if (!retval) { retval = put_user(egid, egidp); if (!retval) retval = put_user(sgid, sgidp); } return retval; } /* * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This * is used for "access()" and for the NFS daemon (letting nfsd stay at * whatever uid it wants to). It normally shadows "euid", except when * explicitly set by setfsuid() or for access.. */ long __sys_setfsuid(uid_t uid) { const struct cred *old; struct cred *new; uid_t old_fsuid; kuid_t kuid; old = current_cred(); old_fsuid = from_kuid_munged(old->user_ns, old->fsuid); kuid = make_kuid(old->user_ns, uid); if (!uid_valid(kuid)) return old_fsuid; new = prepare_creds(); if (!new) return old_fsuid; if (uid_eq(kuid, old->uid) || uid_eq(kuid, old->euid) || uid_eq(kuid, old->suid) || uid_eq(kuid, old->fsuid) || ns_capable_setid(old->user_ns, CAP_SETUID)) { if (!uid_eq(kuid, old->fsuid)) { new->fsuid = kuid; if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0) goto change_okay; } } abort_creds(new); return old_fsuid; change_okay: commit_creds(new); return old_fsuid; } SYSCALL_DEFINE1(setfsuid, uid_t, uid) { return __sys_setfsuid(uid); } /* * Samma pÃ¥ svenska.. */ long __sys_setfsgid(gid_t gid) { const struct cred *old; struct cred *new; gid_t old_fsgid; kgid_t kgid; old = current_cred(); old_fsgid = from_kgid_munged(old->user_ns, old->fsgid); kgid = make_kgid(old->user_ns, gid); if (!gid_valid(kgid)) return old_fsgid; new = prepare_creds(); if (!new) return old_fsgid; if (gid_eq(kgid, old->gid) || gid_eq(kgid, old->egid) || gid_eq(kgid, old->sgid) || gid_eq(kgid, old->fsgid) || ns_capable_setid(old->user_ns, CAP_SETGID)) { if (!gid_eq(kgid, old->fsgid)) { new->fsgid = kgid; if (security_task_fix_setgid(new,old,LSM_SETID_FS) == 0) goto change_okay; } } abort_creds(new); return old_fsgid; change_okay: commit_creds(new); return old_fsgid; } SYSCALL_DEFINE1(setfsgid, gid_t, gid) { return __sys_setfsgid(gid); } #endif /* CONFIG_MULTIUSER */ /** * sys_getpid - return the thread group id of the current process * * Note, despite the name, this returns the tgid not the pid. The tgid and * the pid are identical unless CLONE_THREAD was specified on clone() in * which case the tgid is the same in all threads of the same group. * * This is SMP safe as current->tgid does not change. */ SYSCALL_DEFINE0(getpid) { return task_tgid_vnr(current); } /* Thread ID - the internal kernel "pid" */ SYSCALL_DEFINE0(gettid) { return task_pid_vnr(current); } /* * Accessing ->real_parent is not SMP-safe, it could * change from under us. However, we can use a stale * value of ->real_parent under rcu_read_lock(), see * release_task()->call_rcu(delayed_put_task_struct). */ SYSCALL_DEFINE0(getppid) { int pid; rcu_read_lock(); pid = task_tgid_vnr(rcu_dereference(current->real_parent)); rcu_read_unlock(); return pid; } SYSCALL_DEFINE0(getuid) { /* Only we change this so SMP safe */ return from_kuid_munged(current_user_ns(), current_uid()); } SYSCALL_DEFINE0(geteuid) { /* Only we change this so SMP safe */ return from_kuid_munged(current_user_ns(), current_euid()); } SYSCALL_DEFINE0(getgid) { /* Only we change this so SMP safe */ return from_kgid_munged(current_user_ns(), current_gid()); } SYSCALL_DEFINE0(getegid) { /* Only we change this so SMP safe */ return from_kgid_munged(current_user_ns(), current_egid()); } static void do_sys_times(struct tms *tms) { u64 tgutime, tgstime, cutime, cstime; thread_group_cputime_adjusted(current, &tgutime, &tgstime); cutime = current->signal->cutime; cstime = current->signal->cstime; tms->tms_utime = nsec_to_clock_t(tgutime); tms->tms_stime = nsec_to_clock_t(tgstime); tms->tms_cutime = nsec_to_clock_t(cutime); tms->tms_cstime = nsec_to_clock_t(cstime); } SYSCALL_DEFINE1(times, struct tms __user *, tbuf) { if (tbuf) { struct tms tmp; do_sys_times(&tmp); if (copy_to_user(tbuf, &tmp, sizeof(struct tms))) return -EFAULT; } force_successful_syscall_return(); return (long) jiffies_64_to_clock_t(get_jiffies_64()); } #ifdef CONFIG_COMPAT static compat_clock_t clock_t_to_compat_clock_t(clock_t x) { return compat_jiffies_to_clock_t(clock_t_to_jiffies(x)); } COMPAT_SYSCALL_DEFINE1(times, struct compat_tms __user *, tbuf) { if (tbuf) { struct tms tms; struct compat_tms tmp; do_sys_times(&tms); /* Convert our struct tms to the compat version. */ tmp.tms_utime = clock_t_to_compat_clock_t(tms.tms_utime); tmp.tms_stime = clock_t_to_compat_clock_t(tms.tms_stime); tmp.tms_cutime = clock_t_to_compat_clock_t(tms.tms_cutime); tmp.tms_cstime = clock_t_to_compat_clock_t(tms.tms_cstime); if (copy_to_user(tbuf, &tmp, sizeof(tmp))) return -EFAULT; } force_successful_syscall_return(); return compat_jiffies_to_clock_t(jiffies); } #endif /* * This needs some heavy checking ... * I just haven't the stomach for it. I also don't fully * understand sessions/pgrp etc. Let somebody who does explain it. * * OK, I think I have the protection semantics right.... this is really * only important on a multi-user system anyway, to make sure one user * can't send a signal to a process owned by another. -TYT, 12/12/91 * * !PF_FORKNOEXEC check to conform completely to POSIX. */ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) { struct task_struct *p; struct task_struct *group_leader = current->group_leader; struct pid *pgrp; int err; if (!pid) pid = task_pid_vnr(group_leader); if (!pgid) pgid = pid; if (pgid < 0) return -EINVAL; rcu_read_lock(); /* From this point forward we keep holding onto the tasklist lock * so that our parent does not change from under us. -DaveM */ write_lock_irq(&tasklist_lock); err = -ESRCH; p = find_task_by_vpid(pid); if (!p) goto out; err = -EINVAL; if (!thread_group_leader(p)) goto out; if (same_thread_group(p->real_parent, group_leader)) { err = -EPERM; if (task_session(p) != task_session(group_leader)) goto out; err = -EACCES; if (!(p->flags & PF_FORKNOEXEC)) goto out; } else { err = -ESRCH; if (p != group_leader) goto out; } err = -EPERM; if (p->signal->leader) goto out; pgrp = task_pid(p); if (pgid != pid) { struct task_struct *g; pgrp = find_vpid(pgid); g = pid_task(pgrp, PIDTYPE_PGID); if (!g || task_session(g) != task_session(group_leader)) goto out; } err = security_task_setpgid(p, pgid); if (err) goto out; if (task_pgrp(p) != pgrp) change_pid(p, PIDTYPE_PGID, pgrp); err = 0; out: /* All paths lead to here, thus we are safe. -DaveM */ write_unlock_irq(&tasklist_lock); rcu_read_unlock(); return err; } static int do_getpgid(pid_t pid) { struct task_struct *p; struct pid *grp; int retval; rcu_read_lock(); if (!pid) grp = task_pgrp(current); else { retval = -ESRCH; p = find_task_by_vpid(pid); if (!p) goto out; grp = task_pgrp(p); if (!grp) goto out; retval = security_task_getpgid(p); if (retval) goto out; } retval = pid_vnr(grp); out: rcu_read_unlock(); return retval; } SYSCALL_DEFINE1(getpgid, pid_t, pid) { return do_getpgid(pid); } #ifdef __ARCH_WANT_SYS_GETPGRP SYSCALL_DEFINE0(getpgrp) { return do_getpgid(0); } #endif SYSCALL_DEFINE1(getsid, pid_t, pid) { struct task_struct *p; struct pid *sid; int retval; rcu_read_lock(); if (!pid) sid = task_session(current); else { retval = -ESRCH; p = find_task_by_vpid(pid); if (!p) goto out; sid = task_session(p); if (!sid) goto out; retval = security_task_getsid(p); if (retval) goto out; } retval = pid_vnr(sid); out: rcu_read_unlock(); return retval; } static void set_special_pids(struct pid *pid) { struct task_struct *curr = current->group_leader; if (task_session(curr) != pid) change_pid(curr, PIDTYPE_SID, pid); if (task_pgrp(curr) != pid) change_pid(curr, PIDTYPE_PGID, pid); } int ksys_setsid(void) { struct task_struct *group_leader = current->group_leader; struct pid *sid = task_pid(group_leader); pid_t session = pid_vnr(sid); int err = -EPERM; write_lock_irq(&tasklist_lock); /* Fail if I am already a session leader */ if (group_leader->signal->leader) goto out; /* Fail if a process group id already exists that equals the * proposed session id. */ if (pid_task(sid, PIDTYPE_PGID)) goto out; group_leader->signal->leader = 1; set_special_pids(sid); proc_clear_tty(group_leader); err = session; out: write_unlock_irq(&tasklist_lock); if (err > 0) { proc_sid_connector(group_leader); sched_autogroup_create_attach(group_leader); } return err; } SYSCALL_DEFINE0(setsid) { return ksys_setsid(); } DECLARE_RWSEM(uts_sem); #ifdef COMPAT_UTS_MACHINE #define override_architecture(name) \ (personality(current->personality) == PER_LINUX32 && \ copy_to_user(name->machine, COMPAT_UTS_MACHINE, \ sizeof(COMPAT_UTS_MACHINE))) #else #define override_architecture(name) 0 #endif /* * Work around broken programs that cannot handle "Linux 3.0". * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40 * And we map 4.x and later versions to 2.6.60+x, so 4.0/5.0/6.0/... would be * 2.6.60. */ static int override_release(char __user *release, size_t len) { int ret = 0; if (current->personality & UNAME26) { const char *rest = UTS_RELEASE; char buf[65] = { 0 }; int ndots = 0; unsigned v; size_t copy; while (*rest) { if (*rest == '.' && ++ndots >= 3) break; if (!isdigit(*rest) && *rest != '.') break; rest++; } v = LINUX_VERSION_PATCHLEVEL + 60; copy = clamp_t(size_t, len, 1, sizeof(buf)); copy = scnprintf(buf, copy, "2.6.%u%s", v, rest); ret = copy_to_user(release, buf, copy + 1); } return ret; } SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name) { struct new_utsname tmp; down_read(&uts_sem); memcpy(&tmp, utsname(), sizeof(tmp)); up_read(&uts_sem); if (copy_to_user(name, &tmp, sizeof(tmp))) return -EFAULT; if (override_release(name->release, sizeof(name->release))) return -EFAULT; if (override_architecture(name)) return -EFAULT; return 0; } #ifdef __ARCH_WANT_SYS_OLD_UNAME /* * Old cruft */ SYSCALL_DEFINE1(uname, struct old_utsname __user *, name) { struct old_utsname tmp; if (!name) return -EFAULT; down_read(&uts_sem); memcpy(&tmp, utsname(), sizeof(tmp)); up_read(&uts_sem); if (copy_to_user(name, &tmp, sizeof(tmp))) return -EFAULT; if (override_release(name->release, sizeof(name->release))) return -EFAULT; if (override_architecture(name)) return -EFAULT; return 0; } SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name) { struct oldold_utsname tmp; if (!name) return -EFAULT; memset(&tmp, 0, sizeof(tmp)); down_read(&uts_sem); memcpy(&tmp.sysname, &utsname()->sysname, __OLD_UTS_LEN); memcpy(&tmp.nodename, &utsname()->nodename, __OLD_UTS_LEN); memcpy(&tmp.release, &utsname()->release, __OLD_UTS_LEN); memcpy(&tmp.version, &utsname()->version, __OLD_UTS_LEN); memcpy(&tmp.machine, &utsname()->machine, __OLD_UTS_LEN); up_read(&uts_sem); if (copy_to_user(name, &tmp, sizeof(tmp))) return -EFAULT; if (override_architecture(name)) return -EFAULT; if (override_release(name->release, sizeof(name->release))) return -EFAULT; return 0; } #endif SYSCALL_DEFINE2(sethostname, char __user *, name, int, len) { int errno; char tmp[__NEW_UTS_LEN]; if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { struct new_utsname *u; add_device_randomness(tmp, len); down_write(&uts_sem); u = utsname(); memcpy(u->nodename, tmp, len); memset(u->nodename + len, 0, sizeof(u->nodename) - len); errno = 0; uts_proc_notify(UTS_PROC_HOSTNAME); up_write(&uts_sem); } return errno; } #ifdef __ARCH_WANT_SYS_GETHOSTNAME SYSCALL_DEFINE2(gethostname, char __user *, name, int, len) { int i; struct new_utsname *u; char tmp[__NEW_UTS_LEN + 1]; if (len < 0) return -EINVAL; down_read(&uts_sem); u = utsname(); i = 1 + strlen(u->nodename); if (i > len) i = len; memcpy(tmp, u->nodename, i); up_read(&uts_sem); if (copy_to_user(name, tmp, i)) return -EFAULT; return 0; } #endif /* * Only setdomainname; getdomainname can be implemented by calling * uname() */ SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len) { int errno; char tmp[__NEW_UTS_LEN]; if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; if (len < 0 || len > __NEW_UTS_LEN) return -EINVAL; errno = -EFAULT; if (!copy_from_user(tmp, name, len)) { struct new_utsname *u; add_device_randomness(tmp, len); down_write(&uts_sem); u = utsname(); memcpy(u->domainname, tmp, len); memset(u->domainname + len, 0, sizeof(u->domainname) - len); errno = 0; uts_proc_notify(UTS_PROC_DOMAINNAME); up_write(&uts_sem); } return errno; } /* make sure you are allowed to change @tsk limits before calling this */ static int do_prlimit(struct task_struct *tsk, unsigned int resource, struct rlimit *new_rlim, struct rlimit *old_rlim) { struct rlimit *rlim; int retval = 0; if (resource >= RLIM_NLIMITS) return -EINVAL; resource = array_index_nospec(resource, RLIM_NLIMITS); if (new_rlim) { if (new_rlim->rlim_cur > new_rlim->rlim_max) return -EINVAL; if (resource == RLIMIT_NOFILE && new_rlim->rlim_max > sysctl_nr_open) return -EPERM; } /* Holding a refcount on tsk protects tsk->signal from disappearing. */ rlim = tsk->signal->rlim + resource; task_lock(tsk->group_leader); if (new_rlim) { /* * Keep the capable check against init_user_ns until cgroups can * contain all limits. */ if (new_rlim->rlim_max > rlim->rlim_max && !capable(CAP_SYS_RESOURCE)) retval = -EPERM; if (!retval) retval = security_task_setrlimit(tsk, resource, new_rlim); } if (!retval) { if (old_rlim) *old_rlim = *rlim; if (new_rlim) *rlim = *new_rlim; } task_unlock(tsk->group_leader); /* * RLIMIT_CPU handling. Arm the posix CPU timer if the limit is not * infinite. In case of RLIM_INFINITY the posix CPU timer code * ignores the rlimit. */ if (!retval && new_rlim && resource == RLIMIT_CPU && new_rlim->rlim_cur != RLIM_INFINITY && IS_ENABLED(CONFIG_POSIX_TIMERS)) { /* * update_rlimit_cpu can fail if the task is exiting, but there * may be other tasks in the thread group that are not exiting, * and they need their cpu timers adjusted. * * The group_leader is the last task to be released, so if we * cannot update_rlimit_cpu on it, then the entire process is * exiting and we do not need to update at all. */ update_rlimit_cpu(tsk->group_leader, new_rlim->rlim_cur); } return retval; } SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim) { struct rlimit value; int ret; ret = do_prlimit(current, resource, NULL, &value); if (!ret) ret = copy_to_user(rlim, &value, sizeof(*rlim)) ? -EFAULT : 0; return ret; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct compat_rlimit __user *, rlim) { struct rlimit r; struct compat_rlimit r32; if (copy_from_user(&r32, rlim, sizeof(struct compat_rlimit))) return -EFAULT; if (r32.rlim_cur == COMPAT_RLIM_INFINITY) r.rlim_cur = RLIM_INFINITY; else r.rlim_cur = r32.rlim_cur; if (r32.rlim_max == COMPAT_RLIM_INFINITY) r.rlim_max = RLIM_INFINITY; else r.rlim_max = r32.rlim_max; return do_prlimit(current, resource, &r, NULL); } COMPAT_SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct compat_rlimit __user *, rlim) { struct rlimit r; int ret; ret = do_prlimit(current, resource, NULL, &r); if (!ret) { struct compat_rlimit r32; if (r.rlim_cur > COMPAT_RLIM_INFINITY) r32.rlim_cur = COMPAT_RLIM_INFINITY; else r32.rlim_cur = r.rlim_cur; if (r.rlim_max > COMPAT_RLIM_INFINITY) r32.rlim_max = COMPAT_RLIM_INFINITY; else r32.rlim_max = r.rlim_max; if (copy_to_user(rlim, &r32, sizeof(struct compat_rlimit))) return -EFAULT; } return ret; } #endif #ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT /* * Back compatibility for getrlimit. Needed for some apps. */ SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, struct rlimit __user *, rlim) { struct rlimit x; if (resource >= RLIM_NLIMITS) return -EINVAL; resource = array_index_nospec(resource, RLIM_NLIMITS); task_lock(current->group_leader); x = current->signal->rlim[resource]; task_unlock(current->group_leader); if (x.rlim_cur > 0x7FFFFFFF) x.rlim_cur = 0x7FFFFFFF; if (x.rlim_max > 0x7FFFFFFF) x.rlim_max = 0x7FFFFFFF; return copy_to_user(rlim, &x, sizeof(x)) ? -EFAULT : 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource, struct compat_rlimit __user *, rlim) { struct rlimit r; if (resource >= RLIM_NLIMITS) return -EINVAL; resource = array_index_nospec(resource, RLIM_NLIMITS); task_lock(current->group_leader); r = current->signal->rlim[resource]; task_unlock(current->group_leader); if (r.rlim_cur > 0x7FFFFFFF) r.rlim_cur = 0x7FFFFFFF; if (r.rlim_max > 0x7FFFFFFF) r.rlim_max = 0x7FFFFFFF; if (put_user(r.rlim_cur, &rlim->rlim_cur) || put_user(r.rlim_max, &rlim->rlim_max)) return -EFAULT; return 0; } #endif #endif static inline bool rlim64_is_infinity(__u64 rlim64) { #if BITS_PER_LONG < 64 return rlim64 >= ULONG_MAX; #else return rlim64 == RLIM64_INFINITY; #endif } static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64) { if (rlim->rlim_cur == RLIM_INFINITY) rlim64->rlim_cur = RLIM64_INFINITY; else rlim64->rlim_cur = rlim->rlim_cur; if (rlim->rlim_max == RLIM_INFINITY) rlim64->rlim_max = RLIM64_INFINITY; else rlim64->rlim_max = rlim->rlim_max; } static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim) { if (rlim64_is_infinity(rlim64->rlim_cur)) rlim->rlim_cur = RLIM_INFINITY; else rlim->rlim_cur = (unsigned long)rlim64->rlim_cur; if (rlim64_is_infinity(rlim64->rlim_max)) rlim->rlim_max = RLIM_INFINITY; else rlim->rlim_max = (unsigned long)rlim64->rlim_max; } /* rcu lock must be held */ static int check_prlimit_permission(struct task_struct *task, unsigned int flags) { const struct cred *cred = current_cred(), *tcred; bool id_match; if (current == task) return 0; tcred = __task_cred(task); id_match = (uid_eq(cred->uid, tcred->euid) && uid_eq(cred->uid, tcred->suid) && uid_eq(cred->uid, tcred->uid) && gid_eq(cred->gid, tcred->egid) && gid_eq(cred->gid, tcred->sgid) && gid_eq(cred->gid, tcred->gid)); if (!id_match && !ns_capable(tcred->user_ns, CAP_SYS_RESOURCE)) return -EPERM; return security_task_prlimit(cred, tcred, flags); } SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource, const struct rlimit64 __user *, new_rlim, struct rlimit64 __user *, old_rlim) { struct rlimit64 old64, new64; struct rlimit old, new; struct task_struct *tsk; unsigned int checkflags = 0; int ret; if (old_rlim) checkflags |= LSM_PRLIMIT_READ; if (new_rlim) { if (copy_from_user(&new64, new_rlim, sizeof(new64))) return -EFAULT; rlim64_to_rlim(&new64, &new); checkflags |= LSM_PRLIMIT_WRITE; } rcu_read_lock(); tsk = pid ? find_task_by_vpid(pid) : current; if (!tsk) { rcu_read_unlock(); return -ESRCH; } ret = check_prlimit_permission(tsk, checkflags); if (ret) { rcu_read_unlock(); return ret; } get_task_struct(tsk); rcu_read_unlock(); ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL, old_rlim ? &old : NULL); if (!ret && old_rlim) { rlim_to_rlim64(&old, &old64); if (copy_to_user(old_rlim, &old64, sizeof(old64))) ret = -EFAULT; } put_task_struct(tsk); return ret; } SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim) { struct rlimit new_rlim; if (copy_from_user(&new_rlim, rlim, sizeof(*rlim))) return -EFAULT; return do_prlimit(current, resource, &new_rlim, NULL); } /* * It would make sense to put struct rusage in the task_struct, * except that would make the task_struct be *really big*. After * task_struct gets moved into malloc'ed memory, it would * make sense to do this. It will make moving the rest of the information * a lot simpler! (Which we're not doing right now because we're not * measuring them yet). * * When sampling multiple threads for RUSAGE_SELF, under SMP we might have * races with threads incrementing their own counters. But since word * reads are atomic, we either get new values or old values and we don't * care which for the sums. We always take the siglock to protect reading * the c* fields from p->signal from races with exit.c updating those * fields when reaping, so a sample either gets all the additions of a * given child after it's reaped, or none so this sample is before reaping. * * Locking: * We need to take the siglock for CHILDEREN, SELF and BOTH * for the cases current multithreaded, non-current single threaded * non-current multithreaded. Thread traversal is now safe with * the siglock held. * Strictly speaking, we donot need to take the siglock if we are current and * single threaded, as no one else can take our signal_struct away, no one * else can reap the children to update signal->c* counters, and no one else * can race with the signal-> fields. If we do not take any lock, the * signal-> fields could be read out of order while another thread was just * exiting. So we should place a read memory barrier when we avoid the lock. * On the writer side, write memory barrier is implied in __exit_signal * as __exit_signal releases the siglock spinlock after updating the signal-> * fields. But we don't do this yet to keep things simple. * */ static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r) { r->ru_nvcsw += t->nvcsw; r->ru_nivcsw += t->nivcsw; r->ru_minflt += t->min_flt; r->ru_majflt += t->maj_flt; r->ru_inblock += task_io_get_inblock(t); r->ru_oublock += task_io_get_oublock(t); } void getrusage(struct task_struct *p, int who, struct rusage *r) { struct task_struct *t; unsigned long flags; u64 tgutime, tgstime, utime, stime; unsigned long maxrss; struct mm_struct *mm; struct signal_struct *sig = p->signal; unsigned int seq = 0; retry: memset(r, 0, sizeof(*r)); utime = stime = 0; maxrss = 0; if (who == RUSAGE_THREAD) { task_cputime_adjusted(current, &utime, &stime); accumulate_thread_rusage(p, r); maxrss = sig->maxrss; goto out_thread; } flags = read_seqbegin_or_lock_irqsave(&sig->stats_lock, &seq); switch (who) { case RUSAGE_BOTH: case RUSAGE_CHILDREN: utime = sig->cutime; stime = sig->cstime; r->ru_nvcsw = sig->cnvcsw; r->ru_nivcsw = sig->cnivcsw; r->ru_minflt = sig->cmin_flt; r->ru_majflt = sig->cmaj_flt; r->ru_inblock = sig->cinblock; r->ru_oublock = sig->coublock; maxrss = sig->cmaxrss; if (who == RUSAGE_CHILDREN) break; fallthrough; case RUSAGE_SELF: r->ru_nvcsw += sig->nvcsw; r->ru_nivcsw += sig->nivcsw; r->ru_minflt += sig->min_flt; r->ru_majflt += sig->maj_flt; r->ru_inblock += sig->inblock; r->ru_oublock += sig->oublock; if (maxrss < sig->maxrss) maxrss = sig->maxrss; rcu_read_lock(); __for_each_thread(sig, t) accumulate_thread_rusage(t, r); rcu_read_unlock(); break; default: BUG(); } if (need_seqretry(&sig->stats_lock, seq)) { seq = 1; goto retry; } done_seqretry_irqrestore(&sig->stats_lock, seq, flags); if (who == RUSAGE_CHILDREN) goto out_children; thread_group_cputime_adjusted(p, &tgutime, &tgstime); utime += tgutime; stime += tgstime; out_thread: mm = get_task_mm(p); if (mm) { setmax_mm_hiwater_rss(&maxrss, mm); mmput(mm); } out_children: r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */ r->ru_utime = ns_to_kernel_old_timeval(utime); r->ru_stime = ns_to_kernel_old_timeval(stime); } SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru) { struct rusage r; if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && who != RUSAGE_THREAD) return -EINVAL; getrusage(current, who, &r); return copy_to_user(ru, &r, sizeof(r)) ? -EFAULT : 0; } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(getrusage, int, who, struct compat_rusage __user *, ru) { struct rusage r; if (who != RUSAGE_SELF && who != RUSAGE_CHILDREN && who != RUSAGE_THREAD) return -EINVAL; getrusage(current, who, &r); return put_compat_rusage(&r, ru); } #endif SYSCALL_DEFINE1(umask, int, mask) { mask = xchg(¤t->fs->umask, mask & S_IRWXUGO); return mask; } static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd) { CLASS(fd, exe)(fd); struct inode *inode; int err; if (fd_empty(exe)) return -EBADF; inode = file_inode(fd_file(exe)); /* * Because the original mm->exe_file points to executable file, make * sure that this one is executable as well, to avoid breaking an * overall picture. */ if (!S_ISREG(inode->i_mode) || path_noexec(&fd_file(exe)->f_path)) return -EACCES; err = file_permission(fd_file(exe), MAY_EXEC); if (err) return err; return replace_mm_exe_file(mm, fd_file(exe)); } /* * Check arithmetic relations of passed addresses. * * WARNING: we don't require any capability here so be very careful * in what is allowed for modification from userspace. */ static int validate_prctl_map_addr(struct prctl_mm_map *prctl_map) { unsigned long mmap_max_addr = TASK_SIZE; int error = -EINVAL, i; static const unsigned char offsets[] = { offsetof(struct prctl_mm_map, start_code), offsetof(struct prctl_mm_map, end_code), offsetof(struct prctl_mm_map, start_data), offsetof(struct prctl_mm_map, end_data), offsetof(struct prctl_mm_map, start_brk), offsetof(struct prctl_mm_map, brk), offsetof(struct prctl_mm_map, start_stack), offsetof(struct prctl_mm_map, arg_start), offsetof(struct prctl_mm_map, arg_end), offsetof(struct prctl_mm_map, env_start), offsetof(struct prctl_mm_map, env_end), }; /* * Make sure the members are not somewhere outside * of allowed address space. */ for (i = 0; i < ARRAY_SIZE(offsets); i++) { u64 val = *(u64 *)((char *)prctl_map + offsets[i]); if ((unsigned long)val >= mmap_max_addr || (unsigned long)val < mmap_min_addr) goto out; } /* * Make sure the pairs are ordered. */ #define __prctl_check_order(__m1, __op, __m2) \ ((unsigned long)prctl_map->__m1 __op \ (unsigned long)prctl_map->__m2) ? 0 : -EINVAL error = __prctl_check_order(start_code, <, end_code); error |= __prctl_check_order(start_data,<=, end_data); error |= __prctl_check_order(start_brk, <=, brk); error |= __prctl_check_order(arg_start, <=, arg_end); error |= __prctl_check_order(env_start, <=, env_end); if (error) goto out; #undef __prctl_check_order error = -EINVAL; /* * Neither we should allow to override limits if they set. */ if (check_data_rlimit(rlimit(RLIMIT_DATA), prctl_map->brk, prctl_map->start_brk, prctl_map->end_data, prctl_map->start_data)) goto out; error = 0; out: return error; } #ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_set_mm_map(int opt, const void __user *addr, unsigned long data_size) { struct prctl_mm_map prctl_map = { .exe_fd = (u32)-1, }; unsigned long user_auxv[AT_VECTOR_SIZE]; struct mm_struct *mm = current->mm; int error; BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); BUILD_BUG_ON(sizeof(struct prctl_mm_map) > 256); if (opt == PR_SET_MM_MAP_SIZE) return put_user((unsigned int)sizeof(prctl_map), (unsigned int __user *)addr); if (data_size != sizeof(prctl_map)) return -EINVAL; if (copy_from_user(&prctl_map, addr, sizeof(prctl_map))) return -EFAULT; error = validate_prctl_map_addr(&prctl_map); if (error) return error; if (prctl_map.auxv_size) { /* * Someone is trying to cheat the auxv vector. */ if (!prctl_map.auxv || prctl_map.auxv_size > sizeof(mm->saved_auxv)) return -EINVAL; memset(user_auxv, 0, sizeof(user_auxv)); if (copy_from_user(user_auxv, (const void __user *)prctl_map.auxv, prctl_map.auxv_size)) return -EFAULT; /* Last entry must be AT_NULL as specification requires */ user_auxv[AT_VECTOR_SIZE - 2] = AT_NULL; user_auxv[AT_VECTOR_SIZE - 1] = AT_NULL; } if (prctl_map.exe_fd != (u32)-1) { /* * Check if the current user is checkpoint/restore capable. * At the time of this writing, it checks for CAP_SYS_ADMIN * or CAP_CHECKPOINT_RESTORE. * Note that a user with access to ptrace can masquerade an * arbitrary program as any executable, even setuid ones. * This may have implications in the tomoyo subsystem. */ if (!checkpoint_restore_ns_capable(current_user_ns())) return -EPERM; error = prctl_set_mm_exe_file(mm, prctl_map.exe_fd); if (error) return error; } /* * arg_lock protects concurrent updates but we still need mmap_lock for * read to exclude races with sys_brk. */ mmap_read_lock(mm); /* * We don't validate if these members are pointing to * real present VMAs because application may have correspond * VMAs already unmapped and kernel uses these members for statistics * output in procfs mostly, except * * - @start_brk/@brk which are used in do_brk_flags but kernel lookups * for VMAs when updating these members so anything wrong written * here cause kernel to swear at userspace program but won't lead * to any problem in kernel itself */ spin_lock(&mm->arg_lock); mm->start_code = prctl_map.start_code; mm->end_code = prctl_map.end_code; mm->start_data = prctl_map.start_data; mm->end_data = prctl_map.end_data; mm->start_brk = prctl_map.start_brk; mm->brk = prctl_map.brk; mm->start_stack = prctl_map.start_stack; mm->arg_start = prctl_map.arg_start; mm->arg_end = prctl_map.arg_end; mm->env_start = prctl_map.env_start; mm->env_end = prctl_map.env_end; spin_unlock(&mm->arg_lock); /* * Note this update of @saved_auxv is lockless thus * if someone reads this member in procfs while we're * updating -- it may get partly updated results. It's * known and acceptable trade off: we leave it as is to * not introduce additional locks here making the kernel * more complex. */ if (prctl_map.auxv_size) memcpy(mm->saved_auxv, user_auxv, sizeof(user_auxv)); mmap_read_unlock(mm); return 0; } #endif /* CONFIG_CHECKPOINT_RESTORE */ static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr, unsigned long len) { /* * This doesn't move the auxiliary vector itself since it's pinned to * mm_struct, but it permits filling the vector with new values. It's * up to the caller to provide sane values here, otherwise userspace * tools which use this vector might be unhappy. */ unsigned long user_auxv[AT_VECTOR_SIZE] = {}; if (len > sizeof(user_auxv)) return -EINVAL; if (copy_from_user(user_auxv, (const void __user *)addr, len)) return -EFAULT; /* Make sure the last entry is always AT_NULL */ user_auxv[AT_VECTOR_SIZE - 2] = 0; user_auxv[AT_VECTOR_SIZE - 1] = 0; BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv)); task_lock(current); memcpy(mm->saved_auxv, user_auxv, len); task_unlock(current); return 0; } static int prctl_set_mm(int opt, unsigned long addr, unsigned long arg4, unsigned long arg5) { struct mm_struct *mm = current->mm; struct prctl_mm_map prctl_map = { .auxv = NULL, .auxv_size = 0, .exe_fd = -1, }; struct vm_area_struct *vma; int error; if (arg5 || (arg4 && (opt != PR_SET_MM_AUXV && opt != PR_SET_MM_MAP && opt != PR_SET_MM_MAP_SIZE))) return -EINVAL; #ifdef CONFIG_CHECKPOINT_RESTORE if (opt == PR_SET_MM_MAP || opt == PR_SET_MM_MAP_SIZE) return prctl_set_mm_map(opt, (const void __user *)addr, arg4); #endif if (!capable(CAP_SYS_RESOURCE)) return -EPERM; if (opt == PR_SET_MM_EXE_FILE) return prctl_set_mm_exe_file(mm, (unsigned int)addr); if (opt == PR_SET_MM_AUXV) return prctl_set_auxv(mm, addr, arg4); if (addr >= TASK_SIZE || addr < mmap_min_addr) return -EINVAL; error = -EINVAL; /* * arg_lock protects concurrent updates of arg boundaries, we need * mmap_lock for a) concurrent sys_brk, b) finding VMA for addr * validation. */ mmap_read_lock(mm); vma = find_vma(mm, addr); spin_lock(&mm->arg_lock); prctl_map.start_code = mm->start_code; prctl_map.end_code = mm->end_code; prctl_map.start_data = mm->start_data; prctl_map.end_data = mm->end_data; prctl_map.start_brk = mm->start_brk; prctl_map.brk = mm->brk; prctl_map.start_stack = mm->start_stack; prctl_map.arg_start = mm->arg_start; prctl_map.arg_end = mm->arg_end; prctl_map.env_start = mm->env_start; prctl_map.env_end = mm->env_end; switch (opt) { case PR_SET_MM_START_CODE: prctl_map.start_code = addr; break; case PR_SET_MM_END_CODE: prctl_map.end_code = addr; break; case PR_SET_MM_START_DATA: prctl_map.start_data = addr; break; case PR_SET_MM_END_DATA: prctl_map.end_data = addr; break; case PR_SET_MM_START_STACK: prctl_map.start_stack = addr; break; case PR_SET_MM_START_BRK: prctl_map.start_brk = addr; break; case PR_SET_MM_BRK: prctl_map.brk = addr; break; case PR_SET_MM_ARG_START: prctl_map.arg_start = addr; break; case PR_SET_MM_ARG_END: prctl_map.arg_end = addr; break; case PR_SET_MM_ENV_START: prctl_map.env_start = addr; break; case PR_SET_MM_ENV_END: prctl_map.env_end = addr; break; default: goto out; } error = validate_prctl_map_addr(&prctl_map); if (error) goto out; switch (opt) { /* * If command line arguments and environment * are placed somewhere else on stack, we can * set them up here, ARG_START/END to setup * command line arguments and ENV_START/END * for environment. */ case PR_SET_MM_START_STACK: case PR_SET_MM_ARG_START: case PR_SET_MM_ARG_END: case PR_SET_MM_ENV_START: case PR_SET_MM_ENV_END: if (!vma) { error = -EFAULT; goto out; } } mm->start_code = prctl_map.start_code; mm->end_code = prctl_map.end_code; mm->start_data = prctl_map.start_data; mm->end_data = prctl_map.end_data; mm->start_brk = prctl_map.start_brk; mm->brk = prctl_map.brk; mm->start_stack = prctl_map.start_stack; mm->arg_start = prctl_map.arg_start; mm->arg_end = prctl_map.arg_end; mm->env_start = prctl_map.env_start; mm->env_end = prctl_map.env_end; error = 0; out: spin_unlock(&mm->arg_lock); mmap_read_unlock(mm); return error; } #ifdef CONFIG_CHECKPOINT_RESTORE static int prctl_get_tid_address(struct task_struct *me, int __user * __user *tid_addr) { return put_user(me->clear_child_tid, tid_addr); } #else static int prctl_get_tid_address(struct task_struct *me, int __user * __user *tid_addr) { return -EINVAL; } #endif static int propagate_has_child_subreaper(struct task_struct *p, void *data) { /* * If task has has_child_subreaper - all its descendants * already have these flag too and new descendants will * inherit it on fork, skip them. * * If we've found child_reaper - skip descendants in * it's subtree as they will never get out pidns. */ if (p->signal->has_child_subreaper || is_child_reaper(task_pid(p))) return 0; p->signal->has_child_subreaper = 1; return 1; } int __weak arch_prctl_spec_ctrl_get(struct task_struct *t, unsigned long which) { return -EINVAL; } int __weak arch_prctl_spec_ctrl_set(struct task_struct *t, unsigned long which, unsigned long ctrl) { return -EINVAL; } int __weak arch_get_shadow_stack_status(struct task_struct *t, unsigned long __user *status) { return -EINVAL; } int __weak arch_set_shadow_stack_status(struct task_struct *t, unsigned long status) { return -EINVAL; } int __weak arch_lock_shadow_stack_status(struct task_struct *t, unsigned long status) { return -EINVAL; } #define PR_IO_FLUSHER (PF_MEMALLOC_NOIO | PF_LOCAL_THROTTLE) #ifdef CONFIG_ANON_VMA_NAME #define ANON_VMA_NAME_MAX_LEN 80 #define ANON_VMA_NAME_INVALID_CHARS "\\`$[]" static inline bool is_valid_name_char(char ch) { /* printable ascii characters, excluding ANON_VMA_NAME_INVALID_CHARS */ return ch > 0x1f && ch < 0x7f && !strchr(ANON_VMA_NAME_INVALID_CHARS, ch); } static int prctl_set_vma(unsigned long opt, unsigned long addr, unsigned long size, unsigned long arg) { struct mm_struct *mm = current->mm; const char __user *uname; struct anon_vma_name *anon_name = NULL; int error; switch (opt) { case PR_SET_VMA_ANON_NAME: uname = (const char __user *)arg; if (uname) { char *name, *pch; name = strndup_user(uname, ANON_VMA_NAME_MAX_LEN); if (IS_ERR(name)) return PTR_ERR(name); for (pch = name; *pch != '\0'; pch++) { if (!is_valid_name_char(*pch)) { kfree(name); return -EINVAL; } } /* anon_vma has its own copy */ anon_name = anon_vma_name_alloc(name); kfree(name); if (!anon_name) return -ENOMEM; } mmap_write_lock(mm); error = madvise_set_anon_name(mm, addr, size, anon_name); mmap_write_unlock(mm); anon_vma_name_put(anon_name); break; default: error = -EINVAL; } return error; } #else /* CONFIG_ANON_VMA_NAME */ static int prctl_set_vma(unsigned long opt, unsigned long start, unsigned long size, unsigned long arg) { return -EINVAL; } #endif /* CONFIG_ANON_VMA_NAME */ static inline unsigned long get_current_mdwe(void) { unsigned long ret = 0; if (test_bit(MMF_HAS_MDWE, ¤t->mm->flags)) ret |= PR_MDWE_REFUSE_EXEC_GAIN; if (test_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags)) ret |= PR_MDWE_NO_INHERIT; return ret; } static inline int prctl_set_mdwe(unsigned long bits, unsigned long arg3, unsigned long arg4, unsigned long arg5) { unsigned long current_bits; if (arg3 || arg4 || arg5) return -EINVAL; if (bits & ~(PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT)) return -EINVAL; /* NO_INHERIT only makes sense with REFUSE_EXEC_GAIN */ if (bits & PR_MDWE_NO_INHERIT && !(bits & PR_MDWE_REFUSE_EXEC_GAIN)) return -EINVAL; /* * EOPNOTSUPP might be more appropriate here in principle, but * existing userspace depends on EINVAL specifically. */ if (!arch_memory_deny_write_exec_supported()) return -EINVAL; current_bits = get_current_mdwe(); if (current_bits && current_bits != bits) return -EPERM; /* Cannot unset the flags */ if (bits & PR_MDWE_NO_INHERIT) set_bit(MMF_HAS_MDWE_NO_INHERIT, ¤t->mm->flags); if (bits & PR_MDWE_REFUSE_EXEC_GAIN) set_bit(MMF_HAS_MDWE, ¤t->mm->flags); return 0; } static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { if (arg2 || arg3 || arg4 || arg5) return -EINVAL; return get_current_mdwe(); } static int prctl_get_auxv(void __user *addr, unsigned long len) { struct mm_struct *mm = current->mm; unsigned long size = min_t(unsigned long, sizeof(mm->saved_auxv), len); if (size && copy_to_user(addr, mm->saved_auxv, size)) return -EFAULT; return sizeof(mm->saved_auxv); } SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { struct task_struct *me = current; unsigned char comm[sizeof(me->comm)]; long error; error = security_task_prctl(option, arg2, arg3, arg4, arg5); if (error != -ENOSYS) return error; error = 0; switch (option) { case PR_SET_PDEATHSIG: if (!valid_signal(arg2)) { error = -EINVAL; break; } me->pdeath_signal = arg2; break; case PR_GET_PDEATHSIG: error = put_user(me->pdeath_signal, (int __user *)arg2); break; case PR_GET_DUMPABLE: error = get_dumpable(me->mm); break; case PR_SET_DUMPABLE: if (arg2 != SUID_DUMP_DISABLE && arg2 != SUID_DUMP_USER) { error = -EINVAL; break; } set_dumpable(me->mm, arg2); break; case PR_SET_UNALIGN: error = SET_UNALIGN_CTL(me, arg2); break; case PR_GET_UNALIGN: error = GET_UNALIGN_CTL(me, arg2); break; case PR_SET_FPEMU: error = SET_FPEMU_CTL(me, arg2); break; case PR_GET_FPEMU: error = GET_FPEMU_CTL(me, arg2); break; case PR_SET_FPEXC: error = SET_FPEXC_CTL(me, arg2); break; case PR_GET_FPEXC: error = GET_FPEXC_CTL(me, arg2); break; case PR_GET_TIMING: error = PR_TIMING_STATISTICAL; break; case PR_SET_TIMING: if (arg2 != PR_TIMING_STATISTICAL) error = -EINVAL; break; case PR_SET_NAME: comm[sizeof(me->comm) - 1] = 0; if (strncpy_from_user(comm, (char __user *)arg2, sizeof(me->comm) - 1) < 0) return -EFAULT; set_task_comm(me, comm); proc_comm_connector(me); break; case PR_GET_NAME: get_task_comm(comm, me); if (copy_to_user((char __user *)arg2, comm, sizeof(comm))) return -EFAULT; break; case PR_GET_ENDIAN: error = GET_ENDIAN(me, arg2); break; case PR_SET_ENDIAN: error = SET_ENDIAN(me, arg2); break; case PR_GET_SECCOMP: error = prctl_get_seccomp(); break; case PR_SET_SECCOMP: error = prctl_set_seccomp(arg2, (char __user *)arg3); break; case PR_GET_TSC: error = GET_TSC_CTL(arg2); break; case PR_SET_TSC: error = SET_TSC_CTL(arg2); break; case PR_TASK_PERF_EVENTS_DISABLE: error = perf_event_task_disable(); break; case PR_TASK_PERF_EVENTS_ENABLE: error = perf_event_task_enable(); break; case PR_GET_TIMERSLACK: if (current->timer_slack_ns > ULONG_MAX) error = ULONG_MAX; else error = current->timer_slack_ns; break; case PR_SET_TIMERSLACK: if (rt_or_dl_task_policy(current)) break; if (arg2 <= 0) current->timer_slack_ns = current->default_timer_slack_ns; else current->timer_slack_ns = arg2; break; case PR_MCE_KILL: if (arg4 | arg5) return -EINVAL; switch (arg2) { case PR_MCE_KILL_CLEAR: if (arg3 != 0) return -EINVAL; current->flags &= ~PF_MCE_PROCESS; break; case PR_MCE_KILL_SET: current->flags |= PF_MCE_PROCESS; if (arg3 == PR_MCE_KILL_EARLY) current->flags |= PF_MCE_EARLY; else if (arg3 == PR_MCE_KILL_LATE) current->flags &= ~PF_MCE_EARLY; else if (arg3 == PR_MCE_KILL_DEFAULT) current->flags &= ~(PF_MCE_EARLY|PF_MCE_PROCESS); else return -EINVAL; break; default: return -EINVAL; } break; case PR_MCE_KILL_GET: if (arg2 | arg3 | arg4 | arg5) return -EINVAL; if (current->flags & PF_MCE_PROCESS) error = (current->flags & PF_MCE_EARLY) ? PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE; else error = PR_MCE_KILL_DEFAULT; break; case PR_SET_MM: error = prctl_set_mm(arg2, arg3, arg4, arg5); break; case PR_GET_TID_ADDRESS: error = prctl_get_tid_address(me, (int __user * __user *)arg2); break; case PR_SET_CHILD_SUBREAPER: me->signal->is_child_subreaper = !!arg2; if (!arg2) break; walk_process_tree(me, propagate_has_child_subreaper, NULL); break; case PR_GET_CHILD_SUBREAPER: error = put_user(me->signal->is_child_subreaper, (int __user *)arg2); break; case PR_SET_NO_NEW_PRIVS: if (arg2 != 1 || arg3 || arg4 || arg5) return -EINVAL; task_set_no_new_privs(current); break; case PR_GET_NO_NEW_PRIVS: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; return task_no_new_privs(current) ? 1 : 0; case PR_GET_THP_DISABLE: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; error = !!test_bit(MMF_DISABLE_THP, &me->mm->flags); break; case PR_SET_THP_DISABLE: if (arg3 || arg4 || arg5) return -EINVAL; if (mmap_write_lock_killable(me->mm)) return -EINTR; if (arg2) set_bit(MMF_DISABLE_THP, &me->mm->flags); else clear_bit(MMF_DISABLE_THP, &me->mm->flags); mmap_write_unlock(me->mm); break; case PR_MPX_ENABLE_MANAGEMENT: case PR_MPX_DISABLE_MANAGEMENT: /* No longer implemented: */ return -EINVAL; case PR_SET_FP_MODE: error = SET_FP_MODE(me, arg2); break; case PR_GET_FP_MODE: error = GET_FP_MODE(me); break; case PR_SVE_SET_VL: error = SVE_SET_VL(arg2); break; case PR_SVE_GET_VL: error = SVE_GET_VL(); break; case PR_SME_SET_VL: error = SME_SET_VL(arg2); break; case PR_SME_GET_VL: error = SME_GET_VL(); break; case PR_GET_SPECULATION_CTRL: if (arg3 || arg4 || arg5) return -EINVAL; error = arch_prctl_spec_ctrl_get(me, arg2); break; case PR_SET_SPECULATION_CTRL: if (arg4 || arg5) return -EINVAL; error = arch_prctl_spec_ctrl_set(me, arg2, arg3); break; case PR_PAC_RESET_KEYS: if (arg3 || arg4 || arg5) return -EINVAL; error = PAC_RESET_KEYS(me, arg2); break; case PR_PAC_SET_ENABLED_KEYS: if (arg4 || arg5) return -EINVAL; error = PAC_SET_ENABLED_KEYS(me, arg2, arg3); break; case PR_PAC_GET_ENABLED_KEYS: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; error = PAC_GET_ENABLED_KEYS(me); break; case PR_SET_TAGGED_ADDR_CTRL: if (arg3 || arg4 || arg5) return -EINVAL; error = SET_TAGGED_ADDR_CTRL(arg2); break; case PR_GET_TAGGED_ADDR_CTRL: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; error = GET_TAGGED_ADDR_CTRL(); break; case PR_SET_IO_FLUSHER: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; if (arg3 || arg4 || arg5) return -EINVAL; if (arg2 == 1) current->flags |= PR_IO_FLUSHER; else if (!arg2) current->flags &= ~PR_IO_FLUSHER; else return -EINVAL; break; case PR_GET_IO_FLUSHER: if (!capable(CAP_SYS_RESOURCE)) return -EPERM; if (arg2 || arg3 || arg4 || arg5) return -EINVAL; error = (current->flags & PR_IO_FLUSHER) == PR_IO_FLUSHER; break; case PR_SET_SYSCALL_USER_DISPATCH: error = set_syscall_user_dispatch(arg2, arg3, arg4, (char __user *) arg5); break; #ifdef CONFIG_SCHED_CORE case PR_SCHED_CORE: error = sched_core_share_pid(arg2, arg3, arg4, arg5); break; #endif case PR_SET_MDWE: error = prctl_set_mdwe(arg2, arg3, arg4, arg5); break; case PR_GET_MDWE: error = prctl_get_mdwe(arg2, arg3, arg4, arg5); break; case PR_PPC_GET_DEXCR: if (arg3 || arg4 || arg5) return -EINVAL; error = PPC_GET_DEXCR_ASPECT(me, arg2); break; case PR_PPC_SET_DEXCR: if (arg4 || arg5) return -EINVAL; error = PPC_SET_DEXCR_ASPECT(me, arg2, arg3); break; case PR_SET_VMA: error = prctl_set_vma(arg2, arg3, arg4, arg5); break; case PR_GET_AUXV: if (arg4 || arg5) return -EINVAL; error = prctl_get_auxv((void __user *)arg2, arg3); break; #ifdef CONFIG_KSM case PR_SET_MEMORY_MERGE: if (arg3 || arg4 || arg5) return -EINVAL; if (mmap_write_lock_killable(me->mm)) return -EINTR; if (arg2) error = ksm_enable_merge_any(me->mm); else error = ksm_disable_merge_any(me->mm); mmap_write_unlock(me->mm); break; case PR_GET_MEMORY_MERGE: if (arg2 || arg3 || arg4 || arg5) return -EINVAL; error = !!test_bit(MMF_VM_MERGE_ANY, &me->mm->flags); break; #endif case PR_RISCV_V_SET_CONTROL: error = RISCV_V_SET_CONTROL(arg2); break; case PR_RISCV_V_GET_CONTROL: error = RISCV_V_GET_CONTROL(); break; case PR_RISCV_SET_ICACHE_FLUSH_CTX: error = RISCV_SET_ICACHE_FLUSH_CTX(arg2, arg3); break; case PR_GET_SHADOW_STACK_STATUS: if (arg3 || arg4 || arg5) return -EINVAL; error = arch_get_shadow_stack_status(me, (unsigned long __user *) arg2); break; case PR_SET_SHADOW_STACK_STATUS: if (arg3 || arg4 || arg5) return -EINVAL; error = arch_set_shadow_stack_status(me, arg2); break; case PR_LOCK_SHADOW_STACK_STATUS: if (arg3 || arg4 || arg5) return -EINVAL; error = arch_lock_shadow_stack_status(me, arg2); break; default: error = -EINVAL; break; } return error; } SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep, struct getcpu_cache __user *, unused) { int err = 0; int cpu = raw_smp_processor_id(); if (cpup) err |= put_user(cpu, cpup); if (nodep) err |= put_user(cpu_to_node(cpu), nodep); return err ? -EFAULT : 0; } /** * do_sysinfo - fill in sysinfo struct * @info: pointer to buffer to fill */ static int do_sysinfo(struct sysinfo *info) { unsigned long mem_total, sav_total; unsigned int mem_unit, bitcount; struct timespec64 tp; memset(info, 0, sizeof(struct sysinfo)); ktime_get_boottime_ts64(&tp); timens_add_boottime(&tp); info->uptime = tp.tv_sec + (tp.tv_nsec ? 1 : 0); get_avenrun(info->loads, 0, SI_LOAD_SHIFT - FSHIFT); info->procs = nr_threads; si_meminfo(info); si_swapinfo(info); /* * If the sum of all the available memory (i.e. ram + swap) * is less than can be stored in a 32 bit unsigned long then * we can be binary compatible with 2.2.x kernels. If not, * well, in that case 2.2.x was broken anyways... * * -Erik Andersen <andersee@debian.org> */ mem_total = info->totalram + info->totalswap; if (mem_total < info->totalram || mem_total < info->totalswap) goto out; bitcount = 0; mem_unit = info->mem_unit; while (mem_unit > 1) { bitcount++; mem_unit >>= 1; sav_total = mem_total; mem_total <<= 1; if (mem_total < sav_total) goto out; } /* * If mem_total did not overflow, multiply all memory values by * info->mem_unit and set it to 1. This leaves things compatible * with 2.2.x, and also retains compatibility with earlier 2.4.x * kernels... */ info->mem_unit = 1; info->totalram <<= bitcount; info->freeram <<= bitcount; info->sharedram <<= bitcount; info->bufferram <<= bitcount; info->totalswap <<= bitcount; info->freeswap <<= bitcount; info->totalhigh <<= bitcount; info->freehigh <<= bitcount; out: return 0; } SYSCALL_DEFINE1(sysinfo, struct sysinfo __user *, info) { struct sysinfo val; do_sysinfo(&val); if (copy_to_user(info, &val, sizeof(struct sysinfo))) return -EFAULT; return 0; } #ifdef CONFIG_COMPAT struct compat_sysinfo { s32 uptime; u32 loads[3]; u32 totalram; u32 freeram; u32 sharedram; u32 bufferram; u32 totalswap; u32 freeswap; u16 procs; u16 pad; u32 totalhigh; u32 freehigh; u32 mem_unit; char _f[20-2*sizeof(u32)-sizeof(int)]; }; COMPAT_SYSCALL_DEFINE1(sysinfo, struct compat_sysinfo __user *, info) { struct sysinfo s; struct compat_sysinfo s_32; do_sysinfo(&s); /* Check to see if any memory value is too large for 32-bit and scale * down if needed */ if (upper_32_bits(s.totalram) || upper_32_bits(s.totalswap)) { int bitcount = 0; while (s.mem_unit < PAGE_SIZE) { s.mem_unit <<= 1; bitcount++; } s.totalram >>= bitcount; s.freeram >>= bitcount; s.sharedram >>= bitcount; s.bufferram >>= bitcount; s.totalswap >>= bitcount; s.freeswap >>= bitcount; s.totalhigh >>= bitcount; s.freehigh >>= bitcount; } memset(&s_32, 0, sizeof(s_32)); s_32.uptime = s.uptime; s_32.loads[0] = s.loads[0]; s_32.loads[1] = s.loads[1]; s_32.loads[2] = s.loads[2]; s_32.totalram = s.totalram; s_32.freeram = s.freeram; s_32.sharedram = s.sharedram; s_32.bufferram = s.bufferram; s_32.totalswap = s.totalswap; s_32.freeswap = s.freeswap; s_32.procs = s.procs; s_32.totalhigh = s.totalhigh; s_32.freehigh = s.freehigh; s_32.mem_unit = s.mem_unit; if (copy_to_user(info, &s_32, sizeof(s_32))) return -EFAULT; return 0; } #endif /* CONFIG_COMPAT */ |
1 68 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 | /* SPDX-License-Identifier: GPL-2.0-only */ /* * VMware VMCI driver (vmciContext.h) * * Copyright (C) 2012 VMware, Inc. All rights reserved. */ #ifndef _VMCI_CONTEXT_H_ #define _VMCI_CONTEXT_H_ #include <linux/vmw_vmci_defs.h> #include <linux/atomic.h> #include <linux/kref.h> #include <linux/types.h> #include <linux/wait.h> #include "vmci_handle_array.h" #include "vmci_datagram.h" /* Used to determine what checkpoint state to get and set. */ enum { VMCI_NOTIFICATION_CPT_STATE = 1, VMCI_WELLKNOWN_CPT_STATE = 2, VMCI_DG_OUT_STATE = 3, VMCI_DG_IN_STATE = 4, VMCI_DG_IN_SIZE_STATE = 5, VMCI_DOORBELL_CPT_STATE = 6, }; /* Host specific struct used for signalling */ struct vmci_host { wait_queue_head_t wait_queue; }; struct vmci_handle_list { struct list_head node; struct vmci_handle handle; }; struct vmci_ctx { struct list_head list_item; /* For global VMCI list. */ u32 cid; struct kref kref; struct list_head datagram_queue; /* Head of per VM queue. */ u32 pending_datagrams; size_t datagram_queue_size; /* Size of datagram queue in bytes. */ /* * Version of the code that created * this context; e.g., VMX. */ int user_version; spinlock_t lock; /* Locks callQueue and handle_arrays. */ /* * queue_pairs attached to. The array of * handles for queue pairs is accessed * from the code for QP API, and there * it is protected by the QP lock. It * is also accessed from the context * clean up path, which does not * require a lock. VMCILock is not * used to protect the QP array field. */ struct vmci_handle_arr *queue_pair_array; /* Doorbells created by context. */ struct vmci_handle_arr *doorbell_array; /* Doorbells pending for context. */ struct vmci_handle_arr *pending_doorbell_array; /* Contexts current context is subscribing to. */ struct list_head notifier_list; unsigned int n_notifiers; struct vmci_host host_context; u32 priv_flags; const struct cred *cred; bool *notify; /* Notify flag pointer - hosted only. */ struct page *notify_page; /* Page backing the notify UVA. */ }; /* VMCINotifyAddRemoveInfo: Used to add/remove remote context notifications. */ struct vmci_ctx_info { u32 remote_cid; int result; }; /* VMCICptBufInfo: Used to set/get current context's checkpoint state. */ struct vmci_ctx_chkpt_buf_info { u64 cpt_buf; u32 cpt_type; u32 buf_size; s32 result; u32 _pad; }; /* * VMCINotificationReceiveInfo: Used to recieve pending notifications * for doorbells and queue pairs. */ struct vmci_ctx_notify_recv_info { u64 db_handle_buf_uva; u64 db_handle_buf_size; u64 qp_handle_buf_uva; u64 qp_handle_buf_size; s32 result; u32 _pad; }; /* * Utilility function that checks whether two entities are allowed * to interact. If one of them is restricted, the other one must * be trusted. */ static inline bool vmci_deny_interaction(u32 part_one, u32 part_two) { return ((part_one & VMCI_PRIVILEGE_FLAG_RESTRICTED) && !(part_two & VMCI_PRIVILEGE_FLAG_TRUSTED)) || ((part_two & VMCI_PRIVILEGE_FLAG_RESTRICTED) && !(part_one & VMCI_PRIVILEGE_FLAG_TRUSTED)); } struct vmci_ctx *vmci_ctx_create(u32 cid, u32 flags, uintptr_t event_hnd, int version, const struct cred *cred); void vmci_ctx_destroy(struct vmci_ctx *context); bool vmci_ctx_supports_host_qp(struct vmci_ctx *context); int vmci_ctx_enqueue_datagram(u32 cid, struct vmci_datagram *dg); int vmci_ctx_dequeue_datagram(struct vmci_ctx *context, size_t *max_size, struct vmci_datagram **dg); int vmci_ctx_pending_datagrams(u32 cid, u32 *pending); struct vmci_ctx *vmci_ctx_get(u32 cid); void vmci_ctx_put(struct vmci_ctx *context); bool vmci_ctx_exists(u32 cid); int vmci_ctx_add_notification(u32 context_id, u32 remote_cid); int vmci_ctx_remove_notification(u32 context_id, u32 remote_cid); int vmci_ctx_get_chkpt_state(u32 context_id, u32 cpt_type, u32 *num_cids, void **cpt_buf_ptr); int vmci_ctx_set_chkpt_state(u32 context_id, u32 cpt_type, u32 num_cids, void *cpt_buf); int vmci_ctx_qp_create(struct vmci_ctx *context, struct vmci_handle handle); int vmci_ctx_qp_destroy(struct vmci_ctx *context, struct vmci_handle handle); bool vmci_ctx_qp_exists(struct vmci_ctx *context, struct vmci_handle handle); void vmci_ctx_check_signal_notify(struct vmci_ctx *context); void vmci_ctx_unset_notify(struct vmci_ctx *context); int vmci_ctx_dbell_create(u32 context_id, struct vmci_handle handle); int vmci_ctx_dbell_destroy(u32 context_id, struct vmci_handle handle); int vmci_ctx_dbell_destroy_all(u32 context_id); int vmci_ctx_notify_dbell(u32 cid, struct vmci_handle handle, u32 src_priv_flags); int vmci_ctx_rcv_notifications_get(u32 context_id, struct vmci_handle_arr **db_handle_array, struct vmci_handle_arr **qp_handle_array); void vmci_ctx_rcv_notifications_release(u32 context_id, struct vmci_handle_arr *db_handle_array, struct vmci_handle_arr *qp_handle_array, bool success); static inline u32 vmci_ctx_get_id(struct vmci_ctx *context) { if (!context) return VMCI_INVALID_ID; return context->cid; } #endif /* _VMCI_CONTEXT_H_ */ |
1327 1327 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/etherdevice.h> #include "ipvlan.h" #include <linux/if_vlan.h> #include <linux/if_tap.h> #include <linux/interrupt.h> #include <linux/nsproxy.h> #include <linux/compat.h> #include <linux/if_tun.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/cache.h> #include <linux/sched.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/wait.h> #include <linux/cdev.h> #include <linux/idr.h> #include <linux/fs.h> #include <linux/uio.h> #include <net/net_namespace.h> #include <net/rtnetlink.h> #include <net/sock.h> #include <linux/virtio_net.h> #define TUN_OFFLOADS (NETIF_F_HW_CSUM | NETIF_F_TSO_ECN | NETIF_F_TSO | \ NETIF_F_TSO6) static dev_t ipvtap_major; static struct cdev ipvtap_cdev; static const void *ipvtap_net_namespace(const struct device *d) { const struct net_device *dev = to_net_dev(d->parent); return dev_net(dev); } static struct class ipvtap_class = { .name = "ipvtap", .ns_type = &net_ns_type_operations, .namespace = ipvtap_net_namespace, }; struct ipvtap_dev { struct ipvl_dev vlan; struct tap_dev tap; }; static void ipvtap_count_tx_dropped(struct tap_dev *tap) { struct ipvtap_dev *vlantap = container_of(tap, struct ipvtap_dev, tap); struct ipvl_dev *vlan = &vlantap->vlan; this_cpu_inc(vlan->pcpu_stats->tx_drps); } static void ipvtap_count_rx_dropped(struct tap_dev *tap) { struct ipvtap_dev *vlantap = container_of(tap, struct ipvtap_dev, tap); struct ipvl_dev *vlan = &vlantap->vlan; ipvlan_count_rx(vlan, 0, 0, 0); } static void ipvtap_update_features(struct tap_dev *tap, netdev_features_t features) { struct ipvtap_dev *vlantap = container_of(tap, struct ipvtap_dev, tap); struct ipvl_dev *vlan = &vlantap->vlan; vlan->sfeatures = features; netdev_update_features(vlan->dev); } static int ipvtap_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct ipvtap_dev *vlantap = netdev_priv(dev); int err; INIT_LIST_HEAD(&vlantap->tap.queue_list); /* Since macvlan supports all offloads by default, make * tap support all offloads also. */ vlantap->tap.tap_features = TUN_OFFLOADS; vlantap->tap.count_tx_dropped = ipvtap_count_tx_dropped; vlantap->tap.update_features = ipvtap_update_features; vlantap->tap.count_rx_dropped = ipvtap_count_rx_dropped; err = netdev_rx_handler_register(dev, tap_handle_frame, &vlantap->tap); if (err) return err; /* Don't put anything that may fail after macvlan_common_newlink * because we can't undo what it does. */ err = ipvlan_link_new(src_net, dev, tb, data, extack); if (err) { netdev_rx_handler_unregister(dev); return err; } vlantap->tap.dev = vlantap->vlan.dev; return err; } static void ipvtap_dellink(struct net_device *dev, struct list_head *head) { struct ipvtap_dev *vlan = netdev_priv(dev); netdev_rx_handler_unregister(dev); tap_del_queues(&vlan->tap); ipvlan_link_delete(dev, head); } static void ipvtap_setup(struct net_device *dev) { ipvlan_link_setup(dev); dev->tx_queue_len = TUN_READQ_SIZE; dev->priv_flags &= ~IFF_NO_QUEUE; } static struct rtnl_link_ops ipvtap_link_ops __read_mostly = { .kind = "ipvtap", .setup = ipvtap_setup, .newlink = ipvtap_newlink, .dellink = ipvtap_dellink, .priv_size = sizeof(struct ipvtap_dev), }; static int ipvtap_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct ipvtap_dev *vlantap; struct device *classdev; dev_t devt; int err; char tap_name[IFNAMSIZ]; if (dev->rtnl_link_ops != &ipvtap_link_ops) return NOTIFY_DONE; snprintf(tap_name, IFNAMSIZ, "tap%d", dev->ifindex); vlantap = netdev_priv(dev); switch (event) { case NETDEV_REGISTER: /* Create the device node here after the network device has * been registered but before register_netdevice has * finished running. */ err = tap_get_minor(ipvtap_major, &vlantap->tap); if (err) return notifier_from_errno(err); devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor); classdev = device_create(&ipvtap_class, &dev->dev, devt, dev, "%s", tap_name); if (IS_ERR(classdev)) { tap_free_minor(ipvtap_major, &vlantap->tap); return notifier_from_errno(PTR_ERR(classdev)); } err = sysfs_create_link(&dev->dev.kobj, &classdev->kobj, tap_name); if (err) return notifier_from_errno(err); break; case NETDEV_UNREGISTER: /* vlan->minor == 0 if NETDEV_REGISTER above failed */ if (vlantap->tap.minor == 0) break; sysfs_remove_link(&dev->dev.kobj, tap_name); devt = MKDEV(MAJOR(ipvtap_major), vlantap->tap.minor); device_destroy(&ipvtap_class, devt); tap_free_minor(ipvtap_major, &vlantap->tap); break; case NETDEV_CHANGE_TX_QUEUE_LEN: if (tap_queue_resize(&vlantap->tap)) return NOTIFY_BAD; break; } return NOTIFY_DONE; } static struct notifier_block ipvtap_notifier_block __read_mostly = { .notifier_call = ipvtap_device_event, }; static int __init ipvtap_init(void) { int err; err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap", THIS_MODULE); if (err) goto out1; err = class_register(&ipvtap_class); if (err) goto out2; err = register_netdevice_notifier(&ipvtap_notifier_block); if (err) goto out3; err = ipvlan_link_register(&ipvtap_link_ops); if (err) goto out4; return 0; out4: unregister_netdevice_notifier(&ipvtap_notifier_block); out3: class_unregister(&ipvtap_class); out2: tap_destroy_cdev(ipvtap_major, &ipvtap_cdev); out1: return err; } module_init(ipvtap_init); static void __exit ipvtap_exit(void) { rtnl_link_unregister(&ipvtap_link_ops); unregister_netdevice_notifier(&ipvtap_notifier_block); class_unregister(&ipvtap_class); tap_destroy_cdev(ipvtap_major, &ipvtap_cdev); } module_exit(ipvtap_exit); MODULE_ALIAS_RTNL_LINK("ipvtap"); MODULE_AUTHOR("Sainath Grandhi <sainath.grandhi@intel.com>"); MODULE_DESCRIPTION("IP-VLAN based tap driver"); MODULE_LICENSE("GPL"); |
7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 | // SPDX-License-Identifier: GPL-2.0-only /* * bitmap.c two-level bitmap (C) Peter T. Breuer (ptb@ot.uc3m.es) 2003 * * bitmap_create - sets up the bitmap structure * bitmap_destroy - destroys the bitmap structure * * additions, Copyright (C) 2003-2004, Paul Clements, SteelEye Technology, Inc.: * - added disk storage for bitmap * - changes to allow various bitmap chunk sizes */ /* * Still to do: * * flush after percent set rather than just time based. (maybe both). */ #include <linux/blkdev.h> #include <linux/module.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/timer.h> #include <linux/sched.h> #include <linux/list.h> #include <linux/file.h> #include <linux/mount.h> #include <linux/buffer_head.h> #include <linux/seq_file.h> #include <trace/events/block.h> #include "md.h" #include "md-bitmap.h" #define BITMAP_MAJOR_LO 3 /* version 4 insists the bitmap is in little-endian order * with version 3, it is host-endian which is non-portable * Version 5 is currently set only for clustered devices */ #define BITMAP_MAJOR_HI 4 #define BITMAP_MAJOR_CLUSTERED 5 #define BITMAP_MAJOR_HOSTENDIAN 3 /* * in-memory bitmap: * * Use 16 bit block counters to track pending writes to each "chunk". * The 2 high order bits are special-purpose, the first is a flag indicating * whether a resync is needed. The second is a flag indicating whether a * resync is active. * This means that the counter is actually 14 bits: * * +--------+--------+------------------------------------------------+ * | resync | resync | counter | * | needed | active | | * | (0-1) | (0-1) | (0-16383) | * +--------+--------+------------------------------------------------+ * * The "resync needed" bit is set when: * a '1' bit is read from storage at startup. * a write request fails on some drives * a resync is aborted on a chunk with 'resync active' set * It is cleared (and resync-active set) when a resync starts across all drives * of the chunk. * * * The "resync active" bit is set when: * a resync is started on all drives, and resync_needed is set. * resync_needed will be cleared (as long as resync_active wasn't already set). * It is cleared when a resync completes. * * The counter counts pending write requests, plus the on-disk bit. * When the counter is '1' and the resync bits are clear, the on-disk * bit can be cleared as well, thus setting the counter to 0. * When we set a bit, or in the counter (to start a write), if the fields is * 0, we first set the disk bit and set the counter to 1. * * If the counter is 0, the on-disk bit is clear and the stripe is clean * Anything that dirties the stripe pushes the counter to 2 (at least) * and sets the on-disk bit (lazily). * If a periodic sweep find the counter at 2, it is decremented to 1. * If the sweep find the counter at 1, the on-disk bit is cleared and the * counter goes to zero. * * Also, we'll hijack the "map" pointer itself and use it as two 16 bit block * counters as a fallback when "page" memory cannot be allocated: * * Normal case (page memory allocated): * * page pointer (32-bit) * * [ ] ------+ * | * +-------> [ ][ ]..[ ] (4096 byte page == 2048 counters) * c1 c2 c2048 * * Hijacked case (page memory allocation failed): * * hijacked page pointer (32-bit) * * [ ][ ] (no page memory allocated) * counter #1 (16-bit) counter #2 (16-bit) * */ #define PAGE_BITS (PAGE_SIZE << 3) #define PAGE_BIT_SHIFT (PAGE_SHIFT + 3) #define NEEDED(x) (((bitmap_counter_t) x) & NEEDED_MASK) #define RESYNC(x) (((bitmap_counter_t) x) & RESYNC_MASK) #define COUNTER(x) (((bitmap_counter_t) x) & COUNTER_MAX) /* how many counters per page? */ #define PAGE_COUNTER_RATIO (PAGE_BITS / COUNTER_BITS) /* same, except a shift value for more efficient bitops */ #define PAGE_COUNTER_SHIFT (PAGE_BIT_SHIFT - COUNTER_BIT_SHIFT) /* same, except a mask value for more efficient bitops */ #define PAGE_COUNTER_MASK (PAGE_COUNTER_RATIO - 1) #define BITMAP_BLOCK_SHIFT 9 /* * bitmap structures: */ /* the in-memory bitmap is represented by bitmap_pages */ struct bitmap_page { /* * map points to the actual memory page */ char *map; /* * in emergencies (when map cannot be alloced), hijack the map * pointer and use it as two counters itself */ unsigned int hijacked:1; /* * If any counter in this page is '1' or '2' - and so could be * cleared then that page is marked as 'pending' */ unsigned int pending:1; /* * count of dirty bits on the page */ unsigned int count:30; }; /* the main bitmap structure - one per mddev */ struct bitmap { struct bitmap_counts { spinlock_t lock; struct bitmap_page *bp; /* total number of pages in the bitmap */ unsigned long pages; /* number of pages not yet allocated */ unsigned long missing_pages; /* chunksize = 2^chunkshift (for bitops) */ unsigned long chunkshift; /* total number of data chunks for the array */ unsigned long chunks; } counts; struct mddev *mddev; /* the md device that the bitmap is for */ __u64 events_cleared; int need_sync; struct bitmap_storage { /* backing disk file */ struct file *file; /* cached copy of the bitmap file superblock */ struct page *sb_page; unsigned long sb_index; /* list of cache pages for the file */ struct page **filemap; /* attributes associated filemap pages */ unsigned long *filemap_attr; /* number of pages in the file */ unsigned long file_pages; /* total bytes in the bitmap */ unsigned long bytes; } storage; unsigned long flags; int allclean; atomic_t behind_writes; /* highest actual value at runtime */ unsigned long behind_writes_used; /* * the bitmap daemon - periodically wakes up and sweeps the bitmap * file, cleaning up bits and flushing out pages to disk as necessary */ unsigned long daemon_lastrun; /* jiffies of last run */ /* * when we lasted called end_sync to update bitmap with resync * progress. */ unsigned long last_end_sync; /* pending writes to the bitmap file */ atomic_t pending_writes; wait_queue_head_t write_wait; wait_queue_head_t overflow_wait; wait_queue_head_t behind_wait; struct kernfs_node *sysfs_can_clear; /* slot offset for clustered env */ int cluster_slot; }; static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, int chunksize, bool init); static inline char *bmname(struct bitmap *bitmap) { return bitmap->mddev ? mdname(bitmap->mddev) : "mdX"; } static bool __bitmap_enabled(struct bitmap *bitmap) { return bitmap->storage.filemap && !test_bit(BITMAP_STALE, &bitmap->flags); } static bool bitmap_enabled(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return false; return __bitmap_enabled(bitmap); } /* * check a page and, if necessary, allocate it (or hijack it if the alloc fails) * * 1) check to see if this page is allocated, if it's not then try to alloc * 2) if the alloc fails, set the page's hijacked flag so we'll use the * page pointer directly as a counter * * if we find our page, we increment the page's refcount so that it stays * allocated while we're using it */ static int md_bitmap_checkpage(struct bitmap_counts *bitmap, unsigned long page, int create, int no_hijack) __releases(bitmap->lock) __acquires(bitmap->lock) { unsigned char *mappage; WARN_ON_ONCE(page >= bitmap->pages); if (bitmap->bp[page].hijacked) /* it's hijacked, don't try to alloc */ return 0; if (bitmap->bp[page].map) /* page is already allocated, just return */ return 0; if (!create) return -ENOENT; /* this page has not been allocated yet */ spin_unlock_irq(&bitmap->lock); /* It is possible that this is being called inside a * prepare_to_wait/finish_wait loop from raid5c:make_request(). * In general it is not permitted to sleep in that context as it * can cause the loop to spin freely. * That doesn't apply here as we can only reach this point * once with any loop. * When this function completes, either bp[page].map or * bp[page].hijacked. In either case, this function will * abort before getting to this point again. So there is * no risk of a free-spin, and so it is safe to assert * that sleeping here is allowed. */ sched_annotate_sleep(); mappage = kzalloc(PAGE_SIZE, GFP_NOIO); spin_lock_irq(&bitmap->lock); if (mappage == NULL) { pr_debug("md/bitmap: map page allocation failed, hijacking\n"); /* We don't support hijack for cluster raid */ if (no_hijack) return -ENOMEM; /* failed - set the hijacked flag so that we can use the * pointer as a counter */ if (!bitmap->bp[page].map) bitmap->bp[page].hijacked = 1; } else if (bitmap->bp[page].map || bitmap->bp[page].hijacked) { /* somebody beat us to getting the page */ kfree(mappage); } else { /* no page was in place and we have one, so install it */ bitmap->bp[page].map = mappage; bitmap->missing_pages--; } return 0; } /* if page is completely empty, put it back on the free list, or dealloc it */ /* if page was hijacked, unmark the flag so it might get alloced next time */ /* Note: lock should be held when calling this */ static void md_bitmap_checkfree(struct bitmap_counts *bitmap, unsigned long page) { char *ptr; if (bitmap->bp[page].count) /* page is still busy */ return; /* page is no longer in use, it can be released */ if (bitmap->bp[page].hijacked) { /* page was hijacked, undo this now */ bitmap->bp[page].hijacked = 0; bitmap->bp[page].map = NULL; } else { /* normal case, free the page */ ptr = bitmap->bp[page].map; bitmap->bp[page].map = NULL; bitmap->missing_pages++; kfree(ptr); } } /* * bitmap file handling - read and write the bitmap file and its superblock */ /* * basic page I/O operations */ /* IO operations when bitmap is stored near all superblocks */ /* choose a good rdev and read the page from there */ static int read_sb_page(struct mddev *mddev, loff_t offset, struct page *page, unsigned long index, int size) { sector_t sector = mddev->bitmap_info.offset + offset + index * (PAGE_SIZE / SECTOR_SIZE); struct md_rdev *rdev; rdev_for_each(rdev, mddev) { u32 iosize = roundup(size, bdev_logical_block_size(rdev->bdev)); if (!test_bit(In_sync, &rdev->flags) || test_bit(Faulty, &rdev->flags) || test_bit(Bitmap_sync, &rdev->flags)) continue; if (sync_page_io(rdev, sector, iosize, page, REQ_OP_READ, true)) return 0; } return -EIO; } static struct md_rdev *next_active_rdev(struct md_rdev *rdev, struct mddev *mddev) { /* Iterate the disks of an mddev, using rcu to protect access to the * linked list, and raising the refcount of devices we return to ensure * they don't disappear while in use. * As devices are only added or removed when raid_disk is < 0 and * nr_pending is 0 and In_sync is clear, the entries we return will * still be in the same position on the list when we re-enter * list_for_each_entry_continue_rcu. * * Note that if entered with 'rdev == NULL' to start at the * beginning, we temporarily assign 'rdev' to an address which * isn't really an rdev, but which can be used by * list_for_each_entry_continue_rcu() to find the first entry. */ rcu_read_lock(); if (rdev == NULL) /* start at the beginning */ rdev = list_entry(&mddev->disks, struct md_rdev, same_set); else { /* release the previous rdev and start from there. */ rdev_dec_pending(rdev, mddev); } list_for_each_entry_continue_rcu(rdev, &mddev->disks, same_set) { if (rdev->raid_disk >= 0 && !test_bit(Faulty, &rdev->flags)) { /* this is a usable devices */ atomic_inc(&rdev->nr_pending); rcu_read_unlock(); return rdev; } } rcu_read_unlock(); return NULL; } static unsigned int optimal_io_size(struct block_device *bdev, unsigned int last_page_size, unsigned int io_size) { if (bdev_io_opt(bdev) > bdev_logical_block_size(bdev)) return roundup(last_page_size, bdev_io_opt(bdev)); return io_size; } static unsigned int bitmap_io_size(unsigned int io_size, unsigned int opt_size, loff_t start, loff_t boundary) { if (io_size != opt_size && start + opt_size / SECTOR_SIZE <= boundary) return opt_size; if (start + io_size / SECTOR_SIZE <= boundary) return io_size; /* Overflows boundary */ return 0; } static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap, unsigned long pg_index, struct page *page) { struct block_device *bdev; struct mddev *mddev = bitmap->mddev; struct bitmap_storage *store = &bitmap->storage; unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) << PAGE_SHIFT; loff_t sboff, offset = mddev->bitmap_info.offset; sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE; unsigned int size = PAGE_SIZE; unsigned int opt_size = PAGE_SIZE; sector_t doff; bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev; /* we compare length (page numbers), not page offset. */ if ((pg_index - store->sb_index) == store->file_pages - 1) { unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1); if (last_page_size == 0) last_page_size = PAGE_SIZE; size = roundup(last_page_size, bdev_logical_block_size(bdev)); opt_size = optimal_io_size(bdev, last_page_size, size); } sboff = rdev->sb_start + offset; doff = rdev->data_offset; /* Just make sure we aren't corrupting data or metadata */ if (mddev->external) { /* Bitmap could be anywhere. */ if (sboff + ps > doff && sboff < (doff + mddev->dev_sectors + PAGE_SIZE / SECTOR_SIZE)) return -EINVAL; } else if (offset < 0) { /* DATA BITMAP METADATA */ size = bitmap_io_size(size, opt_size, offset + ps, 0); if (size == 0) /* bitmap runs in to metadata */ return -EINVAL; if (doff + mddev->dev_sectors > sboff) /* data runs in to bitmap */ return -EINVAL; } else if (rdev->sb_start < rdev->data_offset) { /* METADATA BITMAP DATA */ size = bitmap_io_size(size, opt_size, sboff + ps, doff); if (size == 0) /* bitmap runs in to data */ return -EINVAL; } md_super_write(mddev, rdev, sboff + ps, (int)min(size, bitmap_limit), page); return 0; } static void write_sb_page(struct bitmap *bitmap, unsigned long pg_index, struct page *page, bool wait) { struct mddev *mddev = bitmap->mddev; do { struct md_rdev *rdev = NULL; while ((rdev = next_active_rdev(rdev, mddev)) != NULL) { if (__write_sb_page(rdev, bitmap, pg_index, page) < 0) { set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); return; } } } while (wait && md_super_wait(mddev) < 0); } static void md_bitmap_file_kick(struct bitmap *bitmap); #ifdef CONFIG_MD_BITMAP_FILE static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) { struct buffer_head *bh = page_buffers(page); while (bh && bh->b_blocknr) { atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); bh = bh->b_this_page; } if (wait) wait_event(bitmap->write_wait, atomic_read(&bitmap->pending_writes) == 0); } static void end_bitmap_write(struct buffer_head *bh, int uptodate) { struct bitmap *bitmap = bh->b_private; if (!uptodate) set_bit(BITMAP_WRITE_ERROR, &bitmap->flags); if (atomic_dec_and_test(&bitmap->pending_writes)) wake_up(&bitmap->write_wait); } static void free_buffers(struct page *page) { struct buffer_head *bh; if (!PagePrivate(page)) return; bh = page_buffers(page); while (bh) { struct buffer_head *next = bh->b_this_page; free_buffer_head(bh); bh = next; } detach_page_private(page); put_page(page); } /* read a page from a file. * We both read the page, and attach buffers to the page to record the * address of each block (using bmap). These addresses will be used * to write the block later, completely bypassing the filesystem. * This usage is similar to how swap files are handled, and allows us * to write to a file with no concerns of memory allocation failing. */ static int read_file_page(struct file *file, unsigned long index, struct bitmap *bitmap, unsigned long count, struct page *page) { int ret = 0; struct inode *inode = file_inode(file); struct buffer_head *bh; sector_t block, blk_cur; unsigned long blocksize = i_blocksize(inode); pr_debug("read bitmap file (%dB @ %llu)\n", (int)PAGE_SIZE, (unsigned long long)index << PAGE_SHIFT); bh = alloc_page_buffers(page, blocksize); if (!bh) { ret = -ENOMEM; goto out; } attach_page_private(page, bh); blk_cur = index << (PAGE_SHIFT - inode->i_blkbits); while (bh) { block = blk_cur; if (count == 0) bh->b_blocknr = 0; else { ret = bmap(inode, &block); if (ret || !block) { ret = -EINVAL; bh->b_blocknr = 0; goto out; } bh->b_blocknr = block; bh->b_bdev = inode->i_sb->s_bdev; if (count < blocksize) count = 0; else count -= blocksize; bh->b_end_io = end_bitmap_write; bh->b_private = bitmap; atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); submit_bh(REQ_OP_READ, bh); } blk_cur++; bh = bh->b_this_page; } wait_event(bitmap->write_wait, atomic_read(&bitmap->pending_writes)==0); if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) ret = -EIO; out: if (ret) pr_err("md: bitmap read error: (%dB @ %llu): %d\n", (int)PAGE_SIZE, (unsigned long long)index << PAGE_SHIFT, ret); return ret; } #else /* CONFIG_MD_BITMAP_FILE */ static void write_file_page(struct bitmap *bitmap, struct page *page, int wait) { } static int read_file_page(struct file *file, unsigned long index, struct bitmap *bitmap, unsigned long count, struct page *page) { return -EIO; } static void free_buffers(struct page *page) { put_page(page); } #endif /* CONFIG_MD_BITMAP_FILE */ /* * bitmap file superblock operations */ /* * write out a page to a file */ static void filemap_write_page(struct bitmap *bitmap, unsigned long pg_index, bool wait) { struct bitmap_storage *store = &bitmap->storage; struct page *page = store->filemap[pg_index]; if (mddev_is_clustered(bitmap->mddev)) { /* go to node bitmap area starting point */ pg_index += store->sb_index; } if (store->file) write_file_page(bitmap, page, wait); else write_sb_page(bitmap, pg_index, page, wait); } /* * md_bitmap_wait_writes() should be called before writing any bitmap * blocks, to ensure previous writes, particularly from * md_bitmap_daemon_work(), have completed. */ static void md_bitmap_wait_writes(struct bitmap *bitmap) { if (bitmap->storage.file) wait_event(bitmap->write_wait, atomic_read(&bitmap->pending_writes)==0); else /* Note that we ignore the return value. The writes * might have failed, but that would just mean that * some bits which should be cleared haven't been, * which is safe. The relevant bitmap blocks will * probably get written again, but there is no great * loss if they aren't. */ md_super_wait(bitmap->mddev); } /* update the event counter and sync the superblock to disk */ static void bitmap_update_sb(void *data) { bitmap_super_t *sb; struct bitmap *bitmap = data; if (!bitmap || !bitmap->mddev) /* no bitmap for this array */ return; if (bitmap->mddev->bitmap_info.external) return; if (!bitmap->storage.sb_page) /* no superblock */ return; sb = kmap_atomic(bitmap->storage.sb_page); sb->events = cpu_to_le64(bitmap->mddev->events); if (bitmap->mddev->events < bitmap->events_cleared) /* rocking back to read-only */ bitmap->events_cleared = bitmap->mddev->events; sb->events_cleared = cpu_to_le64(bitmap->events_cleared); /* * clear BITMAP_WRITE_ERROR bit to protect against the case that * a bitmap write error occurred but the later writes succeeded. */ sb->state = cpu_to_le32(bitmap->flags & ~BIT(BITMAP_WRITE_ERROR)); /* Just in case these have been changed via sysfs: */ sb->daemon_sleep = cpu_to_le32(bitmap->mddev->bitmap_info.daemon_sleep/HZ); sb->write_behind = cpu_to_le32(bitmap->mddev->bitmap_info.max_write_behind); /* This might have been changed by a reshape */ sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); sb->chunksize = cpu_to_le32(bitmap->mddev->bitmap_info.chunksize); sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes); sb->sectors_reserved = cpu_to_le32(bitmap->mddev-> bitmap_info.space); kunmap_atomic(sb); if (bitmap->storage.file) write_file_page(bitmap, bitmap->storage.sb_page, 1); else write_sb_page(bitmap, bitmap->storage.sb_index, bitmap->storage.sb_page, 1); } static void bitmap_print_sb(struct bitmap *bitmap) { bitmap_super_t *sb; if (!bitmap || !bitmap->storage.sb_page) return; sb = kmap_atomic(bitmap->storage.sb_page); pr_debug("%s: bitmap file superblock:\n", bmname(bitmap)); pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic)); pr_debug(" version: %u\n", le32_to_cpu(sb->version)); pr_debug(" uuid: %08x.%08x.%08x.%08x\n", le32_to_cpu(*(__le32 *)(sb->uuid+0)), le32_to_cpu(*(__le32 *)(sb->uuid+4)), le32_to_cpu(*(__le32 *)(sb->uuid+8)), le32_to_cpu(*(__le32 *)(sb->uuid+12))); pr_debug(" events: %llu\n", (unsigned long long) le64_to_cpu(sb->events)); pr_debug("events cleared: %llu\n", (unsigned long long) le64_to_cpu(sb->events_cleared)); pr_debug(" state: %08x\n", le32_to_cpu(sb->state)); pr_debug(" chunksize: %u B\n", le32_to_cpu(sb->chunksize)); pr_debug(" daemon sleep: %us\n", le32_to_cpu(sb->daemon_sleep)); pr_debug(" sync size: %llu KB\n", (unsigned long long)le64_to_cpu(sb->sync_size)/2); pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind)); kunmap_atomic(sb); } /* * bitmap_new_disk_sb * @bitmap * * This function is somewhat the reverse of bitmap_read_sb. bitmap_read_sb * reads and verifies the on-disk bitmap superblock and populates bitmap_info. * This function verifies 'bitmap_info' and populates the on-disk bitmap * structure, which is to be written to disk. * * Returns: 0 on success, -Exxx on error */ static int md_bitmap_new_disk_sb(struct bitmap *bitmap) { bitmap_super_t *sb; unsigned long chunksize, daemon_sleep, write_behind; bitmap->storage.sb_page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (bitmap->storage.sb_page == NULL) return -ENOMEM; bitmap->storage.sb_index = 0; sb = kmap_atomic(bitmap->storage.sb_page); sb->magic = cpu_to_le32(BITMAP_MAGIC); sb->version = cpu_to_le32(BITMAP_MAJOR_HI); chunksize = bitmap->mddev->bitmap_info.chunksize; BUG_ON(!chunksize); if (!is_power_of_2(chunksize)) { kunmap_atomic(sb); pr_warn("bitmap chunksize not a power of 2\n"); return -EINVAL; } sb->chunksize = cpu_to_le32(chunksize); daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep; if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) { pr_debug("Choosing daemon_sleep default (5 sec)\n"); daemon_sleep = 5 * HZ; } sb->daemon_sleep = cpu_to_le32(daemon_sleep); bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; /* * FIXME: write_behind for RAID1. If not specified, what * is a good choice? We choose COUNTER_MAX / 2 arbitrarily. */ write_behind = bitmap->mddev->bitmap_info.max_write_behind; if (write_behind > COUNTER_MAX) write_behind = COUNTER_MAX / 2; sb->write_behind = cpu_to_le32(write_behind); bitmap->mddev->bitmap_info.max_write_behind = write_behind; /* keep the array size field of the bitmap superblock up to date */ sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); memcpy(sb->uuid, bitmap->mddev->uuid, 16); set_bit(BITMAP_STALE, &bitmap->flags); sb->state = cpu_to_le32(bitmap->flags); bitmap->events_cleared = bitmap->mddev->events; sb->events_cleared = cpu_to_le64(bitmap->mddev->events); bitmap->mddev->bitmap_info.nodes = 0; kunmap_atomic(sb); return 0; } /* read the superblock from the bitmap file and initialize some bitmap fields */ static int md_bitmap_read_sb(struct bitmap *bitmap) { char *reason = NULL; bitmap_super_t *sb; unsigned long chunksize, daemon_sleep, write_behind; unsigned long long events; int nodes = 0; unsigned long sectors_reserved = 0; int err = -EINVAL; struct page *sb_page; loff_t offset = 0; if (!bitmap->storage.file && !bitmap->mddev->bitmap_info.offset) { chunksize = 128 * 1024 * 1024; daemon_sleep = 5 * HZ; write_behind = 0; set_bit(BITMAP_STALE, &bitmap->flags); err = 0; goto out_no_sb; } /* page 0 is the superblock, read it... */ sb_page = alloc_page(GFP_KERNEL); if (!sb_page) return -ENOMEM; bitmap->storage.sb_page = sb_page; re_read: /* If cluster_slot is set, the cluster is setup */ if (bitmap->cluster_slot >= 0) { sector_t bm_blocks = bitmap->mddev->resync_max_sectors; bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, (bitmap->mddev->bitmap_info.chunksize >> 9)); /* bits to bytes */ bm_blocks = ((bm_blocks+7) >> 3) + sizeof(bitmap_super_t); /* to 4k blocks */ bm_blocks = DIV_ROUND_UP_SECTOR_T(bm_blocks, 4096); offset = bitmap->cluster_slot * (bm_blocks << 3); pr_debug("%s:%d bm slot: %d offset: %llu\n", __func__, __LINE__, bitmap->cluster_slot, offset); } if (bitmap->storage.file) { loff_t isize = i_size_read(bitmap->storage.file->f_mapping->host); int bytes = isize > PAGE_SIZE ? PAGE_SIZE : isize; err = read_file_page(bitmap->storage.file, 0, bitmap, bytes, sb_page); } else { err = read_sb_page(bitmap->mddev, offset, sb_page, 0, sizeof(bitmap_super_t)); } if (err) return err; err = -EINVAL; sb = kmap_atomic(sb_page); chunksize = le32_to_cpu(sb->chunksize); daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ; write_behind = le32_to_cpu(sb->write_behind); sectors_reserved = le32_to_cpu(sb->sectors_reserved); /* verify that the bitmap-specific fields are valid */ if (sb->magic != cpu_to_le32(BITMAP_MAGIC)) reason = "bad magic"; else if (le32_to_cpu(sb->version) < BITMAP_MAJOR_LO || le32_to_cpu(sb->version) > BITMAP_MAJOR_CLUSTERED) reason = "unrecognized superblock version"; else if (chunksize < 512) reason = "bitmap chunksize too small"; else if (!is_power_of_2(chunksize)) reason = "bitmap chunksize not a power of 2"; else if (daemon_sleep < 1 || daemon_sleep > MAX_SCHEDULE_TIMEOUT) reason = "daemon sleep period out of range"; else if (write_behind > COUNTER_MAX) reason = "write-behind limit out of range (0 - 16383)"; if (reason) { pr_warn("%s: invalid bitmap file superblock: %s\n", bmname(bitmap), reason); goto out; } /* * Setup nodes/clustername only if bitmap version is * cluster-compatible */ if (sb->version == cpu_to_le32(BITMAP_MAJOR_CLUSTERED)) { nodes = le32_to_cpu(sb->nodes); strscpy(bitmap->mddev->bitmap_info.cluster_name, sb->cluster_name, 64); } /* keep the array size field of the bitmap superblock up to date */ sb->sync_size = cpu_to_le64(bitmap->mddev->resync_max_sectors); if (bitmap->mddev->persistent) { /* * We have a persistent array superblock, so compare the * bitmap's UUID and event counter to the mddev's */ if (memcmp(sb->uuid, bitmap->mddev->uuid, 16)) { pr_warn("%s: bitmap superblock UUID mismatch\n", bmname(bitmap)); goto out; } events = le64_to_cpu(sb->events); if (!nodes && (events < bitmap->mddev->events)) { pr_warn("%s: bitmap file is out of date (%llu < %llu) -- forcing full recovery\n", bmname(bitmap), events, (unsigned long long) bitmap->mddev->events); set_bit(BITMAP_STALE, &bitmap->flags); } } /* assign fields using values from superblock */ bitmap->flags |= le32_to_cpu(sb->state); if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) set_bit(BITMAP_HOSTENDIAN, &bitmap->flags); bitmap->events_cleared = le64_to_cpu(sb->events_cleared); err = 0; out: kunmap_atomic(sb); if (err == 0 && nodes && (bitmap->cluster_slot < 0)) { /* Assigning chunksize is required for "re_read" */ bitmap->mddev->bitmap_info.chunksize = chunksize; err = md_setup_cluster(bitmap->mddev, nodes); if (err) { pr_warn("%s: Could not setup cluster service (%d)\n", bmname(bitmap), err); goto out_no_sb; } bitmap->cluster_slot = md_cluster_ops->slot_number(bitmap->mddev); goto re_read; } out_no_sb: if (err == 0) { if (test_bit(BITMAP_STALE, &bitmap->flags)) bitmap->events_cleared = bitmap->mddev->events; bitmap->mddev->bitmap_info.chunksize = chunksize; bitmap->mddev->bitmap_info.daemon_sleep = daemon_sleep; bitmap->mddev->bitmap_info.max_write_behind = write_behind; bitmap->mddev->bitmap_info.nodes = nodes; if (bitmap->mddev->bitmap_info.space == 0 || bitmap->mddev->bitmap_info.space > sectors_reserved) bitmap->mddev->bitmap_info.space = sectors_reserved; } else { bitmap_print_sb(bitmap); if (bitmap->cluster_slot < 0) md_cluster_stop(bitmap->mddev); } return err; } /* * general bitmap file operations */ /* * on-disk bitmap: * * Use one bit per "chunk" (block set). We do the disk I/O on the bitmap * file a page at a time. There's a superblock at the start of the file. */ /* calculate the index of the page that contains this bit */ static inline unsigned long file_page_index(struct bitmap_storage *store, unsigned long chunk) { if (store->sb_page) chunk += sizeof(bitmap_super_t) << 3; return chunk >> PAGE_BIT_SHIFT; } /* calculate the (bit) offset of this bit within a page */ static inline unsigned long file_page_offset(struct bitmap_storage *store, unsigned long chunk) { if (store->sb_page) chunk += sizeof(bitmap_super_t) << 3; return chunk & (PAGE_BITS - 1); } /* * return a pointer to the page in the filemap that contains the given bit * */ static inline struct page *filemap_get_page(struct bitmap_storage *store, unsigned long chunk) { if (file_page_index(store, chunk) >= store->file_pages) return NULL; return store->filemap[file_page_index(store, chunk)]; } static int md_bitmap_storage_alloc(struct bitmap_storage *store, unsigned long chunks, int with_super, int slot_number) { int pnum, offset = 0; unsigned long num_pages; unsigned long bytes; bytes = DIV_ROUND_UP(chunks, 8); if (with_super) bytes += sizeof(bitmap_super_t); num_pages = DIV_ROUND_UP(bytes, PAGE_SIZE); offset = slot_number * num_pages; store->filemap = kmalloc_array(num_pages, sizeof(struct page *), GFP_KERNEL); if (!store->filemap) return -ENOMEM; if (with_super && !store->sb_page) { store->sb_page = alloc_page(GFP_KERNEL|__GFP_ZERO); if (store->sb_page == NULL) return -ENOMEM; } pnum = 0; if (store->sb_page) { store->filemap[0] = store->sb_page; pnum = 1; store->sb_index = offset; } for ( ; pnum < num_pages; pnum++) { store->filemap[pnum] = alloc_page(GFP_KERNEL|__GFP_ZERO); if (!store->filemap[pnum]) { store->file_pages = pnum; return -ENOMEM; } } store->file_pages = pnum; /* We need 4 bits per page, rounded up to a multiple * of sizeof(unsigned long) */ store->filemap_attr = kzalloc( roundup(DIV_ROUND_UP(num_pages*4, 8), sizeof(unsigned long)), GFP_KERNEL); if (!store->filemap_attr) return -ENOMEM; store->bytes = bytes; return 0; } static void md_bitmap_file_unmap(struct bitmap_storage *store) { struct file *file = store->file; struct page *sb_page = store->sb_page; struct page **map = store->filemap; int pages = store->file_pages; while (pages--) if (map[pages] != sb_page) /* 0 is sb_page, release it below */ free_buffers(map[pages]); kfree(map); kfree(store->filemap_attr); if (sb_page) free_buffers(sb_page); if (file) { struct inode *inode = file_inode(file); invalidate_mapping_pages(inode->i_mapping, 0, -1); fput(file); } } /* * bitmap_file_kick - if an error occurs while manipulating the bitmap file * then it is no longer reliable, so we stop using it and we mark the file * as failed in the superblock */ static void md_bitmap_file_kick(struct bitmap *bitmap) { if (!test_and_set_bit(BITMAP_STALE, &bitmap->flags)) { bitmap_update_sb(bitmap); if (bitmap->storage.file) { pr_warn("%s: kicking failed bitmap file %pD4 from array!\n", bmname(bitmap), bitmap->storage.file); } else pr_warn("%s: disabling internal bitmap due to errors\n", bmname(bitmap)); } } enum bitmap_page_attr { BITMAP_PAGE_DIRTY = 0, /* there are set bits that need to be synced */ BITMAP_PAGE_PENDING = 1, /* there are bits that are being cleaned. * i.e. counter is 1 or 2. */ BITMAP_PAGE_NEEDWRITE = 2, /* there are cleared bits that need to be synced */ }; static inline void set_page_attr(struct bitmap *bitmap, int pnum, enum bitmap_page_attr attr) { set_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); } static inline void clear_page_attr(struct bitmap *bitmap, int pnum, enum bitmap_page_attr attr) { clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); } static inline int test_page_attr(struct bitmap *bitmap, int pnum, enum bitmap_page_attr attr) { return test_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); } static inline int test_and_clear_page_attr(struct bitmap *bitmap, int pnum, enum bitmap_page_attr attr) { return test_and_clear_bit((pnum<<2) + attr, bitmap->storage.filemap_attr); } /* * bitmap_file_set_bit -- called before performing a write to the md device * to set (and eventually sync) a particular bit in the bitmap file * * we set the bit immediately, then we record the page number so that * when an unplug occurs, we can flush the dirty pages out to disk */ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) { unsigned long bit; struct page *page; void *kaddr; unsigned long chunk = block >> bitmap->counts.chunkshift; struct bitmap_storage *store = &bitmap->storage; unsigned long index = file_page_index(store, chunk); unsigned long node_offset = 0; index += store->sb_index; if (mddev_is_clustered(bitmap->mddev)) node_offset = bitmap->cluster_slot * store->file_pages; page = filemap_get_page(&bitmap->storage, chunk); if (!page) return; bit = file_page_offset(&bitmap->storage, chunk); /* set the bit */ kaddr = kmap_atomic(page); if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) set_bit(bit, kaddr); else set_bit_le(bit, kaddr); kunmap_atomic(kaddr); pr_debug("set file bit %lu page %lu\n", bit, index); /* record page number so it gets flushed to disk when unplug occurs */ set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY); } static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block) { unsigned long bit; struct page *page; void *paddr; unsigned long chunk = block >> bitmap->counts.chunkshift; struct bitmap_storage *store = &bitmap->storage; unsigned long index = file_page_index(store, chunk); unsigned long node_offset = 0; index += store->sb_index; if (mddev_is_clustered(bitmap->mddev)) node_offset = bitmap->cluster_slot * store->file_pages; page = filemap_get_page(&bitmap->storage, chunk); if (!page) return; bit = file_page_offset(&bitmap->storage, chunk); paddr = kmap_atomic(page); if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) clear_bit(bit, paddr); else clear_bit_le(bit, paddr); kunmap_atomic(paddr); if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) { set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING); bitmap->allclean = 0; } } static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block) { unsigned long bit; struct page *page; void *paddr; unsigned long chunk = block >> bitmap->counts.chunkshift; int set = 0; page = filemap_get_page(&bitmap->storage, chunk); if (!page) return -EINVAL; bit = file_page_offset(&bitmap->storage, chunk); paddr = kmap_atomic(page); if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) set = test_bit(bit, paddr); else set = test_bit_le(bit, paddr); kunmap_atomic(paddr); return set; } /* this gets called when the md device is ready to unplug its underlying * (slave) device queues -- before we let any writes go down, we need to * sync the dirty pages of the bitmap file to disk */ static void __bitmap_unplug(struct bitmap *bitmap) { unsigned long i; int dirty, need_write; int writing = 0; if (!__bitmap_enabled(bitmap)) return; /* look at each page to see if there are any set bits that need to be * flushed out to disk */ for (i = 0; i < bitmap->storage.file_pages; i++) { dirty = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); need_write = test_and_clear_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); if (dirty || need_write) { if (!writing) { md_bitmap_wait_writes(bitmap); mddev_add_trace_msg(bitmap->mddev, "md bitmap_unplug"); } clear_page_attr(bitmap, i, BITMAP_PAGE_PENDING); filemap_write_page(bitmap, i, false); writing = 1; } } if (writing) md_bitmap_wait_writes(bitmap); if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) md_bitmap_file_kick(bitmap); } struct bitmap_unplug_work { struct work_struct work; struct bitmap *bitmap; struct completion *done; }; static void md_bitmap_unplug_fn(struct work_struct *work) { struct bitmap_unplug_work *unplug_work = container_of(work, struct bitmap_unplug_work, work); __bitmap_unplug(unplug_work->bitmap); complete(unplug_work->done); } static void bitmap_unplug_async(struct bitmap *bitmap) { DECLARE_COMPLETION_ONSTACK(done); struct bitmap_unplug_work unplug_work; INIT_WORK_ONSTACK(&unplug_work.work, md_bitmap_unplug_fn); unplug_work.bitmap = bitmap; unplug_work.done = &done; queue_work(md_bitmap_wq, &unplug_work.work); wait_for_completion(&done); destroy_work_on_stack(&unplug_work.work); } static void bitmap_unplug(struct mddev *mddev, bool sync) { struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return; if (sync) __bitmap_unplug(bitmap); else bitmap_unplug_async(bitmap); } static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed); /* * Initialize the in-memory bitmap from the on-disk bitmap and set up the memory * mapping of the bitmap file. * * Special case: If there's no bitmap file, or if the bitmap file had been * previously kicked from the array, we mark all the bits as 1's in order to * cause a full resync. * * We ignore all bits for sectors that end earlier than 'start'. * This is used when reading an out-of-date bitmap. */ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start) { bool outofdate = test_bit(BITMAP_STALE, &bitmap->flags); struct mddev *mddev = bitmap->mddev; unsigned long chunks = bitmap->counts.chunks; struct bitmap_storage *store = &bitmap->storage; struct file *file = store->file; unsigned long node_offset = 0; unsigned long bit_cnt = 0; unsigned long i; int ret; if (!file && !mddev->bitmap_info.offset) { /* No permanent bitmap - fill with '1s'. */ store->filemap = NULL; store->file_pages = 0; for (i = 0; i < chunks ; i++) { /* if the disk bit is set, set the memory bit */ int needed = ((sector_t)(i+1) << (bitmap->counts.chunkshift) >= start); md_bitmap_set_memory_bits(bitmap, (sector_t)i << bitmap->counts.chunkshift, needed); } return 0; } if (file && i_size_read(file->f_mapping->host) < store->bytes) { pr_warn("%s: bitmap file too short %lu < %lu\n", bmname(bitmap), (unsigned long) i_size_read(file->f_mapping->host), store->bytes); ret = -ENOSPC; goto err; } if (mddev_is_clustered(mddev)) node_offset = bitmap->cluster_slot * (DIV_ROUND_UP(store->bytes, PAGE_SIZE)); for (i = 0; i < store->file_pages; i++) { struct page *page = store->filemap[i]; int count; /* unmap the old page, we're done with it */ if (i == store->file_pages - 1) count = store->bytes - i * PAGE_SIZE; else count = PAGE_SIZE; if (file) ret = read_file_page(file, i, bitmap, count, page); else ret = read_sb_page(mddev, 0, page, i + node_offset, count); if (ret) goto err; } if (outofdate) { pr_warn("%s: bitmap file is out of date, doing full recovery\n", bmname(bitmap)); for (i = 0; i < store->file_pages; i++) { struct page *page = store->filemap[i]; unsigned long offset = 0; void *paddr; if (i == 0 && !mddev->bitmap_info.external) offset = sizeof(bitmap_super_t); /* * If the bitmap is out of date, dirty the whole page * and write it out */ paddr = kmap_atomic(page); memset(paddr + offset, 0xff, PAGE_SIZE - offset); kunmap_atomic(paddr); filemap_write_page(bitmap, i, true); if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) { ret = -EIO; goto err; } } } for (i = 0; i < chunks; i++) { struct page *page = filemap_get_page(&bitmap->storage, i); unsigned long bit = file_page_offset(&bitmap->storage, i); void *paddr; bool was_set; paddr = kmap_atomic(page); if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags)) was_set = test_bit(bit, paddr); else was_set = test_bit_le(bit, paddr); kunmap_atomic(paddr); if (was_set) { /* if the disk bit is set, set the memory bit */ int needed = ((sector_t)(i+1) << bitmap->counts.chunkshift >= start); md_bitmap_set_memory_bits(bitmap, (sector_t)i << bitmap->counts.chunkshift, needed); bit_cnt++; } } pr_debug("%s: bitmap initialized from disk: read %lu pages, set %lu of %lu bits\n", bmname(bitmap), store->file_pages, bit_cnt, chunks); return 0; err: pr_warn("%s: bitmap initialisation failed: %d\n", bmname(bitmap), ret); return ret; } /* just flag bitmap pages as needing to be written. */ static void bitmap_write_all(struct mddev *mddev) { int i; struct bitmap *bitmap = mddev->bitmap; if (!bitmap || !bitmap->storage.filemap) return; /* Only one copy, so nothing needed */ if (bitmap->storage.file) return; for (i = 0; i < bitmap->storage.file_pages; i++) set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); bitmap->allclean = 0; } static void md_bitmap_count_page(struct bitmap_counts *bitmap, sector_t offset, int inc) { sector_t chunk = offset >> bitmap->chunkshift; unsigned long page = chunk >> PAGE_COUNTER_SHIFT; bitmap->bp[page].count += inc; md_bitmap_checkfree(bitmap, page); } static void md_bitmap_set_pending(struct bitmap_counts *bitmap, sector_t offset) { sector_t chunk = offset >> bitmap->chunkshift; unsigned long page = chunk >> PAGE_COUNTER_SHIFT; struct bitmap_page *bp = &bitmap->bp[page]; if (!bp->pending) bp->pending = 1; } static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, sector_t offset, sector_t *blocks, int create); static void mddev_set_timeout(struct mddev *mddev, unsigned long timeout, bool force) { struct md_thread *thread; rcu_read_lock(); thread = rcu_dereference(mddev->thread); if (!thread) goto out; if (force || thread->timeout < MAX_SCHEDULE_TIMEOUT) thread->timeout = timeout; out: rcu_read_unlock(); } /* * bitmap daemon -- periodically wakes up to clean bits and flush pages * out to disk */ static void bitmap_daemon_work(struct mddev *mddev) { struct bitmap *bitmap; unsigned long j; unsigned long nextpage; sector_t blocks; struct bitmap_counts *counts; /* Use a mutex to guard daemon_work against * bitmap_destroy. */ mutex_lock(&mddev->bitmap_info.mutex); bitmap = mddev->bitmap; if (bitmap == NULL) { mutex_unlock(&mddev->bitmap_info.mutex); return; } if (time_before(jiffies, bitmap->daemon_lastrun + mddev->bitmap_info.daemon_sleep)) goto done; bitmap->daemon_lastrun = jiffies; if (bitmap->allclean) { mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); goto done; } bitmap->allclean = 1; mddev_add_trace_msg(bitmap->mddev, "md bitmap_daemon_work"); /* Any file-page which is PENDING now needs to be written. * So set NEEDWRITE now, then after we make any last-minute changes * we will write it. */ for (j = 0; j < bitmap->storage.file_pages; j++) if (test_and_clear_page_attr(bitmap, j, BITMAP_PAGE_PENDING)) set_page_attr(bitmap, j, BITMAP_PAGE_NEEDWRITE); if (bitmap->need_sync && mddev->bitmap_info.external == 0) { /* Arrange for superblock update as well as * other changes */ bitmap_super_t *sb; bitmap->need_sync = 0; if (bitmap->storage.filemap) { sb = kmap_atomic(bitmap->storage.sb_page); sb->events_cleared = cpu_to_le64(bitmap->events_cleared); kunmap_atomic(sb); set_page_attr(bitmap, 0, BITMAP_PAGE_NEEDWRITE); } } /* Now look at the bitmap counters and if any are '2' or '1', * decrement and handle accordingly. */ counts = &bitmap->counts; spin_lock_irq(&counts->lock); nextpage = 0; for (j = 0; j < counts->chunks; j++) { bitmap_counter_t *bmc; sector_t block = (sector_t)j << counts->chunkshift; if (j == nextpage) { nextpage += PAGE_COUNTER_RATIO; if (!counts->bp[j >> PAGE_COUNTER_SHIFT].pending) { j |= PAGE_COUNTER_MASK; continue; } counts->bp[j >> PAGE_COUNTER_SHIFT].pending = 0; } bmc = md_bitmap_get_counter(counts, block, &blocks, 0); if (!bmc) { j |= PAGE_COUNTER_MASK; continue; } if (*bmc == 1 && !bitmap->need_sync) { /* We can clear the bit */ *bmc = 0; md_bitmap_count_page(counts, block, -1); md_bitmap_file_clear_bit(bitmap, block); } else if (*bmc && *bmc <= 2) { *bmc = 1; md_bitmap_set_pending(counts, block); bitmap->allclean = 0; } } spin_unlock_irq(&counts->lock); md_bitmap_wait_writes(bitmap); /* Now start writeout on any page in NEEDWRITE that isn't DIRTY. * DIRTY pages need to be written by bitmap_unplug so it can wait * for them. * If we find any DIRTY page we stop there and let bitmap_unplug * handle all the rest. This is important in the case where * the first blocking holds the superblock and it has been updated. * We mustn't write any other blocks before the superblock. */ for (j = 0; j < bitmap->storage.file_pages && !test_bit(BITMAP_STALE, &bitmap->flags); j++) { if (test_page_attr(bitmap, j, BITMAP_PAGE_DIRTY)) /* bitmap_unplug will handle the rest */ break; if (bitmap->storage.filemap && test_and_clear_page_attr(bitmap, j, BITMAP_PAGE_NEEDWRITE)) filemap_write_page(bitmap, j, false); } done: if (bitmap->allclean == 0) mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); mutex_unlock(&mddev->bitmap_info.mutex); } static bitmap_counter_t *md_bitmap_get_counter(struct bitmap_counts *bitmap, sector_t offset, sector_t *blocks, int create) __releases(bitmap->lock) __acquires(bitmap->lock) { /* If 'create', we might release the lock and reclaim it. * The lock must have been taken with interrupts enabled. * If !create, we don't release the lock. */ sector_t chunk = offset >> bitmap->chunkshift; unsigned long page = chunk >> PAGE_COUNTER_SHIFT; unsigned long pageoff = (chunk & PAGE_COUNTER_MASK) << COUNTER_BYTE_SHIFT; sector_t csize = ((sector_t)1) << bitmap->chunkshift; int err; if (page >= bitmap->pages) { /* * This can happen if bitmap_start_sync goes beyond * End-of-device while looking for a whole page or * user set a huge number to sysfs bitmap_set_bits. */ *blocks = csize - (offset & (csize - 1)); return NULL; } err = md_bitmap_checkpage(bitmap, page, create, 0); if (bitmap->bp[page].hijacked || bitmap->bp[page].map == NULL) csize = ((sector_t)1) << (bitmap->chunkshift + PAGE_COUNTER_SHIFT); *blocks = csize - (offset & (csize - 1)); if (err < 0) return NULL; /* now locked ... */ if (bitmap->bp[page].hijacked) { /* hijacked pointer */ /* should we use the first or second counter field * of the hijacked pointer? */ int hi = (pageoff > PAGE_COUNTER_MASK); return &((bitmap_counter_t *) &bitmap->bp[page].map)[hi]; } else /* page is allocated */ return (bitmap_counter_t *) &(bitmap->bp[page].map[pageoff]); } static int bitmap_startwrite(struct mddev *mddev, sector_t offset, unsigned long sectors, bool behind) { struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return 0; if (behind) { int bw; atomic_inc(&bitmap->behind_writes); bw = atomic_read(&bitmap->behind_writes); if (bw > bitmap->behind_writes_used) bitmap->behind_writes_used = bw; pr_debug("inc write-behind count %d/%lu\n", bw, bitmap->mddev->bitmap_info.max_write_behind); } while (sectors) { sector_t blocks; bitmap_counter_t *bmc; spin_lock_irq(&bitmap->counts.lock); bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 1); if (!bmc) { spin_unlock_irq(&bitmap->counts.lock); return 0; } if (unlikely(COUNTER(*bmc) == COUNTER_MAX)) { DEFINE_WAIT(__wait); /* note that it is safe to do the prepare_to_wait * after the test as long as we do it before dropping * the spinlock. */ prepare_to_wait(&bitmap->overflow_wait, &__wait, TASK_UNINTERRUPTIBLE); spin_unlock_irq(&bitmap->counts.lock); schedule(); finish_wait(&bitmap->overflow_wait, &__wait); continue; } switch (*bmc) { case 0: md_bitmap_file_set_bit(bitmap, offset); md_bitmap_count_page(&bitmap->counts, offset, 1); fallthrough; case 1: *bmc = 2; } (*bmc)++; spin_unlock_irq(&bitmap->counts.lock); offset += blocks; if (sectors > blocks) sectors -= blocks; else sectors = 0; } return 0; } static void bitmap_endwrite(struct mddev *mddev, sector_t offset, unsigned long sectors, bool success, bool behind) { struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return; if (behind) { if (atomic_dec_and_test(&bitmap->behind_writes)) wake_up(&bitmap->behind_wait); pr_debug("dec write-behind count %d/%lu\n", atomic_read(&bitmap->behind_writes), bitmap->mddev->bitmap_info.max_write_behind); } while (sectors) { sector_t blocks; unsigned long flags; bitmap_counter_t *bmc; spin_lock_irqsave(&bitmap->counts.lock, flags); bmc = md_bitmap_get_counter(&bitmap->counts, offset, &blocks, 0); if (!bmc) { spin_unlock_irqrestore(&bitmap->counts.lock, flags); return; } if (success && !bitmap->mddev->degraded && bitmap->events_cleared < bitmap->mddev->events) { bitmap->events_cleared = bitmap->mddev->events; bitmap->need_sync = 1; sysfs_notify_dirent_safe(bitmap->sysfs_can_clear); } if (!success && !NEEDED(*bmc)) *bmc |= NEEDED_MASK; if (COUNTER(*bmc) == COUNTER_MAX) wake_up(&bitmap->overflow_wait); (*bmc)--; if (*bmc <= 2) { md_bitmap_set_pending(&bitmap->counts, offset); bitmap->allclean = 0; } spin_unlock_irqrestore(&bitmap->counts.lock, flags); offset += blocks; if (sectors > blocks) sectors -= blocks; else sectors = 0; } } static bool __bitmap_start_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, bool degraded) { bitmap_counter_t *bmc; bool rv; if (bitmap == NULL) {/* FIXME or bitmap set as 'failed' */ *blocks = 1024; return true; /* always resync if no bitmap */ } spin_lock_irq(&bitmap->counts.lock); rv = false; bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); if (bmc) { /* locked */ if (RESYNC(*bmc)) { rv = true; } else if (NEEDED(*bmc)) { rv = true; if (!degraded) { /* don't set/clear bits if degraded */ *bmc |= RESYNC_MASK; *bmc &= ~NEEDED_MASK; } } } spin_unlock_irq(&bitmap->counts.lock); return rv; } static bool bitmap_start_sync(struct mddev *mddev, sector_t offset, sector_t *blocks, bool degraded) { /* bitmap_start_sync must always report on multiples of whole * pages, otherwise resync (which is very PAGE_SIZE based) will * get confused. * So call __bitmap_start_sync repeatedly (if needed) until * At least PAGE_SIZE>>9 blocks are covered. * Return the 'or' of the result. */ bool rv = false; sector_t blocks1; *blocks = 0; while (*blocks < (PAGE_SIZE>>9)) { rv |= __bitmap_start_sync(mddev->bitmap, offset, &blocks1, degraded); offset += blocks1; *blocks += blocks1; } return rv; } static void __bitmap_end_sync(struct bitmap *bitmap, sector_t offset, sector_t *blocks, bool aborted) { bitmap_counter_t *bmc; unsigned long flags; if (bitmap == NULL) { *blocks = 1024; return; } spin_lock_irqsave(&bitmap->counts.lock, flags); bmc = md_bitmap_get_counter(&bitmap->counts, offset, blocks, 0); if (bmc == NULL) goto unlock; /* locked */ if (RESYNC(*bmc)) { *bmc &= ~RESYNC_MASK; if (!NEEDED(*bmc) && aborted) *bmc |= NEEDED_MASK; else { if (*bmc <= 2) { md_bitmap_set_pending(&bitmap->counts, offset); bitmap->allclean = 0; } } } unlock: spin_unlock_irqrestore(&bitmap->counts.lock, flags); } static void bitmap_end_sync(struct mddev *mddev, sector_t offset, sector_t *blocks) { __bitmap_end_sync(mddev->bitmap, offset, blocks, true); } static void bitmap_close_sync(struct mddev *mddev) { /* Sync has finished, and any bitmap chunks that weren't synced * properly have been aborted. It remains to us to clear the * RESYNC bit wherever it is still on */ sector_t sector = 0; sector_t blocks; struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return; while (sector < bitmap->mddev->resync_max_sectors) { __bitmap_end_sync(bitmap, sector, &blocks, false); sector += blocks; } } static void bitmap_cond_end_sync(struct mddev *mddev, sector_t sector, bool force) { sector_t s = 0; sector_t blocks; struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return; if (sector == 0) { bitmap->last_end_sync = jiffies; return; } if (!force && time_before(jiffies, (bitmap->last_end_sync + bitmap->mddev->bitmap_info.daemon_sleep))) return; wait_event(bitmap->mddev->recovery_wait, atomic_read(&bitmap->mddev->recovery_active) == 0); bitmap->mddev->curr_resync_completed = sector; set_bit(MD_SB_CHANGE_CLEAN, &bitmap->mddev->sb_flags); sector &= ~((1ULL << bitmap->counts.chunkshift) - 1); s = 0; while (s < sector && s < bitmap->mddev->resync_max_sectors) { __bitmap_end_sync(bitmap, s, &blocks, false); s += blocks; } bitmap->last_end_sync = jiffies; sysfs_notify_dirent_safe(bitmap->mddev->sysfs_completed); } static void bitmap_sync_with_cluster(struct mddev *mddev, sector_t old_lo, sector_t old_hi, sector_t new_lo, sector_t new_hi) { struct bitmap *bitmap = mddev->bitmap; sector_t sector, blocks = 0; for (sector = old_lo; sector < new_lo; ) { __bitmap_end_sync(bitmap, sector, &blocks, false); sector += blocks; } WARN((blocks > new_lo) && old_lo, "alignment is not correct for lo\n"); for (sector = old_hi; sector < new_hi; ) { bitmap_start_sync(mddev, sector, &blocks, false); sector += blocks; } WARN((blocks > new_hi) && old_hi, "alignment is not correct for hi\n"); } static void md_bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) { /* For each chunk covered by any of these sectors, set the * counter to 2 and possibly set resync_needed. They should all * be 0 at this point */ sector_t secs; bitmap_counter_t *bmc; spin_lock_irq(&bitmap->counts.lock); bmc = md_bitmap_get_counter(&bitmap->counts, offset, &secs, 1); if (!bmc) { spin_unlock_irq(&bitmap->counts.lock); return; } if (!*bmc) { *bmc = 2; md_bitmap_count_page(&bitmap->counts, offset, 1); md_bitmap_set_pending(&bitmap->counts, offset); bitmap->allclean = 0; } if (needed) *bmc |= NEEDED_MASK; spin_unlock_irq(&bitmap->counts.lock); } /* dirty the memory and file bits for bitmap chunks "s" to "e" */ static void bitmap_dirty_bits(struct mddev *mddev, unsigned long s, unsigned long e) { unsigned long chunk; struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return; for (chunk = s; chunk <= e; chunk++) { sector_t sec = (sector_t)chunk << bitmap->counts.chunkshift; md_bitmap_set_memory_bits(bitmap, sec, 1); md_bitmap_file_set_bit(bitmap, sec); if (sec < bitmap->mddev->recovery_cp) /* We are asserting that the array is dirty, * so move the recovery_cp address back so * that it is obvious that it is dirty */ bitmap->mddev->recovery_cp = sec; } } static void bitmap_flush(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; long sleep; if (!bitmap) /* there was no bitmap */ return; /* run the daemon_work three time to ensure everything is flushed * that can be */ sleep = mddev->bitmap_info.daemon_sleep * 2; bitmap->daemon_lastrun -= sleep; bitmap_daemon_work(mddev); bitmap->daemon_lastrun -= sleep; bitmap_daemon_work(mddev); bitmap->daemon_lastrun -= sleep; bitmap_daemon_work(mddev); if (mddev->bitmap_info.external) md_super_wait(mddev); bitmap_update_sb(bitmap); } static void md_bitmap_free(void *data) { unsigned long k, pages; struct bitmap_page *bp; struct bitmap *bitmap = data; if (!bitmap) /* there was no bitmap */ return; if (bitmap->sysfs_can_clear) sysfs_put(bitmap->sysfs_can_clear); if (mddev_is_clustered(bitmap->mddev) && bitmap->mddev->cluster_info && bitmap->cluster_slot == md_cluster_ops->slot_number(bitmap->mddev)) md_cluster_stop(bitmap->mddev); /* Shouldn't be needed - but just in case.... */ wait_event(bitmap->write_wait, atomic_read(&bitmap->pending_writes) == 0); /* release the bitmap file */ md_bitmap_file_unmap(&bitmap->storage); bp = bitmap->counts.bp; pages = bitmap->counts.pages; /* free all allocated memory */ if (bp) /* deallocate the page memory */ for (k = 0; k < pages; k++) if (bp[k].map && !bp[k].hijacked) kfree(bp[k].map); kfree(bp); kfree(bitmap); } static void bitmap_wait_behind_writes(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; /* wait for behind writes to complete */ if (bitmap && atomic_read(&bitmap->behind_writes) > 0) { pr_debug("md:%s: behind writes in progress - waiting to stop.\n", mdname(mddev)); /* need to kick something here to make sure I/O goes? */ wait_event(bitmap->behind_wait, atomic_read(&bitmap->behind_writes) == 0); } } static void bitmap_destroy(struct mddev *mddev) { struct bitmap *bitmap = mddev->bitmap; if (!bitmap) /* there was no bitmap */ return; bitmap_wait_behind_writes(mddev); if (!mddev->serialize_policy) mddev_destroy_serial_pool(mddev, NULL); mutex_lock(&mddev->bitmap_info.mutex); spin_lock(&mddev->lock); mddev->bitmap = NULL; /* disconnect from the md device */ spin_unlock(&mddev->lock); mutex_unlock(&mddev->bitmap_info.mutex); mddev_set_timeout(mddev, MAX_SCHEDULE_TIMEOUT, true); md_bitmap_free(bitmap); } /* * initialize the bitmap structure * if this returns an error, bitmap_destroy must be called to do clean up * once mddev->bitmap is set */ static struct bitmap *__bitmap_create(struct mddev *mddev, int slot) { struct bitmap *bitmap; sector_t blocks = mddev->resync_max_sectors; struct file *file = mddev->bitmap_info.file; int err; struct kernfs_node *bm = NULL; BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); BUG_ON(file && mddev->bitmap_info.offset); if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { pr_notice("md/raid:%s: array with journal cannot have bitmap\n", mdname(mddev)); return ERR_PTR(-EBUSY); } bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); if (!bitmap) return ERR_PTR(-ENOMEM); spin_lock_init(&bitmap->counts.lock); atomic_set(&bitmap->pending_writes, 0); init_waitqueue_head(&bitmap->write_wait); init_waitqueue_head(&bitmap->overflow_wait); init_waitqueue_head(&bitmap->behind_wait); bitmap->mddev = mddev; bitmap->cluster_slot = slot; if (mddev->kobj.sd) bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap"); if (bm) { bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear"); sysfs_put(bm); } else bitmap->sysfs_can_clear = NULL; bitmap->storage.file = file; if (file) { get_file(file); /* As future accesses to this file will use bmap, * and bypass the page cache, we must sync the file * first. */ vfs_fsync(file, 1); } /* read superblock from bitmap file (this sets mddev->bitmap_info.chunksize) */ if (!mddev->bitmap_info.external) { /* * If 'MD_ARRAY_FIRST_USE' is set, then device-mapper is * instructing us to create a new on-disk bitmap instance. */ if (test_and_clear_bit(MD_ARRAY_FIRST_USE, &mddev->flags)) err = md_bitmap_new_disk_sb(bitmap); else err = md_bitmap_read_sb(bitmap); } else { err = 0; if (mddev->bitmap_info.chunksize == 0 || mddev->bitmap_info.daemon_sleep == 0) /* chunksize and time_base need to be * set first. */ err = -EINVAL; } if (err) goto error; bitmap->daemon_lastrun = jiffies; err = __bitmap_resize(bitmap, blocks, mddev->bitmap_info.chunksize, true); if (err) goto error; pr_debug("created bitmap (%lu pages) for device %s\n", bitmap->counts.pages, bmname(bitmap)); err = test_bit(BITMAP_WRITE_ERROR, &bitmap->flags) ? -EIO : 0; if (err) goto error; return bitmap; error: md_bitmap_free(bitmap); return ERR_PTR(err); } static int bitmap_create(struct mddev *mddev, int slot) { struct bitmap *bitmap = __bitmap_create(mddev, slot); if (IS_ERR(bitmap)) return PTR_ERR(bitmap); mddev->bitmap = bitmap; return 0; } static int bitmap_load(struct mddev *mddev) { int err = 0; sector_t start = 0; sector_t sector = 0; struct bitmap *bitmap = mddev->bitmap; struct md_rdev *rdev; if (!bitmap) goto out; rdev_for_each(rdev, mddev) mddev_create_serial_pool(mddev, rdev); if (mddev_is_clustered(mddev)) md_cluster_ops->load_bitmaps(mddev, mddev->bitmap_info.nodes); /* Clear out old bitmap info first: Either there is none, or we * are resuming after someone else has possibly changed things, * so we should forget old cached info. * All chunks should be clean, but some might need_sync. */ while (sector < mddev->resync_max_sectors) { sector_t blocks; bitmap_start_sync(mddev, sector, &blocks, false); sector += blocks; } bitmap_close_sync(mddev); if (mddev->degraded == 0 || bitmap->events_cleared == mddev->events) /* no need to keep dirty bits to optimise a * re-add of a missing device */ start = mddev->recovery_cp; mutex_lock(&mddev->bitmap_info.mutex); err = md_bitmap_init_from_disk(bitmap, start); mutex_unlock(&mddev->bitmap_info.mutex); if (err) goto out; clear_bit(BITMAP_STALE, &bitmap->flags); /* Kick recovery in case any bits were set */ set_bit(MD_RECOVERY_NEEDED, &bitmap->mddev->recovery); mddev_set_timeout(mddev, mddev->bitmap_info.daemon_sleep, true); md_wakeup_thread(mddev->thread); bitmap_update_sb(bitmap); if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) err = -EIO; out: return err; } /* caller need to free returned bitmap with md_bitmap_free() */ static void *bitmap_get_from_slot(struct mddev *mddev, int slot) { int rv = 0; struct bitmap *bitmap; bitmap = __bitmap_create(mddev, slot); if (IS_ERR(bitmap)) { rv = PTR_ERR(bitmap); return ERR_PTR(rv); } rv = md_bitmap_init_from_disk(bitmap, 0); if (rv) { md_bitmap_free(bitmap); return ERR_PTR(rv); } return bitmap; } /* Loads the bitmap associated with slot and copies the resync information * to our bitmap */ static int bitmap_copy_from_slot(struct mddev *mddev, int slot, sector_t *low, sector_t *high, bool clear_bits) { int rv = 0, i, j; sector_t block, lo = 0, hi = 0; struct bitmap_counts *counts; struct bitmap *bitmap; bitmap = bitmap_get_from_slot(mddev, slot); if (IS_ERR(bitmap)) { pr_err("%s can't get bitmap from slot %d\n", __func__, slot); return -1; } counts = &bitmap->counts; for (j = 0; j < counts->chunks; j++) { block = (sector_t)j << counts->chunkshift; if (md_bitmap_file_test_bit(bitmap, block)) { if (!lo) lo = block; hi = block; md_bitmap_file_clear_bit(bitmap, block); md_bitmap_set_memory_bits(mddev->bitmap, block, 1); md_bitmap_file_set_bit(mddev->bitmap, block); } } if (clear_bits) { bitmap_update_sb(bitmap); /* BITMAP_PAGE_PENDING is set, but bitmap_unplug needs * BITMAP_PAGE_DIRTY or _NEEDWRITE to write ... */ for (i = 0; i < bitmap->storage.file_pages; i++) if (test_page_attr(bitmap, i, BITMAP_PAGE_PENDING)) set_page_attr(bitmap, i, BITMAP_PAGE_NEEDWRITE); __bitmap_unplug(bitmap); } __bitmap_unplug(mddev->bitmap); *low = lo; *high = hi; md_bitmap_free(bitmap); return rv; } static void bitmap_set_pages(void *data, unsigned long pages) { struct bitmap *bitmap = data; bitmap->counts.pages = pages; } static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats) { struct bitmap_storage *storage; struct bitmap_counts *counts; struct bitmap *bitmap = data; bitmap_super_t *sb; if (!bitmap) return -ENOENT; sb = kmap_local_page(bitmap->storage.sb_page); stats->sync_size = le64_to_cpu(sb->sync_size); kunmap_local(sb); counts = &bitmap->counts; stats->missing_pages = counts->missing_pages; stats->pages = counts->pages; storage = &bitmap->storage; stats->file_pages = storage->file_pages; stats->file = storage->file; stats->behind_writes = atomic_read(&bitmap->behind_writes); stats->behind_wait = wq_has_sleeper(&bitmap->behind_wait); stats->events_cleared = bitmap->events_cleared; return 0; } static int __bitmap_resize(struct bitmap *bitmap, sector_t blocks, int chunksize, bool init) { /* If chunk_size is 0, choose an appropriate chunk size. * Then possibly allocate new storage space. * Then quiesce, copy bits, replace bitmap, and re-start * * This function is called both to set up the initial bitmap * and to resize the bitmap while the array is active. * If this happens as a result of the array being resized, * chunksize will be zero, and we need to choose a suitable * chunksize, otherwise we use what we are given. */ struct bitmap_storage store; struct bitmap_counts old_counts; unsigned long chunks; sector_t block; sector_t old_blocks, new_blocks; int chunkshift; int ret = 0; long pages; struct bitmap_page *new_bp; if (bitmap->storage.file && !init) { pr_info("md: cannot resize file-based bitmap\n"); return -EINVAL; } if (chunksize == 0) { /* If there is enough space, leave the chunk size unchanged, * else increase by factor of two until there is enough space. */ long bytes; long space = bitmap->mddev->bitmap_info.space; if (space == 0) { /* We don't know how much space there is, so limit * to current size - in sectors. */ bytes = DIV_ROUND_UP(bitmap->counts.chunks, 8); if (!bitmap->mddev->bitmap_info.external) bytes += sizeof(bitmap_super_t); space = DIV_ROUND_UP(bytes, 512); bitmap->mddev->bitmap_info.space = space; } chunkshift = bitmap->counts.chunkshift; chunkshift--; do { /* 'chunkshift' is shift from block size to chunk size */ chunkshift++; chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); bytes = DIV_ROUND_UP(chunks, 8); if (!bitmap->mddev->bitmap_info.external) bytes += sizeof(bitmap_super_t); } while (bytes > (space << 9) && (chunkshift + BITMAP_BLOCK_SHIFT) < (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize) - 1)); } else chunkshift = ffz(~chunksize) - BITMAP_BLOCK_SHIFT; chunks = DIV_ROUND_UP_SECTOR_T(blocks, 1 << chunkshift); memset(&store, 0, sizeof(store)); if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file) ret = md_bitmap_storage_alloc(&store, chunks, !bitmap->mddev->bitmap_info.external, mddev_is_clustered(bitmap->mddev) ? bitmap->cluster_slot : 0); if (ret) { md_bitmap_file_unmap(&store); goto err; } pages = DIV_ROUND_UP(chunks, PAGE_COUNTER_RATIO); new_bp = kcalloc(pages, sizeof(*new_bp), GFP_KERNEL); ret = -ENOMEM; if (!new_bp) { md_bitmap_file_unmap(&store); goto err; } if (!init) bitmap->mddev->pers->quiesce(bitmap->mddev, 1); store.file = bitmap->storage.file; bitmap->storage.file = NULL; if (store.sb_page && bitmap->storage.sb_page) memcpy(page_address(store.sb_page), page_address(bitmap->storage.sb_page), sizeof(bitmap_super_t)); spin_lock_irq(&bitmap->counts.lock); md_bitmap_file_unmap(&bitmap->storage); bitmap->storage = store; old_counts = bitmap->counts; bitmap->counts.bp = new_bp; bitmap->counts.pages = pages; bitmap->counts.missing_pages = pages; bitmap->counts.chunkshift = chunkshift; bitmap->counts.chunks = chunks; bitmap->mddev->bitmap_info.chunksize = 1UL << (chunkshift + BITMAP_BLOCK_SHIFT); blocks = min(old_counts.chunks << old_counts.chunkshift, chunks << chunkshift); /* For cluster raid, need to pre-allocate bitmap */ if (mddev_is_clustered(bitmap->mddev)) { unsigned long page; for (page = 0; page < pages; page++) { ret = md_bitmap_checkpage(&bitmap->counts, page, 1, 1); if (ret) { unsigned long k; /* deallocate the page memory */ for (k = 0; k < page; k++) { kfree(new_bp[k].map); } kfree(new_bp); /* restore some fields from old_counts */ bitmap->counts.bp = old_counts.bp; bitmap->counts.pages = old_counts.pages; bitmap->counts.missing_pages = old_counts.pages; bitmap->counts.chunkshift = old_counts.chunkshift; bitmap->counts.chunks = old_counts.chunks; bitmap->mddev->bitmap_info.chunksize = 1UL << (old_counts.chunkshift + BITMAP_BLOCK_SHIFT); blocks = old_counts.chunks << old_counts.chunkshift; pr_warn("Could not pre-allocate in-memory bitmap for cluster raid\n"); break; } else bitmap->counts.bp[page].count += 1; } } for (block = 0; block < blocks; ) { bitmap_counter_t *bmc_old, *bmc_new; int set; bmc_old = md_bitmap_get_counter(&old_counts, block, &old_blocks, 0); set = bmc_old && NEEDED(*bmc_old); if (set) { bmc_new = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); if (bmc_new) { if (*bmc_new == 0) { /* need to set on-disk bits too. */ sector_t end = block + new_blocks; sector_t start = block >> chunkshift; start <<= chunkshift; while (start < end) { md_bitmap_file_set_bit(bitmap, block); start += 1 << chunkshift; } *bmc_new = 2; md_bitmap_count_page(&bitmap->counts, block, 1); md_bitmap_set_pending(&bitmap->counts, block); } *bmc_new |= NEEDED_MASK; } if (new_blocks < old_blocks) old_blocks = new_blocks; } block += old_blocks; } if (bitmap->counts.bp != old_counts.bp) { unsigned long k; for (k = 0; k < old_counts.pages; k++) if (!old_counts.bp[k].hijacked) kfree(old_counts.bp[k].map); kfree(old_counts.bp); } if (!init) { int i; while (block < (chunks << chunkshift)) { bitmap_counter_t *bmc; bmc = md_bitmap_get_counter(&bitmap->counts, block, &new_blocks, 1); if (bmc) { /* new space. It needs to be resynced, so * we set NEEDED_MASK. */ if (*bmc == 0) { *bmc = NEEDED_MASK | 2; md_bitmap_count_page(&bitmap->counts, block, 1); md_bitmap_set_pending(&bitmap->counts, block); } } block += new_blocks; } for (i = 0; i < bitmap->storage.file_pages; i++) set_page_attr(bitmap, i, BITMAP_PAGE_DIRTY); } spin_unlock_irq(&bitmap->counts.lock); if (!init) { __bitmap_unplug(bitmap); bitmap->mddev->pers->quiesce(bitmap->mddev, 0); } ret = 0; err: return ret; } static int bitmap_resize(struct mddev *mddev, sector_t blocks, int chunksize, bool init) { struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return 0; return __bitmap_resize(bitmap, blocks, chunksize, init); } static ssize_t location_show(struct mddev *mddev, char *page) { ssize_t len; if (mddev->bitmap_info.file) len = sprintf(page, "file"); else if (mddev->bitmap_info.offset) len = sprintf(page, "%+lld", (long long)mddev->bitmap_info.offset); else len = sprintf(page, "none"); len += sprintf(page+len, "\n"); return len; } static ssize_t location_store(struct mddev *mddev, const char *buf, size_t len) { int rv; rv = mddev_suspend_and_lock(mddev); if (rv) return rv; if (mddev->pers) { if (mddev->recovery || mddev->sync_thread) { rv = -EBUSY; goto out; } } if (mddev->bitmap || mddev->bitmap_info.file || mddev->bitmap_info.offset) { /* bitmap already configured. Only option is to clear it */ if (strncmp(buf, "none", 4) != 0) { rv = -EBUSY; goto out; } bitmap_destroy(mddev); mddev->bitmap_info.offset = 0; if (mddev->bitmap_info.file) { struct file *f = mddev->bitmap_info.file; mddev->bitmap_info.file = NULL; fput(f); } } else { /* No bitmap, OK to set a location */ long long offset; if (strncmp(buf, "none", 4) == 0) /* nothing to be done */; else if (strncmp(buf, "file:", 5) == 0) { /* Not supported yet */ rv = -EINVAL; goto out; } else { if (buf[0] == '+') rv = kstrtoll(buf+1, 10, &offset); else rv = kstrtoll(buf, 10, &offset); if (rv) goto out; if (offset == 0) { rv = -EINVAL; goto out; } if (mddev->bitmap_info.external == 0 && mddev->major_version == 0 && offset != mddev->bitmap_info.default_offset) { rv = -EINVAL; goto out; } mddev->bitmap_info.offset = offset; rv = bitmap_create(mddev, -1); if (rv) goto out; rv = bitmap_load(mddev); if (rv) { mddev->bitmap_info.offset = 0; bitmap_destroy(mddev); goto out; } } } if (!mddev->external) { /* Ensure new bitmap info is stored in * metadata promptly. */ set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags); md_wakeup_thread(mddev->thread); } rv = 0; out: mddev_unlock_and_resume(mddev); if (rv) return rv; return len; } static struct md_sysfs_entry bitmap_location = __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); /* 'bitmap/space' is the space available at 'location' for the * bitmap. This allows the kernel to know when it is safe to * resize the bitmap to match a resized array. */ static ssize_t space_show(struct mddev *mddev, char *page) { return sprintf(page, "%lu\n", mddev->bitmap_info.space); } static ssize_t space_store(struct mddev *mddev, const char *buf, size_t len) { struct bitmap *bitmap; unsigned long sectors; int rv; rv = kstrtoul(buf, 10, §ors); if (rv) return rv; if (sectors == 0) return -EINVAL; bitmap = mddev->bitmap; if (bitmap && sectors < (bitmap->storage.bytes + 511) >> 9) return -EFBIG; /* Bitmap is too big for this small space */ /* could make sure it isn't too big, but that isn't really * needed - user-space should be careful. */ mddev->bitmap_info.space = sectors; return len; } static struct md_sysfs_entry bitmap_space = __ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store); static ssize_t timeout_show(struct mddev *mddev, char *page) { ssize_t len; unsigned long secs = mddev->bitmap_info.daemon_sleep / HZ; unsigned long jifs = mddev->bitmap_info.daemon_sleep % HZ; len = sprintf(page, "%lu", secs); if (jifs) len += sprintf(page+len, ".%03u", jiffies_to_msecs(jifs)); len += sprintf(page+len, "\n"); return len; } static ssize_t timeout_store(struct mddev *mddev, const char *buf, size_t len) { /* timeout can be set at any time */ unsigned long timeout; int rv = strict_strtoul_scaled(buf, &timeout, 4); if (rv) return rv; /* just to make sure we don't overflow... */ if (timeout >= LONG_MAX / HZ) return -EINVAL; timeout = timeout * HZ / 10000; if (timeout >= MAX_SCHEDULE_TIMEOUT) timeout = MAX_SCHEDULE_TIMEOUT-1; if (timeout < 1) timeout = 1; mddev->bitmap_info.daemon_sleep = timeout; mddev_set_timeout(mddev, timeout, false); md_wakeup_thread(mddev->thread); return len; } static struct md_sysfs_entry bitmap_timeout = __ATTR(time_base, S_IRUGO|S_IWUSR, timeout_show, timeout_store); static ssize_t backlog_show(struct mddev *mddev, char *page) { return sprintf(page, "%lu\n", mddev->bitmap_info.max_write_behind); } static ssize_t backlog_store(struct mddev *mddev, const char *buf, size_t len) { unsigned long backlog; unsigned long old_mwb = mddev->bitmap_info.max_write_behind; struct md_rdev *rdev; bool has_write_mostly = false; int rv = kstrtoul(buf, 10, &backlog); if (rv) return rv; if (backlog > COUNTER_MAX) return -EINVAL; rv = mddev_suspend_and_lock(mddev); if (rv) return rv; /* * Without write mostly device, it doesn't make sense to set * backlog for max_write_behind. */ rdev_for_each(rdev, mddev) { if (test_bit(WriteMostly, &rdev->flags)) { has_write_mostly = true; break; } } if (!has_write_mostly) { pr_warn_ratelimited("%s: can't set backlog, no write mostly device available\n", mdname(mddev)); mddev_unlock(mddev); return -EINVAL; } mddev->bitmap_info.max_write_behind = backlog; if (!backlog && mddev->serial_info_pool) { /* serial_info_pool is not needed if backlog is zero */ if (!mddev->serialize_policy) mddev_destroy_serial_pool(mddev, NULL); } else if (backlog && !mddev->serial_info_pool) { /* serial_info_pool is needed since backlog is not zero */ rdev_for_each(rdev, mddev) mddev_create_serial_pool(mddev, rdev); } if (old_mwb != backlog) bitmap_update_sb(mddev->bitmap); mddev_unlock_and_resume(mddev); return len; } static struct md_sysfs_entry bitmap_backlog = __ATTR(backlog, S_IRUGO|S_IWUSR, backlog_show, backlog_store); static ssize_t chunksize_show(struct mddev *mddev, char *page) { return sprintf(page, "%lu\n", mddev->bitmap_info.chunksize); } static ssize_t chunksize_store(struct mddev *mddev, const char *buf, size_t len) { /* Can only be changed when no bitmap is active */ int rv; unsigned long csize; if (mddev->bitmap) return -EBUSY; rv = kstrtoul(buf, 10, &csize); if (rv) return rv; if (csize < 512 || !is_power_of_2(csize)) return -EINVAL; if (BITS_PER_LONG > 32 && csize >= (1ULL << (BITS_PER_BYTE * sizeof(((bitmap_super_t *)0)->chunksize)))) return -EOVERFLOW; mddev->bitmap_info.chunksize = csize; return len; } static struct md_sysfs_entry bitmap_chunksize = __ATTR(chunksize, S_IRUGO|S_IWUSR, chunksize_show, chunksize_store); static ssize_t metadata_show(struct mddev *mddev, char *page) { if (mddev_is_clustered(mddev)) return sprintf(page, "clustered\n"); return sprintf(page, "%s\n", (mddev->bitmap_info.external ? "external" : "internal")); } static ssize_t metadata_store(struct mddev *mddev, const char *buf, size_t len) { if (mddev->bitmap || mddev->bitmap_info.file || mddev->bitmap_info.offset) return -EBUSY; if (strncmp(buf, "external", 8) == 0) mddev->bitmap_info.external = 1; else if ((strncmp(buf, "internal", 8) == 0) || (strncmp(buf, "clustered", 9) == 0)) mddev->bitmap_info.external = 0; else return -EINVAL; return len; } static struct md_sysfs_entry bitmap_metadata = __ATTR(metadata, S_IRUGO|S_IWUSR, metadata_show, metadata_store); static ssize_t can_clear_show(struct mddev *mddev, char *page) { int len; struct bitmap *bitmap; spin_lock(&mddev->lock); bitmap = mddev->bitmap; if (bitmap) len = sprintf(page, "%s\n", (bitmap->need_sync ? "false" : "true")); else len = sprintf(page, "\n"); spin_unlock(&mddev->lock); return len; } static ssize_t can_clear_store(struct mddev *mddev, const char *buf, size_t len) { struct bitmap *bitmap = mddev->bitmap; if (!bitmap) return -ENOENT; if (strncmp(buf, "false", 5) == 0) { bitmap->need_sync = 1; return len; } if (strncmp(buf, "true", 4) == 0) { if (mddev->degraded) return -EBUSY; bitmap->need_sync = 0; return len; } return -EINVAL; } static struct md_sysfs_entry bitmap_can_clear = __ATTR(can_clear, S_IRUGO|S_IWUSR, can_clear_show, can_clear_store); static ssize_t behind_writes_used_show(struct mddev *mddev, char *page) { ssize_t ret; struct bitmap *bitmap; spin_lock(&mddev->lock); bitmap = mddev->bitmap; if (!bitmap) ret = sprintf(page, "0\n"); else ret = sprintf(page, "%lu\n", bitmap->behind_writes_used); spin_unlock(&mddev->lock); return ret; } static ssize_t behind_writes_used_reset(struct mddev *mddev, const char *buf, size_t len) { struct bitmap *bitmap = mddev->bitmap; if (bitmap) bitmap->behind_writes_used = 0; return len; } static struct md_sysfs_entry max_backlog_used = __ATTR(max_backlog_used, S_IRUGO | S_IWUSR, behind_writes_used_show, behind_writes_used_reset); static struct attribute *md_bitmap_attrs[] = { &bitmap_location.attr, &bitmap_space.attr, &bitmap_timeout.attr, &bitmap_backlog.attr, &bitmap_chunksize.attr, &bitmap_metadata.attr, &bitmap_can_clear.attr, &max_backlog_used.attr, NULL }; const struct attribute_group md_bitmap_group = { .name = "bitmap", .attrs = md_bitmap_attrs, }; static struct bitmap_operations bitmap_ops = { .enabled = bitmap_enabled, .create = bitmap_create, .resize = bitmap_resize, .load = bitmap_load, .destroy = bitmap_destroy, .flush = bitmap_flush, .write_all = bitmap_write_all, .dirty_bits = bitmap_dirty_bits, .unplug = bitmap_unplug, .daemon_work = bitmap_daemon_work, .wait_behind_writes = bitmap_wait_behind_writes, .startwrite = bitmap_startwrite, .endwrite = bitmap_endwrite, .start_sync = bitmap_start_sync, .end_sync = bitmap_end_sync, .cond_end_sync = bitmap_cond_end_sync, .close_sync = bitmap_close_sync, .update_sb = bitmap_update_sb, .get_stats = bitmap_get_stats, .sync_with_cluster = bitmap_sync_with_cluster, .get_from_slot = bitmap_get_from_slot, .copy_from_slot = bitmap_copy_from_slot, .set_pages = bitmap_set_pages, .free = md_bitmap_free, }; void mddev_set_bitmap_ops(struct mddev *mddev) { mddev->bitmap_ops = &bitmap_ops; } |
28 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 | // SPDX-License-Identifier: BSD-3-Clause /* * linux/net/sunrpc/auth_gss/auth_gss.c * * RPCSEC_GSS client authentication. * * Copyright (c) 2000 The Regents of the University of Michigan. * All rights reserved. * * Dug Song <dugsong@monkey.org> * Andy Adamson <andros@umich.edu> */ #include <linux/module.h> #include <linux/init.h> #include <linux/types.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/pagemap.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/auth.h> #include <linux/sunrpc/auth_gss.h> #include <linux/sunrpc/gss_krb5.h> #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/gss_err.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/sunrpc/gss_api.h> #include <linux/uaccess.h> #include <linux/hashtable.h> #include "auth_gss_internal.h" #include "../netns.h" #include <trace/events/rpcgss.h> static const struct rpc_authops authgss_ops; static const struct rpc_credops gss_credops; static const struct rpc_credops gss_nullops; #define GSS_RETRY_EXPIRED 5 static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED; #define GSS_KEY_EXPIRE_TIMEO 240 static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) # define RPCDBG_FACILITY RPCDBG_AUTH #endif /* * This compile-time check verifies that we will not exceed the * slack space allotted by the client and server auth_gss code * before they call gss_wrap(). */ #define GSS_KRB5_MAX_SLACK_NEEDED \ (GSS_KRB5_TOK_HDR_LEN /* gss token header */ \ + GSS_KRB5_MAX_CKSUM_LEN /* gss token checksum */ \ + GSS_KRB5_MAX_BLOCKSIZE /* confounder */ \ + GSS_KRB5_MAX_BLOCKSIZE /* possible padding */ \ + GSS_KRB5_TOK_HDR_LEN /* encrypted hdr in v2 token */ \ + GSS_KRB5_MAX_CKSUM_LEN /* encryption hmac */ \ + XDR_UNIT * 2 /* RPC verifier */ \ + GSS_KRB5_TOK_HDR_LEN \ + GSS_KRB5_MAX_CKSUM_LEN) #define GSS_CRED_SLACK (RPC_MAX_AUTH_SIZE * 2) /* length of a krb5 verifier (48), plus data added before arguments when * using integrity (two 4-byte integers): */ #define GSS_VERF_SLACK 100 static DEFINE_HASHTABLE(gss_auth_hash_table, 4); static DEFINE_SPINLOCK(gss_auth_hash_lock); struct gss_pipe { struct rpc_pipe_dir_object pdo; struct rpc_pipe *pipe; struct rpc_clnt *clnt; const char *name; struct kref kref; }; struct gss_auth { struct kref kref; struct hlist_node hash; struct rpc_auth rpc_auth; struct gss_api_mech *mech; enum rpc_gss_svc service; struct rpc_clnt *client; struct net *net; netns_tracker ns_tracker; /* * There are two upcall pipes; dentry[1], named "gssd", is used * for the new text-based upcall; dentry[0] is named after the * mechanism (for example, "krb5") and exists for * backwards-compatibility with older gssd's. */ struct gss_pipe *gss_pipe[2]; const char *target_name; }; /* pipe_version >= 0 if and only if someone has a pipe open. */ static DEFINE_SPINLOCK(pipe_version_lock); static struct rpc_wait_queue pipe_version_rpc_waitqueue; static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); static void gss_put_auth(struct gss_auth *gss_auth); static void gss_free_ctx(struct gss_cl_ctx *); static const struct rpc_pipe_ops gss_upcall_ops_v0; static const struct rpc_pipe_ops gss_upcall_ops_v1; static inline struct gss_cl_ctx * gss_get_ctx(struct gss_cl_ctx *ctx) { refcount_inc(&ctx->count); return ctx; } static inline void gss_put_ctx(struct gss_cl_ctx *ctx) { if (refcount_dec_and_test(&ctx->count)) gss_free_ctx(ctx); } /* gss_cred_set_ctx: * called by gss_upcall_callback and gss_create_upcall in order * to set the gss context. The actual exchange of an old context * and a new one is protected by the pipe->lock. */ static void gss_cred_set_ctx(struct rpc_cred *cred, struct gss_cl_ctx *ctx) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) return; gss_get_ctx(ctx); rcu_assign_pointer(gss_cred->gc_ctx, ctx); set_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); smp_mb__before_atomic(); clear_bit(RPCAUTH_CRED_NEW, &cred->cr_flags); } static struct gss_cl_ctx * gss_cred_get_ctx(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = NULL; rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); if (ctx) gss_get_ctx(ctx); rcu_read_unlock(); return ctx; } static struct gss_cl_ctx * gss_alloc_context(void) { struct gss_cl_ctx *ctx; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx != NULL) { ctx->gc_proc = RPC_GSS_PROC_DATA; ctx->gc_seq = 1; /* NetApp 6.4R1 doesn't accept seq. no. 0 */ spin_lock_init(&ctx->gc_seq_lock); refcount_set(&ctx->count,1); } return ctx; } #define GSSD_MIN_TIMEOUT (60 * 60) static const void * gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct gss_api_mech *gm) { const void *q; unsigned int seclen; unsigned int timeout; unsigned long now = jiffies; u32 window_size; int ret; /* First unsigned int gives the remaining lifetime in seconds of the * credential - e.g. the remaining TGT lifetime for Kerberos or * the -t value passed to GSSD. */ p = simple_get_bytes(p, end, &timeout, sizeof(timeout)); if (IS_ERR(p)) goto err; if (timeout == 0) timeout = GSSD_MIN_TIMEOUT; ctx->gc_expiry = now + ((unsigned long)timeout * HZ); /* Sequence number window. Determines the maximum number of * simultaneous requests */ p = simple_get_bytes(p, end, &window_size, sizeof(window_size)); if (IS_ERR(p)) goto err; ctx->gc_win = window_size; /* gssd signals an error by passing ctx->gc_win = 0: */ if (ctx->gc_win == 0) { /* * in which case, p points to an error code. Anything other * than -EKEYEXPIRED gets converted to -EACCES. */ p = simple_get_bytes(p, end, &ret, sizeof(ret)); if (!IS_ERR(p)) p = (ret == -EKEYEXPIRED) ? ERR_PTR(-EKEYEXPIRED) : ERR_PTR(-EACCES); goto err; } /* copy the opaque wire context */ p = simple_get_netobj(p, end, &ctx->gc_wire_ctx); if (IS_ERR(p)) goto err; /* import the opaque security context */ p = simple_get_bytes(p, end, &seclen, sizeof(seclen)); if (IS_ERR(p)) goto err; q = (const void *)((const char *)p + seclen); if (unlikely(q > end || q < p)) { p = ERR_PTR(-EFAULT); goto err; } ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_KERNEL); if (ret < 0) { trace_rpcgss_import_ctx(ret); p = ERR_PTR(ret); goto err; } /* is there any trailing data? */ if (q == end) { p = q; goto done; } /* pull in acceptor name (if there is one) */ p = simple_get_netobj(q, end, &ctx->gc_acceptor); if (IS_ERR(p)) goto err; done: trace_rpcgss_context(window_size, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len, ctx->gc_acceptor.data); err: return p; } /* XXX: Need some documentation about why UPCALL_BUF_LEN is so small. * Is user space expecting no more than UPCALL_BUF_LEN bytes? * Note that there are now _two_ NI_MAXHOST sized data items * being passed in this string. */ #define UPCALL_BUF_LEN 256 struct gss_upcall_msg { refcount_t count; kuid_t uid; const char *service_name; struct rpc_pipe_msg msg; struct list_head list; struct gss_auth *auth; struct rpc_pipe *pipe; struct rpc_wait_queue rpc_waitqueue; wait_queue_head_t waitqueue; struct gss_cl_ctx *ctx; char databuf[UPCALL_BUF_LEN]; }; static int get_pipe_version(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret; spin_lock(&pipe_version_lock); if (sn->pipe_version >= 0) { atomic_inc(&sn->pipe_users); ret = sn->pipe_version; } else ret = -EAGAIN; spin_unlock(&pipe_version_lock); return ret; } static void put_pipe_version(struct net *net) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); if (atomic_dec_and_lock(&sn->pipe_users, &pipe_version_lock)) { sn->pipe_version = -1; spin_unlock(&pipe_version_lock); } } static void gss_release_msg(struct gss_upcall_msg *gss_msg) { struct net *net = gss_msg->auth->net; if (!refcount_dec_and_test(&gss_msg->count)) return; put_pipe_version(net); BUG_ON(!list_empty(&gss_msg->list)); if (gss_msg->ctx != NULL) gss_put_ctx(gss_msg->ctx); rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue); gss_put_auth(gss_msg->auth); kfree_const(gss_msg->service_name); kfree(gss_msg); } static struct gss_upcall_msg * __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth) { struct gss_upcall_msg *pos; list_for_each_entry(pos, &pipe->in_downcall, list) { if (!uid_eq(pos->uid, uid)) continue; if (pos->auth->service != auth->service) continue; refcount_inc(&pos->count); return pos; } return NULL; } /* Try to add an upcall to the pipefs queue. * If an upcall owned by our uid already exists, then we return a reference * to that upcall instead of adding the new upcall. */ static inline struct gss_upcall_msg * gss_add_msg(struct gss_upcall_msg *gss_msg) { struct rpc_pipe *pipe = gss_msg->pipe; struct gss_upcall_msg *old; spin_lock(&pipe->lock); old = __gss_find_upcall(pipe, gss_msg->uid, gss_msg->auth); if (old == NULL) { refcount_inc(&gss_msg->count); list_add(&gss_msg->list, &pipe->in_downcall); } else gss_msg = old; spin_unlock(&pipe->lock); return gss_msg; } static void __gss_unhash_msg(struct gss_upcall_msg *gss_msg) { list_del_init(&gss_msg->list); rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); wake_up_all(&gss_msg->waitqueue); refcount_dec(&gss_msg->count); } static void gss_unhash_msg(struct gss_upcall_msg *gss_msg) { struct rpc_pipe *pipe = gss_msg->pipe; if (list_empty(&gss_msg->list)) return; spin_lock(&pipe->lock); if (!list_empty(&gss_msg->list)) __gss_unhash_msg(gss_msg); spin_unlock(&pipe->lock); } static void gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg) { switch (gss_msg->msg.errno) { case 0: if (gss_msg->ctx == NULL) break; clear_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags); gss_cred_set_ctx(&gss_cred->gc_base, gss_msg->ctx); break; case -EKEYEXPIRED: set_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags); } gss_cred->gc_upcall_timestamp = jiffies; gss_cred->gc_upcall = NULL; rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno); } static void gss_upcall_callback(struct rpc_task *task) { struct gss_cred *gss_cred = container_of(task->tk_rqstp->rq_cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg = gss_cred->gc_upcall; struct rpc_pipe *pipe = gss_msg->pipe; spin_lock(&pipe->lock); gss_handle_downcall_result(gss_cred, gss_msg); spin_unlock(&pipe->lock); task->tk_status = gss_msg->msg.errno; gss_release_msg(gss_msg); } static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg, const struct cred *cred) { struct user_namespace *userns = cred->user_ns; uid_t uid = from_kuid_munged(userns, gss_msg->uid); memcpy(gss_msg->databuf, &uid, sizeof(uid)); gss_msg->msg.data = gss_msg->databuf; gss_msg->msg.len = sizeof(uid); BUILD_BUG_ON(sizeof(uid) > sizeof(gss_msg->databuf)); } static ssize_t gss_v0_upcall(struct file *file, struct rpc_pipe_msg *msg, char __user *buf, size_t buflen) { struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); if (msg->copied == 0) gss_encode_v0_msg(gss_msg, file->f_cred); return rpc_pipe_generic_upcall(file, msg, buf, buflen); } static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg, const char *service_name, const char *target_name, const struct cred *cred) { struct user_namespace *userns = cred->user_ns; struct gss_api_mech *mech = gss_msg->auth->mech; char *p = gss_msg->databuf; size_t buflen = sizeof(gss_msg->databuf); int len; len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name, from_kuid_munged(userns, gss_msg->uid)); buflen -= len; p += len; gss_msg->msg.len = len; /* * target= is a full service principal that names the remote * identity that we are authenticating to. */ if (target_name) { len = scnprintf(p, buflen, " target=%s", target_name); buflen -= len; p += len; gss_msg->msg.len += len; } /* * gssd uses service= and srchost= to select a matching key from * the system's keytab to use as the source principal. * * service= is the service name part of the source principal, * or "*" (meaning choose any). * * srchost= is the hostname part of the source principal. When * not provided, gssd uses the local hostname. */ if (service_name) { char *c = strchr(service_name, '@'); if (!c) len = scnprintf(p, buflen, " service=%s", service_name); else len = scnprintf(p, buflen, " service=%.*s srchost=%s", (int)(c - service_name), service_name, c + 1); buflen -= len; p += len; gss_msg->msg.len += len; } if (mech->gm_upcall_enctypes) { len = scnprintf(p, buflen, " enctypes=%s", mech->gm_upcall_enctypes); buflen -= len; p += len; gss_msg->msg.len += len; } trace_rpcgss_upcall_msg(gss_msg->databuf); len = scnprintf(p, buflen, "\n"); if (len == 0) goto out_overflow; gss_msg->msg.len += len; gss_msg->msg.data = gss_msg->databuf; return 0; out_overflow: WARN_ON_ONCE(1); return -ENOMEM; } static ssize_t gss_v1_upcall(struct file *file, struct rpc_pipe_msg *msg, char __user *buf, size_t buflen) { struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); int err; if (msg->copied == 0) { err = gss_encode_v1_msg(gss_msg, gss_msg->service_name, gss_msg->auth->target_name, file->f_cred); if (err) return err; } return rpc_pipe_generic_upcall(file, msg, buf, buflen); } static struct gss_upcall_msg * gss_alloc_msg(struct gss_auth *gss_auth, kuid_t uid, const char *service_name) { struct gss_upcall_msg *gss_msg; int vers; int err = -ENOMEM; gss_msg = kzalloc(sizeof(*gss_msg), GFP_KERNEL); if (gss_msg == NULL) goto err; vers = get_pipe_version(gss_auth->net); err = vers; if (err < 0) goto err_free_msg; gss_msg->pipe = gss_auth->gss_pipe[vers]->pipe; INIT_LIST_HEAD(&gss_msg->list); rpc_init_wait_queue(&gss_msg->rpc_waitqueue, "RPCSEC_GSS upcall waitq"); init_waitqueue_head(&gss_msg->waitqueue); refcount_set(&gss_msg->count, 1); gss_msg->uid = uid; gss_msg->auth = gss_auth; kref_get(&gss_auth->kref); if (service_name) { gss_msg->service_name = kstrdup_const(service_name, GFP_KERNEL); if (!gss_msg->service_name) { err = -ENOMEM; goto err_put_pipe_version; } } return gss_msg; err_put_pipe_version: put_pipe_version(gss_auth->net); err_free_msg: kfree(gss_msg); err: return ERR_PTR(err); } static struct gss_upcall_msg * gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_new, *gss_msg; kuid_t uid = cred->cr_cred->fsuid; gss_new = gss_alloc_msg(gss_auth, uid, gss_cred->gc_principal); if (IS_ERR(gss_new)) return gss_new; gss_msg = gss_add_msg(gss_new); if (gss_msg == gss_new) { int res; refcount_inc(&gss_msg->count); res = rpc_queue_upcall(gss_new->pipe, &gss_new->msg); if (res) { gss_unhash_msg(gss_new); refcount_dec(&gss_msg->count); gss_release_msg(gss_new); gss_msg = ERR_PTR(res); } } else gss_release_msg(gss_new); return gss_msg; } static void warn_gssd(void) { dprintk("AUTH_GSS upcall failed. Please check user daemon is running.\n"); } static inline int gss_refresh_upcall(struct rpc_task *task) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_upcall_msg *gss_msg; struct rpc_pipe *pipe; int err = 0; gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { /* XXX: warning on the first, under the assumption we * shouldn't normally hit this case on a refresh. */ warn_gssd(); rpc_sleep_on_timeout(&pipe_version_rpc_waitqueue, task, NULL, jiffies + (15 * HZ)); err = -EAGAIN; goto out; } if (IS_ERR(gss_msg)) { err = PTR_ERR(gss_msg); goto out; } pipe = gss_msg->pipe; spin_lock(&pipe->lock); if (gss_cred->gc_upcall != NULL) rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL); else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) { gss_cred->gc_upcall = gss_msg; /* gss_upcall_callback will release the reference to gss_upcall_msg */ refcount_inc(&gss_msg->count); rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback); } else { gss_handle_downcall_result(gss_cred, gss_msg); err = gss_msg->msg.errno; } spin_unlock(&pipe->lock); gss_release_msg(gss_msg); out: trace_rpcgss_upcall_result(from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); return err; } static inline int gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { struct net *net = gss_auth->net; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); struct rpc_pipe *pipe; struct rpc_cred *cred = &gss_cred->gc_base; struct gss_upcall_msg *gss_msg; DEFINE_WAIT(wait); int err; retry: err = 0; /* if gssd is down, just skip upcalling altogether */ if (!gssd_running(net)) { warn_gssd(); err = -EACCES; goto out; } gss_msg = gss_setup_upcall(gss_auth, cred); if (PTR_ERR(gss_msg) == -EAGAIN) { err = wait_event_interruptible_timeout(pipe_version_waitqueue, sn->pipe_version >= 0, 15 * HZ); if (sn->pipe_version < 0) { warn_gssd(); err = -EACCES; } if (err < 0) goto out; goto retry; } if (IS_ERR(gss_msg)) { err = PTR_ERR(gss_msg); goto out; } pipe = gss_msg->pipe; for (;;) { prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE); spin_lock(&pipe->lock); if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) { break; } spin_unlock(&pipe->lock); if (fatal_signal_pending(current)) { err = -ERESTARTSYS; goto out_intr; } schedule(); } if (gss_msg->ctx) { trace_rpcgss_ctx_init(gss_cred); gss_cred_set_ctx(cred, gss_msg->ctx); } else { err = gss_msg->msg.errno; } spin_unlock(&pipe->lock); out_intr: finish_wait(&gss_msg->waitqueue, &wait); gss_release_msg(gss_msg); out: trace_rpcgss_upcall_result(from_kuid(&init_user_ns, cred->cr_cred->fsuid), err); return err; } static struct gss_upcall_msg * gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid) { struct gss_upcall_msg *pos; list_for_each_entry(pos, &pipe->in_downcall, list) { if (!uid_eq(pos->uid, uid)) continue; if (!rpc_msg_is_inflight(&pos->msg)) continue; refcount_inc(&pos->count); return pos; } return NULL; } #define MSG_BUF_MAXSIZE 1024 static ssize_t gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) { const void *p, *end; void *buf; struct gss_upcall_msg *gss_msg; struct rpc_pipe *pipe = RPC_I(file_inode(filp))->pipe; struct gss_cl_ctx *ctx; uid_t id; kuid_t uid; ssize_t err = -EFBIG; if (mlen > MSG_BUF_MAXSIZE) goto out; err = -ENOMEM; buf = kmalloc(mlen, GFP_KERNEL); if (!buf) goto out; err = -EFAULT; if (copy_from_user(buf, src, mlen)) goto err; end = (const void *)((char *)buf + mlen); p = simple_get_bytes(buf, end, &id, sizeof(id)); if (IS_ERR(p)) { err = PTR_ERR(p); goto err; } uid = make_kuid(current_user_ns(), id); if (!uid_valid(uid)) { err = -EINVAL; goto err; } err = -ENOMEM; ctx = gss_alloc_context(); if (ctx == NULL) goto err; err = -ENOENT; /* Find a matching upcall */ spin_lock(&pipe->lock); gss_msg = gss_find_downcall(pipe, uid); if (gss_msg == NULL) { spin_unlock(&pipe->lock); goto err_put_ctx; } list_del_init(&gss_msg->list); spin_unlock(&pipe->lock); p = gss_fill_context(p, end, ctx, gss_msg->auth->mech); if (IS_ERR(p)) { err = PTR_ERR(p); switch (err) { case -EACCES: case -EKEYEXPIRED: gss_msg->msg.errno = err; err = mlen; break; case -EFAULT: case -ENOMEM: case -EINVAL: case -ENOSYS: gss_msg->msg.errno = -EAGAIN; break; default: printk(KERN_CRIT "%s: bad return from " "gss_fill_context: %zd\n", __func__, err); gss_msg->msg.errno = -EIO; } goto err_release_msg; } gss_msg->ctx = gss_get_ctx(ctx); err = mlen; err_release_msg: spin_lock(&pipe->lock); __gss_unhash_msg(gss_msg); spin_unlock(&pipe->lock); gss_release_msg(gss_msg); err_put_ctx: gss_put_ctx(ctx); err: kfree(buf); out: return err; } static int gss_pipe_open(struct inode *inode, int new_version) { struct net *net = inode->i_sb->s_fs_info; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); int ret = 0; spin_lock(&pipe_version_lock); if (sn->pipe_version < 0) { /* First open of any gss pipe determines the version: */ sn->pipe_version = new_version; rpc_wake_up(&pipe_version_rpc_waitqueue); wake_up(&pipe_version_waitqueue); } else if (sn->pipe_version != new_version) { /* Trying to open a pipe of a different version */ ret = -EBUSY; goto out; } atomic_inc(&sn->pipe_users); out: spin_unlock(&pipe_version_lock); return ret; } static int gss_pipe_open_v0(struct inode *inode) { return gss_pipe_open(inode, 0); } static int gss_pipe_open_v1(struct inode *inode) { return gss_pipe_open(inode, 1); } static void gss_pipe_release(struct inode *inode) { struct net *net = inode->i_sb->s_fs_info; struct rpc_pipe *pipe = RPC_I(inode)->pipe; struct gss_upcall_msg *gss_msg; restart: spin_lock(&pipe->lock); list_for_each_entry(gss_msg, &pipe->in_downcall, list) { if (!list_empty(&gss_msg->msg.list)) continue; gss_msg->msg.errno = -EPIPE; refcount_inc(&gss_msg->count); __gss_unhash_msg(gss_msg); spin_unlock(&pipe->lock); gss_release_msg(gss_msg); goto restart; } spin_unlock(&pipe->lock); put_pipe_version(net); } static void gss_pipe_destroy_msg(struct rpc_pipe_msg *msg) { struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); if (msg->errno < 0) { refcount_inc(&gss_msg->count); gss_unhash_msg(gss_msg); if (msg->errno == -ETIMEDOUT) warn_gssd(); gss_release_msg(gss_msg); } gss_release_msg(gss_msg); } static void gss_pipe_dentry_destroy(struct dentry *dir, struct rpc_pipe_dir_object *pdo) { struct gss_pipe *gss_pipe = pdo->pdo_data; struct rpc_pipe *pipe = gss_pipe->pipe; if (pipe->dentry != NULL) { rpc_unlink(pipe->dentry); pipe->dentry = NULL; } } static int gss_pipe_dentry_create(struct dentry *dir, struct rpc_pipe_dir_object *pdo) { struct gss_pipe *p = pdo->pdo_data; struct dentry *dentry; dentry = rpc_mkpipe_dentry(dir, p->name, p->clnt, p->pipe); if (IS_ERR(dentry)) return PTR_ERR(dentry); p->pipe->dentry = dentry; return 0; } static const struct rpc_pipe_dir_object_ops gss_pipe_dir_object_ops = { .create = gss_pipe_dentry_create, .destroy = gss_pipe_dentry_destroy, }; static struct gss_pipe *gss_pipe_alloc(struct rpc_clnt *clnt, const char *name, const struct rpc_pipe_ops *upcall_ops) { struct gss_pipe *p; int err = -ENOMEM; p = kmalloc(sizeof(*p), GFP_KERNEL); if (p == NULL) goto err; p->pipe = rpc_mkpipe_data(upcall_ops, RPC_PIPE_WAIT_FOR_OPEN); if (IS_ERR(p->pipe)) { err = PTR_ERR(p->pipe); goto err_free_gss_pipe; } p->name = name; p->clnt = clnt; kref_init(&p->kref); rpc_init_pipe_dir_object(&p->pdo, &gss_pipe_dir_object_ops, p); return p; err_free_gss_pipe: kfree(p); err: return ERR_PTR(err); } struct gss_alloc_pdo { struct rpc_clnt *clnt; const char *name; const struct rpc_pipe_ops *upcall_ops; }; static int gss_pipe_match_pdo(struct rpc_pipe_dir_object *pdo, void *data) { struct gss_pipe *gss_pipe; struct gss_alloc_pdo *args = data; if (pdo->pdo_ops != &gss_pipe_dir_object_ops) return 0; gss_pipe = container_of(pdo, struct gss_pipe, pdo); if (strcmp(gss_pipe->name, args->name) != 0) return 0; if (!kref_get_unless_zero(&gss_pipe->kref)) return 0; return 1; } static struct rpc_pipe_dir_object *gss_pipe_alloc_pdo(void *data) { struct gss_pipe *gss_pipe; struct gss_alloc_pdo *args = data; gss_pipe = gss_pipe_alloc(args->clnt, args->name, args->upcall_ops); if (!IS_ERR(gss_pipe)) return &gss_pipe->pdo; return NULL; } static struct gss_pipe *gss_pipe_get(struct rpc_clnt *clnt, const char *name, const struct rpc_pipe_ops *upcall_ops) { struct net *net = rpc_net_ns(clnt); struct rpc_pipe_dir_object *pdo; struct gss_alloc_pdo args = { .clnt = clnt, .name = name, .upcall_ops = upcall_ops, }; pdo = rpc_find_or_alloc_pipe_dir_object(net, &clnt->cl_pipedir_objects, gss_pipe_match_pdo, gss_pipe_alloc_pdo, &args); if (pdo != NULL) return container_of(pdo, struct gss_pipe, pdo); return ERR_PTR(-ENOMEM); } static void __gss_pipe_free(struct gss_pipe *p) { struct rpc_clnt *clnt = p->clnt; struct net *net = rpc_net_ns(clnt); rpc_remove_pipe_dir_object(net, &clnt->cl_pipedir_objects, &p->pdo); rpc_destroy_pipe_data(p->pipe); kfree(p); } static void __gss_pipe_release(struct kref *kref) { struct gss_pipe *p = container_of(kref, struct gss_pipe, kref); __gss_pipe_free(p); } static void gss_pipe_free(struct gss_pipe *p) { if (p != NULL) kref_put(&p->kref, __gss_pipe_release); } /* * NOTE: we have the opportunity to use different * parameters based on the input flavor (which must be a pseudoflavor) */ static struct gss_auth * gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { rpc_authflavor_t flavor = args->pseudoflavor; struct gss_auth *gss_auth; struct gss_pipe *gss_pipe; struct rpc_auth * auth; int err = -ENOMEM; /* XXX? */ if (!try_module_get(THIS_MODULE)) return ERR_PTR(err); if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) goto out_dec; INIT_HLIST_NODE(&gss_auth->hash); gss_auth->target_name = NULL; if (args->target_name) { gss_auth->target_name = kstrdup(args->target_name, GFP_KERNEL); if (gss_auth->target_name == NULL) goto err_free; } gss_auth->client = clnt; gss_auth->net = get_net_track(rpc_net_ns(clnt), &gss_auth->ns_tracker, GFP_KERNEL); err = -EINVAL; gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor); if (!gss_auth->mech) goto err_put_net; gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) goto err_put_mech; if (!gssd_running(gss_auth->net)) goto err_put_mech; auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE); auth->au_rslack = GSS_KRB5_MAX_SLACK_NEEDED >> 2; auth->au_verfsize = GSS_VERF_SLACK >> 2; auth->au_ralign = GSS_VERF_SLACK >> 2; __set_bit(RPCAUTH_AUTH_UPDATE_SLACK, &auth->au_flags); auth->au_ops = &authgss_ops; auth->au_flavor = flavor; if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor)) __set_bit(RPCAUTH_AUTH_DATATOUCH, &auth->au_flags); refcount_set(&auth->au_count, 1); kref_init(&gss_auth->kref); err = rpcauth_init_credcache(auth); if (err) goto err_put_mech; /* * Note: if we created the old pipe first, then someone who * examined the directory at the right moment might conclude * that we supported only the old pipe. So we instead create * the new pipe first. */ gss_pipe = gss_pipe_get(clnt, "gssd", &gss_upcall_ops_v1); if (IS_ERR(gss_pipe)) { err = PTR_ERR(gss_pipe); goto err_destroy_credcache; } gss_auth->gss_pipe[1] = gss_pipe; gss_pipe = gss_pipe_get(clnt, gss_auth->mech->gm_name, &gss_upcall_ops_v0); if (IS_ERR(gss_pipe)) { err = PTR_ERR(gss_pipe); goto err_destroy_pipe_1; } gss_auth->gss_pipe[0] = gss_pipe; return gss_auth; err_destroy_pipe_1: gss_pipe_free(gss_auth->gss_pipe[1]); err_destroy_credcache: rpcauth_destroy_credcache(auth); err_put_mech: gss_mech_put(gss_auth->mech); err_put_net: put_net_track(gss_auth->net, &gss_auth->ns_tracker); err_free: kfree(gss_auth->target_name); kfree(gss_auth); out_dec: module_put(THIS_MODULE); trace_rpcgss_createauth(flavor, err); return ERR_PTR(err); } static void gss_free(struct gss_auth *gss_auth) { gss_pipe_free(gss_auth->gss_pipe[0]); gss_pipe_free(gss_auth->gss_pipe[1]); gss_mech_put(gss_auth->mech); put_net_track(gss_auth->net, &gss_auth->ns_tracker); kfree(gss_auth->target_name); kfree(gss_auth); module_put(THIS_MODULE); } static void gss_free_callback(struct kref *kref) { struct gss_auth *gss_auth = container_of(kref, struct gss_auth, kref); gss_free(gss_auth); } static void gss_put_auth(struct gss_auth *gss_auth) { kref_put(&gss_auth->kref, gss_free_callback); } static void gss_destroy(struct rpc_auth *auth) { struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); if (hash_hashed(&gss_auth->hash)) { spin_lock(&gss_auth_hash_lock); hash_del(&gss_auth->hash); spin_unlock(&gss_auth_hash_lock); } gss_pipe_free(gss_auth->gss_pipe[0]); gss_auth->gss_pipe[0] = NULL; gss_pipe_free(gss_auth->gss_pipe[1]); gss_auth->gss_pipe[1] = NULL; rpcauth_destroy_credcache(auth); gss_put_auth(gss_auth); } /* * Auths may be shared between rpc clients that were cloned from a * common client with the same xprt, if they also share the flavor and * target_name. * * The auth is looked up from the oldest parent sharing the same * cl_xprt, and the auth itself references only that common parent * (which is guaranteed to last as long as any of its descendants). */ static struct gss_auth * gss_auth_find_or_add_hashed(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt, struct gss_auth *new) { struct gss_auth *gss_auth; unsigned long hashval = (unsigned long)clnt; spin_lock(&gss_auth_hash_lock); hash_for_each_possible(gss_auth_hash_table, gss_auth, hash, hashval) { if (gss_auth->client != clnt) continue; if (gss_auth->rpc_auth.au_flavor != args->pseudoflavor) continue; if (gss_auth->target_name != args->target_name) { if (gss_auth->target_name == NULL) continue; if (args->target_name == NULL) continue; if (strcmp(gss_auth->target_name, args->target_name)) continue; } if (!refcount_inc_not_zero(&gss_auth->rpc_auth.au_count)) continue; goto out; } if (new) hash_add(gss_auth_hash_table, &new->hash, hashval); gss_auth = new; out: spin_unlock(&gss_auth_hash_lock); return gss_auth; } static struct gss_auth * gss_create_hashed(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { struct gss_auth *gss_auth; struct gss_auth *new; gss_auth = gss_auth_find_or_add_hashed(args, clnt, NULL); if (gss_auth != NULL) goto out; new = gss_create_new(args, clnt); if (IS_ERR(new)) return new; gss_auth = gss_auth_find_or_add_hashed(args, clnt, new); if (gss_auth != new) gss_destroy(&new->rpc_auth); out: return gss_auth; } static struct rpc_auth * gss_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt) { struct gss_auth *gss_auth; struct rpc_xprt_switch *xps = rcu_access_pointer(clnt->cl_xpi.xpi_xpswitch); while (clnt != clnt->cl_parent) { struct rpc_clnt *parent = clnt->cl_parent; /* Find the original parent for this transport */ if (rcu_access_pointer(parent->cl_xpi.xpi_xpswitch) != xps) break; clnt = parent; } gss_auth = gss_create_hashed(args, clnt); if (IS_ERR(gss_auth)) return ERR_CAST(gss_auth); return &gss_auth->rpc_auth; } static struct gss_cred * gss_dup_cred(struct gss_auth *gss_auth, struct gss_cred *gss_cred) { struct gss_cred *new; /* Make a copy of the cred so that we can reference count it */ new = kzalloc(sizeof(*gss_cred), GFP_KERNEL); if (new) { struct auth_cred acred = { .cred = gss_cred->gc_base.cr_cred, }; struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); rpcauth_init_cred(&new->gc_base, &acred, &gss_auth->rpc_auth, &gss_nullops); new->gc_base.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE; new->gc_service = gss_cred->gc_service; new->gc_principal = gss_cred->gc_principal; kref_get(&gss_auth->kref); rcu_assign_pointer(new->gc_ctx, ctx); gss_get_ctx(ctx); } return new; } /* * gss_send_destroy_context will cause the RPCSEC_GSS to send a NULL RPC call * to the server with the GSS control procedure field set to * RPC_GSS_PROC_DESTROY. This should normally cause the server to release * all RPCSEC_GSS state associated with that context. */ static void gss_send_destroy_context(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); struct gss_cred *new; struct rpc_task *task; new = gss_dup_cred(gss_auth, gss_cred); if (new) { ctx->gc_proc = RPC_GSS_PROC_DESTROY; trace_rpcgss_ctx_destroy(gss_cred); task = rpc_call_null(gss_auth->client, &new->gc_base, RPC_TASK_ASYNC); if (!IS_ERR(task)) rpc_put_task(task); put_rpccred(&new->gc_base); } } /* gss_destroy_cred (and gss_free_ctx) are used to clean up after failure * to create a new cred or context, so they check that things have been * allocated before freeing them. */ static void gss_do_free_ctx(struct gss_cl_ctx *ctx) { gss_delete_sec_context(&ctx->gc_gss_ctx); kfree(ctx->gc_wire_ctx.data); kfree(ctx->gc_acceptor.data); kfree(ctx); } static void gss_free_ctx_callback(struct rcu_head *head) { struct gss_cl_ctx *ctx = container_of(head, struct gss_cl_ctx, gc_rcu); gss_do_free_ctx(ctx); } static void gss_free_ctx(struct gss_cl_ctx *ctx) { call_rcu(&ctx->gc_rcu, gss_free_ctx_callback); } static void gss_free_cred(struct gss_cred *gss_cred) { kfree(gss_cred); } static void gss_free_cred_callback(struct rcu_head *head) { struct gss_cred *gss_cred = container_of(head, struct gss_cred, gc_base.cr_rcu); gss_free_cred(gss_cred); } static void gss_destroy_nullcred(struct rpc_cred *cred) { struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_auth *gss_auth = container_of(cred->cr_auth, struct gss_auth, rpc_auth); struct gss_cl_ctx *ctx = rcu_dereference_protected(gss_cred->gc_ctx, 1); RCU_INIT_POINTER(gss_cred->gc_ctx, NULL); put_cred(cred->cr_cred); call_rcu(&cred->cr_rcu, gss_free_cred_callback); if (ctx) gss_put_ctx(ctx); gss_put_auth(gss_auth); } static void gss_destroy_cred(struct rpc_cred *cred) { if (test_and_clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) gss_send_destroy_context(cred); gss_destroy_nullcred(cred); } static int gss_hash_cred(struct auth_cred *acred, unsigned int hashbits) { return hash_64(from_kuid(&init_user_ns, acred->cred->fsuid), hashbits); } /* * Lookup RPCSEC_GSS cred for the current process */ static struct rpc_cred *gss_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags) { return rpcauth_lookup_credcache(auth, acred, flags, rpc_task_gfp_mask()); } static struct rpc_cred * gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t gfp) { struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); struct gss_cred *cred = NULL; int err = -ENOMEM; if (!(cred = kzalloc(sizeof(*cred), gfp))) goto out_err; rpcauth_init_cred(&cred->gc_base, acred, auth, &gss_credops); /* * Note: in order to force a call to call_refresh(), we deliberately * fail to flag the credential as RPCAUTH_CRED_UPTODATE. */ cred->gc_base.cr_flags = 1UL << RPCAUTH_CRED_NEW; cred->gc_service = gss_auth->service; cred->gc_principal = acred->principal; kref_get(&gss_auth->kref); return &cred->gc_base; out_err: return ERR_PTR(err); } static int gss_cred_init(struct rpc_auth *auth, struct rpc_cred *cred) { struct gss_auth *gss_auth = container_of(auth, struct gss_auth, rpc_auth); struct gss_cred *gss_cred = container_of(cred,struct gss_cred, gc_base); int err; do { err = gss_create_upcall(gss_auth, gss_cred); } while (err == -EAGAIN); return err; } static char * gss_stringify_acceptor(struct rpc_cred *cred) { char *string = NULL; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx; unsigned int len; struct xdr_netobj *acceptor; rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); if (!ctx) goto out; len = ctx->gc_acceptor.len; rcu_read_unlock(); /* no point if there's no string */ if (!len) return NULL; realloc: string = kmalloc(len + 1, GFP_KERNEL); if (!string) return NULL; rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); /* did the ctx disappear or was it replaced by one with no acceptor? */ if (!ctx || !ctx->gc_acceptor.len) { kfree(string); string = NULL; goto out; } acceptor = &ctx->gc_acceptor; /* * Did we find a new acceptor that's longer than the original? Allocate * a longer buffer and try again. */ if (len < acceptor->len) { len = acceptor->len; rcu_read_unlock(); kfree(string); goto realloc; } memcpy(string, acceptor->data, acceptor->len); string[acceptor->len] = '\0'; out: rcu_read_unlock(); return string; } /* * Returns -EACCES if GSS context is NULL or will expire within the * timeout (miliseconds) */ static int gss_key_timeout(struct rpc_cred *rc) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); struct gss_cl_ctx *ctx; unsigned long timeout = jiffies + (gss_key_expire_timeo * HZ); int ret = 0; rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); if (!ctx || time_after(timeout, ctx->gc_expiry)) ret = -EACCES; rcu_read_unlock(); return ret; } static int gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) { struct gss_cred *gss_cred = container_of(rc, struct gss_cred, gc_base); struct gss_cl_ctx *ctx; int ret; if (test_bit(RPCAUTH_CRED_NEW, &rc->cr_flags)) goto out; /* Don't match with creds that have expired. */ rcu_read_lock(); ctx = rcu_dereference(gss_cred->gc_ctx); if (!ctx || time_after(jiffies, ctx->gc_expiry)) { rcu_read_unlock(); return 0; } rcu_read_unlock(); if (!test_bit(RPCAUTH_CRED_UPTODATE, &rc->cr_flags)) return 0; out: if (acred->principal != NULL) { if (gss_cred->gc_principal == NULL) return 0; ret = strcmp(acred->principal, gss_cred->gc_principal) == 0; } else { if (gss_cred->gc_principal != NULL) return 0; ret = uid_eq(rc->cr_cred->fsuid, acred->cred->fsuid); } return ret; } /* * Marshal credentials. * * The expensive part is computing the verifier. We can't cache a * pre-computed version of the verifier because the seqno, which * is different every time, is included in the MIC. */ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_cred *cred = req->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); __be32 *p, *cred_len; u32 maj_stat = 0; struct xdr_netobj mic; struct kvec iov; struct xdr_buf verf_buf; int status; /* Credential */ p = xdr_reserve_space(xdr, 7 * sizeof(*p) + ctx->gc_wire_ctx.len); if (!p) goto marshal_failed; *p++ = rpc_auth_gss; cred_len = p++; spin_lock(&ctx->gc_seq_lock); req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ; spin_unlock(&ctx->gc_seq_lock); if (req->rq_seqno == MAXSEQ) goto expired; trace_rpcgss_seqno(task); *p++ = cpu_to_be32(RPC_GSS_VERSION); *p++ = cpu_to_be32(ctx->gc_proc); *p++ = cpu_to_be32(req->rq_seqno); *p++ = cpu_to_be32(gss_cred->gc_service); p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); *cred_len = cpu_to_be32((p - (cred_len + 1)) << 2); /* Verifier */ /* We compute the checksum for the verifier over the xdr-encoded bytes * starting with the xid and ending at the end of the credential: */ iov.iov_base = req->rq_snd_buf.head[0].iov_base; iov.iov_len = (u8 *)p - (u8 *)iov.iov_base; xdr_buf_from_iov(&iov, &verf_buf); p = xdr_reserve_space(xdr, sizeof(*p)); if (!p) goto marshal_failed; *p++ = rpc_auth_gss; mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) goto expired; else if (maj_stat != 0) goto bad_mic; if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0) goto marshal_failed; status = 0; out: gss_put_ctx(ctx); return status; expired: clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); status = -EKEYEXPIRED; goto out; marshal_failed: status = -EMSGSIZE; goto out; bad_mic: trace_rpcgss_get_mic(task, maj_stat); status = -EIO; goto out; } static int gss_renew_cred(struct rpc_task *task) { struct rpc_cred *oldcred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(oldcred, struct gss_cred, gc_base); struct rpc_auth *auth = oldcred->cr_auth; struct auth_cred acred = { .cred = oldcred->cr_cred, .principal = gss_cred->gc_principal, }; struct rpc_cred *new; new = gss_lookup_cred(auth, &acred, RPCAUTH_LOOKUP_NEW); if (IS_ERR(new)) return PTR_ERR(new); task->tk_rqstp->rq_cred = new; put_rpccred(oldcred); return 0; } static int gss_cred_is_negative_entry(struct rpc_cred *cred) { if (test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags)) { unsigned long now = jiffies; unsigned long begin, expire; struct gss_cred *gss_cred; gss_cred = container_of(cred, struct gss_cred, gc_base); begin = gss_cred->gc_upcall_timestamp; expire = begin + gss_expired_cred_retry_delay * HZ; if (time_in_range_open(now, begin, expire)) return 1; } return 0; } /* * Refresh credentials. XXX - finish */ static int gss_refresh(struct rpc_task *task) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; int ret = 0; if (gss_cred_is_negative_entry(cred)) return -EKEYEXPIRED; if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) && !test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) { ret = gss_renew_cred(task); if (ret < 0) goto out; cred = task->tk_rqstp->rq_cred; } if (test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags)) ret = gss_refresh_upcall(task); out: return ret; } /* Dummy refresh routine: used only when destroying the context */ static int gss_refresh_null(struct rpc_task *task) { return 0; } static int gss_validate(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); __be32 *p, *seq = NULL; struct kvec iov; struct xdr_buf verf_buf; struct xdr_netobj mic; u32 len, maj_stat; int status; p = xdr_inline_decode(xdr, 2 * sizeof(*p)); if (!p) goto validate_failed; if (*p++ != rpc_auth_gss) goto validate_failed; len = be32_to_cpup(p); if (len > RPC_MAX_AUTH_SIZE) goto validate_failed; p = xdr_inline_decode(xdr, len); if (!p) goto validate_failed; seq = kmalloc(4, GFP_KERNEL); if (!seq) goto validate_failed; *seq = cpu_to_be32(task->tk_rqstp->rq_seqno); iov.iov_base = seq; iov.iov_len = 4; xdr_buf_from_iov(&iov, &verf_buf); mic.data = (u8 *)p; mic.len = len; maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat) goto bad_mic; /* We leave it to unwrap to calculate au_rslack. For now we just * calculate the length of the verifier: */ if (test_bit(RPCAUTH_AUTH_UPDATE_SLACK, &cred->cr_auth->au_flags)) cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; status = 0; out: gss_put_ctx(ctx); kfree(seq); return status; validate_failed: status = -EIO; goto out; bad_mic: trace_rpcgss_verify_mic(task, maj_stat); status = -EACCES; goto out; } static noinline_for_stack int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx, struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_rqst *rqstp = task->tk_rqstp; struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf; struct xdr_netobj mic; __be32 *p, *integ_len; u32 offset, maj_stat; p = xdr_reserve_space(xdr, 2 * sizeof(*p)); if (!p) goto wrap_failed; integ_len = p++; *p = cpu_to_be32(rqstp->rq_seqno); if (rpcauth_wrap_req_encode(task, xdr)) goto wrap_failed; offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; if (xdr_buf_subsegment(snd_buf, &integ_buf, offset, snd_buf->len - offset)) goto wrap_failed; *integ_len = cpu_to_be32(integ_buf.len); p = xdr_reserve_space(xdr, 0); if (!p) goto wrap_failed; mic.data = (u8 *)(p + 1); maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) goto bad_mic; /* Check that the trailing MIC fit in the buffer, after the fact */ if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0) goto wrap_failed; return 0; wrap_failed: return -EMSGSIZE; bad_mic: trace_rpcgss_get_mic(task, maj_stat); return -EIO; } static void priv_release_snd_buf(struct rpc_rqst *rqstp) { int i; for (i=0; i < rqstp->rq_enc_pages_num; i++) __free_page(rqstp->rq_enc_pages[i]); kfree(rqstp->rq_enc_pages); rqstp->rq_release_snd_buf = NULL; } static int alloc_enc_pages(struct rpc_rqst *rqstp) { struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; int first, last, i; if (rqstp->rq_release_snd_buf) rqstp->rq_release_snd_buf(rqstp); if (snd_buf->page_len == 0) { rqstp->rq_enc_pages_num = 0; return 0; } first = snd_buf->page_base >> PAGE_SHIFT; last = (snd_buf->page_base + snd_buf->page_len - 1) >> PAGE_SHIFT; rqstp->rq_enc_pages_num = last - first + 1 + 1; rqstp->rq_enc_pages = kmalloc_array(rqstp->rq_enc_pages_num, sizeof(struct page *), GFP_KERNEL); if (!rqstp->rq_enc_pages) goto out; for (i=0; i < rqstp->rq_enc_pages_num; i++) { rqstp->rq_enc_pages[i] = alloc_page(GFP_KERNEL); if (rqstp->rq_enc_pages[i] == NULL) goto out_free; } rqstp->rq_release_snd_buf = priv_release_snd_buf; return 0; out_free: rqstp->rq_enc_pages_num = i; priv_release_snd_buf(rqstp); out: return -EAGAIN; } static noinline_for_stack int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx, struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_rqst *rqstp = task->tk_rqstp; struct xdr_buf *snd_buf = &rqstp->rq_snd_buf; u32 pad, offset, maj_stat; int status; __be32 *p, *opaque_len; struct page **inpages; int first; struct kvec *iov; status = -EIO; p = xdr_reserve_space(xdr, 2 * sizeof(*p)); if (!p) goto wrap_failed; opaque_len = p++; *p = cpu_to_be32(rqstp->rq_seqno); if (rpcauth_wrap_req_encode(task, xdr)) goto wrap_failed; status = alloc_enc_pages(rqstp); if (unlikely(status)) goto wrap_failed; first = snd_buf->page_base >> PAGE_SHIFT; inpages = snd_buf->pages + first; snd_buf->pages = rqstp->rq_enc_pages; snd_buf->page_base -= first << PAGE_SHIFT; /* * Move the tail into its own page, in case gss_wrap needs * more space in the head when wrapping. * * Still... Why can't gss_wrap just slide the tail down? */ if (snd_buf->page_len || snd_buf->tail[0].iov_len) { char *tmp; tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]); memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len); snd_buf->tail[0].iov_base = tmp; } offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages); /* slack space should prevent this ever happening: */ if (unlikely(snd_buf->len > snd_buf->buflen)) { status = -EIO; goto wrap_failed; } /* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was * done anyway, so it's safe to put the request on the wire: */ if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); else if (maj_stat) goto bad_wrap; *opaque_len = cpu_to_be32(snd_buf->len - offset); /* guess whether the pad goes into the head or the tail: */ if (snd_buf->page_len || snd_buf->tail[0].iov_len) iov = snd_buf->tail; else iov = snd_buf->head; p = iov->iov_base + iov->iov_len; pad = xdr_pad_size(snd_buf->len - offset); memset(p, 0, pad); iov->iov_len += pad; snd_buf->len += pad; return 0; wrap_failed: return status; bad_wrap: trace_rpcgss_wrap(task, maj_stat); return -EIO; } static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_cred *cred = task->tk_rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); int status; status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) { /* The spec seems a little ambiguous here, but I think that not * wrapping context destruction requests makes the most sense. */ status = rpcauth_wrap_req_encode(task, xdr); goto out; } switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: status = rpcauth_wrap_req_encode(task, xdr); break; case RPC_GSS_SVC_INTEGRITY: status = gss_wrap_req_integ(cred, ctx, task, xdr); break; case RPC_GSS_SVC_PRIVACY: status = gss_wrap_req_priv(cred, ctx, task, xdr); break; default: status = -EIO; } out: gss_put_ctx(ctx); return status; } /** * gss_update_rslack - Possibly update RPC receive buffer size estimates * @task: rpc_task for incoming RPC Reply being unwrapped * @cred: controlling rpc_cred for @task * @before: XDR words needed before each RPC Reply message * @after: XDR words needed following each RPC Reply message * */ static void gss_update_rslack(struct rpc_task *task, struct rpc_cred *cred, unsigned int before, unsigned int after) { struct rpc_auth *auth = cred->cr_auth; if (test_and_clear_bit(RPCAUTH_AUTH_UPDATE_SLACK, &auth->au_flags)) { auth->au_ralign = auth->au_verfsize + before; auth->au_rslack = auth->au_verfsize + after; trace_rpcgss_update_slack(task, auth); } } static int gss_unwrap_resp_auth(struct rpc_task *task, struct rpc_cred *cred) { gss_update_rslack(task, cred, 0, 0); return 0; } /* * RFC 2203, Section 5.3.2.2 * * struct rpc_gss_integ_data { * opaque databody_integ<>; * opaque checksum<>; * }; * * struct rpc_gss_data_t { * unsigned int seq_num; * proc_req_arg_t arg; * }; */ static noinline_for_stack int gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred, struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp, struct xdr_stream *xdr) { struct xdr_buf gss_data, *rcv_buf = &rqstp->rq_rcv_buf; u32 len, offset, seqno, maj_stat; struct xdr_netobj mic; int ret; ret = -EIO; mic.data = NULL; /* opaque databody_integ<>; */ if (xdr_stream_decode_u32(xdr, &len)) goto unwrap_failed; if (len & 3) goto unwrap_failed; offset = rcv_buf->len - xdr_stream_remaining(xdr); if (xdr_stream_decode_u32(xdr, &seqno)) goto unwrap_failed; if (seqno != rqstp->rq_seqno) goto bad_seqno; if (xdr_buf_subsegment(rcv_buf, &gss_data, offset, len)) goto unwrap_failed; /* * The xdr_stream now points to the beginning of the * upper layer payload, to be passed below to * rpcauth_unwrap_resp_decode(). The checksum, which * follows the upper layer payload in @rcv_buf, is * located and parsed without updating the xdr_stream. */ /* opaque checksum<>; */ offset += len; if (xdr_decode_word(rcv_buf, offset, &len)) goto unwrap_failed; offset += sizeof(__be32); if (offset + len > rcv_buf->len) goto unwrap_failed; mic.len = len; mic.data = kmalloc(len, GFP_KERNEL); if (ZERO_OR_NULL_PTR(mic.data)) goto unwrap_failed; if (read_bytes_from_xdr_buf(rcv_buf, offset, mic.data, mic.len)) goto unwrap_failed; maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &gss_data, &mic); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) goto bad_mic; gss_update_rslack(task, cred, 2, 2 + 1 + XDR_QUADLEN(mic.len)); ret = 0; out: kfree(mic.data); return ret; unwrap_failed: trace_rpcgss_unwrap_failed(task); goto out; bad_seqno: trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, seqno); goto out; bad_mic: trace_rpcgss_verify_mic(task, maj_stat); goto out; } static noinline_for_stack int gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp, struct xdr_stream *xdr) { struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct kvec *head = rqstp->rq_rcv_buf.head; u32 offset, opaque_len, maj_stat; __be32 *p; p = xdr_inline_decode(xdr, 2 * sizeof(*p)); if (unlikely(!p)) goto unwrap_failed; opaque_len = be32_to_cpup(p++); offset = (u8 *)(p) - (u8 *)head->iov_base; if (offset + opaque_len > rcv_buf->len) goto unwrap_failed; maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, offset + opaque_len, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) goto bad_unwrap; /* gss_unwrap decrypted the sequence number */ if (be32_to_cpup(p++) != rqstp->rq_seqno) goto bad_seqno; /* gss_unwrap redacts the opaque blob from the head iovec. * rcv_buf has changed, thus the stream needs to be reset. */ xdr_init_decode(xdr, rcv_buf, p, rqstp); gss_update_rslack(task, cred, 2 + ctx->gc_gss_ctx->align, 2 + ctx->gc_gss_ctx->slack); return 0; unwrap_failed: trace_rpcgss_unwrap_failed(task); return -EIO; bad_seqno: trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p)); return -EIO; bad_unwrap: trace_rpcgss_unwrap(task, maj_stat); return -EIO; } static bool gss_seq_is_newer(u32 new, u32 old) { return (s32)(new - old) > 0; } static bool gss_xmit_need_reencode(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; struct rpc_cred *cred = req->rq_cred; struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); u32 win, seq_xmit = 0; bool ret = true; if (!ctx) goto out; if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq))) goto out_ctx; seq_xmit = READ_ONCE(ctx->gc_seq_xmit); while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) { u32 tmp = seq_xmit; seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno); if (seq_xmit == tmp) { ret = false; goto out_ctx; } } win = ctx->gc_win; if (win > 0) ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win); out_ctx: gss_put_ctx(ctx); out: trace_rpcgss_need_reencode(task, seq_xmit, ret); return ret; } static int gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr) { struct rpc_rqst *rqstp = task->tk_rqstp; struct rpc_cred *cred = rqstp->rq_cred; struct gss_cred *gss_cred = container_of(cred, struct gss_cred, gc_base); struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); int status = -EIO; if (ctx->gc_proc != RPC_GSS_PROC_DATA) goto out_decode; switch (gss_cred->gc_service) { case RPC_GSS_SVC_NONE: status = gss_unwrap_resp_auth(task, cred); break; case RPC_GSS_SVC_INTEGRITY: status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr); break; case RPC_GSS_SVC_PRIVACY: status = gss_unwrap_resp_priv(task, cred, ctx, rqstp, xdr); break; } if (status) goto out; out_decode: status = rpcauth_unwrap_resp_decode(task, xdr); out: gss_put_ctx(ctx); return status; } static const struct rpc_authops authgss_ops = { .owner = THIS_MODULE, .au_flavor = RPC_AUTH_GSS, .au_name = "RPCSEC_GSS", .create = gss_create, .destroy = gss_destroy, .hash_cred = gss_hash_cred, .lookup_cred = gss_lookup_cred, .crcreate = gss_create_cred, .info2flavor = gss_mech_info2flavor, .flavor2info = gss_mech_flavor2info, }; static const struct rpc_credops gss_credops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_cred, .cr_init = gss_cred_init, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh, .crvalidate = gss_validate, .crwrap_req = gss_wrap_req, .crunwrap_resp = gss_unwrap_resp, .crkey_timeout = gss_key_timeout, .crstringify_acceptor = gss_stringify_acceptor, .crneed_reencode = gss_xmit_need_reencode, }; static const struct rpc_credops gss_nullops = { .cr_name = "AUTH_GSS", .crdestroy = gss_destroy_nullcred, .crmatch = gss_match, .crmarshal = gss_marshal, .crrefresh = gss_refresh_null, .crvalidate = gss_validate, .crwrap_req = gss_wrap_req, .crunwrap_resp = gss_unwrap_resp, .crstringify_acceptor = gss_stringify_acceptor, }; static const struct rpc_pipe_ops gss_upcall_ops_v0 = { .upcall = gss_v0_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v0, .release_pipe = gss_pipe_release, }; static const struct rpc_pipe_ops gss_upcall_ops_v1 = { .upcall = gss_v1_upcall, .downcall = gss_pipe_downcall, .destroy_msg = gss_pipe_destroy_msg, .open_pipe = gss_pipe_open_v1, .release_pipe = gss_pipe_release, }; static __net_init int rpcsec_gss_init_net(struct net *net) { return gss_svc_init_net(net); } static __net_exit void rpcsec_gss_exit_net(struct net *net) { gss_svc_shutdown_net(net); } static struct pernet_operations rpcsec_gss_net_ops = { .init = rpcsec_gss_init_net, .exit = rpcsec_gss_exit_net, }; /* * Initialize RPCSEC_GSS module */ static int __init init_rpcsec_gss(void) { int err = 0; err = rpcauth_register(&authgss_ops); if (err) goto out; err = gss_svc_init(); if (err) goto out_unregister; err = register_pernet_subsys(&rpcsec_gss_net_ops); if (err) goto out_svc_exit; rpc_init_wait_queue(&pipe_version_rpc_waitqueue, "gss pipe version"); return 0; out_svc_exit: gss_svc_shutdown(); out_unregister: rpcauth_unregister(&authgss_ops); out: return err; } static void __exit exit_rpcsec_gss(void) { unregister_pernet_subsys(&rpcsec_gss_net_ops); gss_svc_shutdown(); rpcauth_unregister(&authgss_ops); rcu_barrier(); /* Wait for completion of call_rcu()'s */ } MODULE_ALIAS("rpc-auth-6"); MODULE_DESCRIPTION("Sun RPC Kerberos RPCSEC_GSS client authentication"); MODULE_LICENSE("GPL"); module_param_named(expired_cred_retry_delay, gss_expired_cred_retry_delay, uint, 0644); MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until " "the RPC engine retries an expired credential"); module_param_named(key_expire_timeo, gss_key_expire_timeo, uint, 0644); MODULE_PARM_DESC(key_expire_timeo, "Time (in seconds) at the end of a " "credential keys lifetime where the NFS layer cleans up " "prior to key expiration"); module_init(init_rpcsec_gss) module_exit(exit_rpcsec_gss) |
2 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/phy.h> #include <linux/ethtool_netlink.h> #include "netlink.h" #include "common.h" struct plca_req_info { struct ethnl_req_info base; }; struct plca_reply_data { struct ethnl_reply_data base; struct phy_plca_cfg plca_cfg; struct phy_plca_status plca_st; }; // Helpers ------------------------------------------------------------------ // #define PLCA_REPDATA(__reply_base) \ container_of(__reply_base, struct plca_reply_data, base) // PLCA get configuration message ------------------------------------------- // const struct nla_policy ethnl_plca_get_cfg_policy[] = { [ETHTOOL_A_PLCA_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy), }; static void plca_update_sint(int *dst, struct nlattr **tb, u32 attrid, bool *mod) { const struct nlattr *attr = tb[attrid]; if (!attr || WARN_ON_ONCE(attrid >= ARRAY_SIZE(ethnl_plca_set_cfg_policy))) return; switch (ethnl_plca_set_cfg_policy[attrid].type) { case NLA_U8: *dst = nla_get_u8(attr); break; case NLA_U32: *dst = nla_get_u32(attr); break; default: WARN_ON_ONCE(1); } *mod = true; } static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct plca_reply_data *data = PLCA_REPDATA(reply_base); struct net_device *dev = reply_base->dev; const struct ethtool_phy_ops *ops; struct nlattr **tb = info->attrs; struct phy_device *phydev; int ret; phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER], info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; goto out; } // note: rtnl_lock is held already by ethnl_default_doit ops = ethtool_phy_ops; if (!ops || !ops->get_plca_cfg) { ret = -EOPNOTSUPP; goto out; } ret = ethnl_ops_begin(dev); if (ret < 0) goto out; memset(&data->plca_cfg, 0xff, sizeof_field(struct plca_reply_data, plca_cfg)); ret = ops->get_plca_cfg(phydev, &data->plca_cfg); ethnl_ops_complete(dev); out: return ret; } static int plca_get_cfg_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { return nla_total_size(sizeof(u16)) + /* _VERSION */ nla_total_size(sizeof(u8)) + /* _ENABLED */ nla_total_size(sizeof(u32)) + /* _NODE_CNT */ nla_total_size(sizeof(u32)) + /* _NODE_ID */ nla_total_size(sizeof(u32)) + /* _TO_TIMER */ nla_total_size(sizeof(u32)) + /* _BURST_COUNT */ nla_total_size(sizeof(u32)); /* _BURST_TIMER */ } static int plca_get_cfg_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct plca_reply_data *data = PLCA_REPDATA(reply_base); const struct phy_plca_cfg *plca = &data->plca_cfg; if ((plca->version >= 0 && nla_put_u16(skb, ETHTOOL_A_PLCA_VERSION, plca->version)) || (plca->enabled >= 0 && nla_put_u8(skb, ETHTOOL_A_PLCA_ENABLED, !!plca->enabled)) || (plca->node_id >= 0 && nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_ID, plca->node_id)) || (plca->node_cnt >= 0 && nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_CNT, plca->node_cnt)) || (plca->to_tmr >= 0 && nla_put_u32(skb, ETHTOOL_A_PLCA_TO_TMR, plca->to_tmr)) || (plca->burst_cnt >= 0 && nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_CNT, plca->burst_cnt)) || (plca->burst_tmr >= 0 && nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_TMR, plca->burst_tmr))) return -EMSGSIZE; return 0; }; // PLCA set configuration message ------------------------------------------- // const struct nla_policy ethnl_plca_set_cfg_policy[] = { [ETHTOOL_A_PLCA_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy), [ETHTOOL_A_PLCA_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1), [ETHTOOL_A_PLCA_NODE_ID] = NLA_POLICY_MAX(NLA_U32, 255), [ETHTOOL_A_PLCA_NODE_CNT] = NLA_POLICY_RANGE(NLA_U32, 1, 255), [ETHTOOL_A_PLCA_TO_TMR] = NLA_POLICY_MAX(NLA_U32, 255), [ETHTOOL_A_PLCA_BURST_CNT] = NLA_POLICY_MAX(NLA_U32, 255), [ETHTOOL_A_PLCA_BURST_TMR] = NLA_POLICY_MAX(NLA_U32, 255), }; static int ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info) { const struct ethtool_phy_ops *ops; struct nlattr **tb = info->attrs; struct phy_plca_cfg plca_cfg; struct phy_device *phydev; bool mod = false; int ret; phydev = ethnl_req_get_phydev(req_info, tb[ETHTOOL_A_PLCA_HEADER], info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) return -EOPNOTSUPP; ops = ethtool_phy_ops; if (!ops || !ops->set_plca_cfg) return -EOPNOTSUPP; memset(&plca_cfg, 0xff, sizeof(plca_cfg)); plca_update_sint(&plca_cfg.enabled, tb, ETHTOOL_A_PLCA_ENABLED, &mod); plca_update_sint(&plca_cfg.node_id, tb, ETHTOOL_A_PLCA_NODE_ID, &mod); plca_update_sint(&plca_cfg.node_cnt, tb, ETHTOOL_A_PLCA_NODE_CNT, &mod); plca_update_sint(&plca_cfg.to_tmr, tb, ETHTOOL_A_PLCA_TO_TMR, &mod); plca_update_sint(&plca_cfg.burst_cnt, tb, ETHTOOL_A_PLCA_BURST_CNT, &mod); plca_update_sint(&plca_cfg.burst_tmr, tb, ETHTOOL_A_PLCA_BURST_TMR, &mod); if (!mod) return 0; ret = ops->set_plca_cfg(phydev, &plca_cfg, info->extack); return ret < 0 ? ret : 1; } const struct ethnl_request_ops ethnl_plca_cfg_request_ops = { .request_cmd = ETHTOOL_MSG_PLCA_GET_CFG, .reply_cmd = ETHTOOL_MSG_PLCA_GET_CFG_REPLY, .hdr_attr = ETHTOOL_A_PLCA_HEADER, .req_info_size = sizeof(struct plca_req_info), .reply_data_size = sizeof(struct plca_reply_data), .prepare_data = plca_get_cfg_prepare_data, .reply_size = plca_get_cfg_reply_size, .fill_reply = plca_get_cfg_fill_reply, .set = ethnl_set_plca, .set_ntf_cmd = ETHTOOL_MSG_PLCA_NTF, }; // PLCA get status message -------------------------------------------------- // const struct nla_policy ethnl_plca_get_status_policy[] = { [ETHTOOL_A_PLCA_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_phy), }; static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base, struct ethnl_reply_data *reply_base, const struct genl_info *info) { struct plca_reply_data *data = PLCA_REPDATA(reply_base); struct net_device *dev = reply_base->dev; const struct ethtool_phy_ops *ops; struct nlattr **tb = info->attrs; struct phy_device *phydev; int ret; phydev = ethnl_req_get_phydev(req_base, tb[ETHTOOL_A_PLCA_HEADER], info->extack); // check that the PHY device is available and connected if (IS_ERR_OR_NULL(phydev)) { ret = -EOPNOTSUPP; goto out; } // note: rtnl_lock is held already by ethnl_default_doit ops = ethtool_phy_ops; if (!ops || !ops->get_plca_status) { ret = -EOPNOTSUPP; goto out; } ret = ethnl_ops_begin(dev); if (ret < 0) goto out; memset(&data->plca_st, 0xff, sizeof_field(struct plca_reply_data, plca_st)); ret = ops->get_plca_status(phydev, &data->plca_st); ethnl_ops_complete(dev); out: return ret; } static int plca_get_status_reply_size(const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { return nla_total_size(sizeof(u8)); /* _STATUS */ } static int plca_get_status_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct plca_reply_data *data = PLCA_REPDATA(reply_base); const u8 status = data->plca_st.pst; if (nla_put_u8(skb, ETHTOOL_A_PLCA_STATUS, !!status)) return -EMSGSIZE; return 0; }; const struct ethnl_request_ops ethnl_plca_status_request_ops = { .request_cmd = ETHTOOL_MSG_PLCA_GET_STATUS, .reply_cmd = ETHTOOL_MSG_PLCA_GET_STATUS_REPLY, .hdr_attr = ETHTOOL_A_PLCA_HEADER, .req_info_size = sizeof(struct plca_req_info), .reply_data_size = sizeof(struct plca_reply_data), .prepare_data = plca_get_status_prepare_data, .reply_size = plca_get_status_reply_size, .fill_reply = plca_get_status_fill_reply, }; |
4 4 4 4 4 4 4 4 28 28 957 3 3 1 3 3 3 10 5 7 6 15 191 193 192 192 3 13 9 9 42 12 17 17 17 2 17 2 2 2 2 2 2 2 17 17 15 2 17 17 7 14 8 8 8 7 1 1 1 1 1 1 17 17 17 17 23 23 6 19 18 2 17 17 18 9 9 9 15 15 5 11 8 8 4 4 11 1 10 10 8 4 3 2 2 3 3 2 2 2 2 8 3 5 6 6 2 4 442 444 17 4 12 9 9 9 9 16 17 15 17 4 4 756 138 138 622 648 531 530 526 29 29 23 12 12 12 12 12 1 11 7 4 451 159 452 311 281 711 313 314 4 314 313 2 2 316 6 753 499 501 498 230 333 1 230 229 307 749 751 751 152 745 43 43 42 6 3 1 46 48 47 94 95 89 88 90 90 14 14 199 159 507 1 510 506 1 147 2 3 351 2 354 53 2 298 4 2 121 21 490 489 198 299 157 188 352 300 57 197 159 159 5 603 101 100 6 1 96 510 600 605 603 786 60 574 241 1 101 125 126 513 644 221 22 379 377 375 756 616 87 76 28 125 751 935 934 930 953 642 12 69 501 6 118 15 536 749 8 103 28 9 21 60 23 17 122 253 1 17 14 395 369 2 511 77 789 2 2 2 845 262 63 622 14 12 2 14 11 3 14 14 14 5 9 14 13 14 14 1 1 13 13 11 2 2 14 4 4 3 3 2 2 2 1 8 8 8 8 8 8 4 8 23 23 23 1 23 24 23 4 2 9 1 2 12 9 2 5 10 9 5 47 48 25 25 25 25 25 4 21 17 8 19 4 20 3 23 23 20 3 21 2 23 2 2 2 12 17 12 13 6 13 6 19 19 19 3 2 1 2 2 13 16 15 555 28 28 28 28 28 28 28 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 | // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * ROUTE - implementation of the IP router. * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Alan Cox, <gw4pts@gw4pts.ampr.org> * Linus Torvalds, <Linus.Torvalds@helsinki.fi> * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> * * Fixes: * Alan Cox : Verify area fixes. * Alan Cox : cli() protects routing changes * Rui Oliveira : ICMP routing table updates * (rco@di.uminho.pt) Routing table insertion and update * Linus Torvalds : Rewrote bits to be sensible * Alan Cox : Added BSD route gw semantics * Alan Cox : Super /proc >4K * Alan Cox : MTU in route table * Alan Cox : MSS actually. Also added the window * clamper. * Sam Lantinga : Fixed route matching in rt_del() * Alan Cox : Routing cache support. * Alan Cox : Removed compatibility cruft. * Alan Cox : RTF_REJECT support. * Alan Cox : TCP irtt support. * Jonathan Naylor : Added Metric support. * Miquel van Smoorenburg : BSD API fixes. * Miquel van Smoorenburg : Metrics. * Alan Cox : Use __u32 properly * Alan Cox : Aligned routing errors more closely with BSD * our system is still very different. * Alan Cox : Faster /proc handling * Alexey Kuznetsov : Massive rework to support tree based routing, * routing caches and better behaviour. * * Olaf Erb : irtt wasn't being copied right. * Bjorn Ekwall : Kerneld route support. * Alan Cox : Multicast fixed (I hope) * Pavel Krauz : Limited broadcast fixed * Mike McLagan : Routing by source * Alexey Kuznetsov : End of old history. Split to fib.c and * route.c and rewritten from scratch. * Andi Kleen : Load-limit warning messages. * Vitaly E. Lavrov : Transparent proxy revived after year coma. * Vitaly E. Lavrov : Race condition in ip_route_input_slow. * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow. * Vladimir V. Ivanov : IP rule info (flowid) is really useful. * Marc Boucher : routing by fwmark * Robert Olsson : Added rt_cache statistics * Arnaldo C. Melo : Convert proc stuff to seq_file * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes. * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect * Ilia Sotnikov : Removed TOS from hash calculations */ #define pr_fmt(fmt) "IPv4: " fmt #include <linux/module.h> #include <linux/bitops.h> #include <linux/kernel.h> #include <linux/mm.h> #include <linux/memblock.h> #include <linux/socket.h> #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/init.h> #include <linux/skbuff.h> #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/pkt_sched.h> #include <linux/mroute.h> #include <linux/netfilter_ipv4.h> #include <linux/random.h> #include <linux/rcupdate.h> #include <linux/slab.h> #include <linux/jhash.h> #include <net/dst.h> #include <net/dst_metadata.h> #include <net/inet_dscp.h> #include <net/net_namespace.h> #include <net/ip.h> #include <net/route.h> #include <net/inetpeer.h> #include <net/sock.h> #include <net/ip_fib.h> #include <net/nexthop.h> #include <net/tcp.h> #include <net/icmp.h> #include <net/xfrm.h> #include <net/lwtunnel.h> #include <net/netevent.h> #include <net/rtnetlink.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif #include <net/secure_seq.h> #include <net/ip_tunnels.h> #include "fib_lookup.h" #define RT_GC_TIMEOUT (300*HZ) #define DEFAULT_MIN_PMTU (512 + 20 + 20) #define DEFAULT_MTU_EXPIRES (10 * 60 * HZ) #define DEFAULT_MIN_ADVMSS 256 static int ip_rt_max_size; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; static int ip_rt_redirect_silence __read_mostly = ((HZ / 50) << (9 + 1)); static int ip_rt_error_cost __read_mostly = HZ; static int ip_rt_error_burst __read_mostly = 5 * HZ; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; /* * Interface to generic destination cache. */ INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ipv4_default_advmss(const struct dst_entry *dst); INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst); static void ipv4_negative_advice(struct sock *sk, struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void ipv4_dst_destroy(struct dst_entry *dst); static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old) { WARN_ON(1); return NULL; } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr); static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr); static struct dst_ops ipv4_dst_ops = { .family = AF_INET, .check = ipv4_dst_check, .default_advmss = ipv4_default_advmss, .mtu = ipv4_mtu, .cow_metrics = ipv4_cow_metrics, .destroy = ipv4_dst_destroy, .negative_advice = ipv4_negative_advice, .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, .redirect = ip_do_redirect, .local_out = __ip_local_out, .neigh_lookup = ipv4_neigh_lookup, .confirm_neigh = ipv4_confirm_neigh, }; #define ECN_OR_COST(class) TC_PRIO_##class const __u8 ip_tos2prio[16] = { TC_PRIO_BESTEFFORT, ECN_OR_COST(BESTEFFORT), TC_PRIO_BESTEFFORT, ECN_OR_COST(BESTEFFORT), TC_PRIO_BULK, ECN_OR_COST(BULK), TC_PRIO_BULK, ECN_OR_COST(BULK), TC_PRIO_INTERACTIVE, ECN_OR_COST(INTERACTIVE), TC_PRIO_INTERACTIVE, ECN_OR_COST(INTERACTIVE), TC_PRIO_INTERACTIVE_BULK, ECN_OR_COST(INTERACTIVE_BULK), TC_PRIO_INTERACTIVE_BULK, ECN_OR_COST(INTERACTIVE_BULK) }; EXPORT_SYMBOL(ip_tos2prio); static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat); #define RT_CACHE_STAT_INC(field) raw_cpu_inc(rt_cache_stat.field) #ifdef CONFIG_PROC_FS static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos) { if (*pos) return NULL; return SEQ_START_TOKEN; } static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) { ++*pos; return NULL; } static void rt_cache_seq_stop(struct seq_file *seq, void *v) { } static int rt_cache_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t" "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t" "HHUptod\tSpecDst"); return 0; } static const struct seq_operations rt_cache_seq_ops = { .start = rt_cache_seq_start, .next = rt_cache_seq_next, .stop = rt_cache_seq_stop, .show = rt_cache_seq_show, }; static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos) { int cpu; if (*pos == 0) return SEQ_START_TOKEN; for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; return &per_cpu(rt_cache_stat, cpu); } return NULL; } static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { int cpu; for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; return &per_cpu(rt_cache_stat, cpu); } (*pos)++; return NULL; } static void rt_cpu_seq_stop(struct seq_file *seq, void *v) { } static int rt_cpu_seq_show(struct seq_file *seq, void *v) { struct rt_cache_stat *st = v; if (v == SEQ_START_TOKEN) { seq_puts(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n"); return 0; } seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x\n", dst_entries_get_slow(&ipv4_dst_ops), 0, /* st->in_hit */ st->in_slow_tot, st->in_slow_mc, st->in_no_route, st->in_brd, st->in_martian_dst, st->in_martian_src, 0, /* st->out_hit */ st->out_slow_tot, st->out_slow_mc, 0, /* st->gc_total */ 0, /* st->gc_ignored */ 0, /* st->gc_goal_miss */ 0, /* st->gc_dst_overflow */ 0, /* st->in_hlist_search */ 0 /* st->out_hlist_search */ ); return 0; } static const struct seq_operations rt_cpu_seq_ops = { .start = rt_cpu_seq_start, .next = rt_cpu_seq_next, .stop = rt_cpu_seq_stop, .show = rt_cpu_seq_show, }; #ifdef CONFIG_IP_ROUTE_CLASSID static int rt_acct_proc_show(struct seq_file *m, void *v) { struct ip_rt_acct *dst, *src; unsigned int i, j; dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL); if (!dst) return -ENOMEM; for_each_possible_cpu(i) { src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i); for (j = 0; j < 256; j++) { dst[j].o_bytes += src[j].o_bytes; dst[j].o_packets += src[j].o_packets; dst[j].i_bytes += src[j].i_bytes; dst[j].i_packets += src[j].i_packets; } } seq_write(m, dst, 256 * sizeof(struct ip_rt_acct)); kfree(dst); return 0; } #endif static int __net_init ip_rt_do_proc_init(struct net *net) { struct proc_dir_entry *pde; pde = proc_create_seq("rt_cache", 0444, net->proc_net, &rt_cache_seq_ops); if (!pde) goto err1; pde = proc_create_seq("rt_cache", 0444, net->proc_net_stat, &rt_cpu_seq_ops); if (!pde) goto err2; #ifdef CONFIG_IP_ROUTE_CLASSID pde = proc_create_single("rt_acct", 0, net->proc_net, rt_acct_proc_show); if (!pde) goto err3; #endif return 0; #ifdef CONFIG_IP_ROUTE_CLASSID err3: remove_proc_entry("rt_cache", net->proc_net_stat); #endif err2: remove_proc_entry("rt_cache", net->proc_net); err1: return -ENOMEM; } static void __net_exit ip_rt_do_proc_exit(struct net *net) { remove_proc_entry("rt_cache", net->proc_net_stat); remove_proc_entry("rt_cache", net->proc_net); #ifdef CONFIG_IP_ROUTE_CLASSID remove_proc_entry("rt_acct", net->proc_net); #endif } static struct pernet_operations ip_rt_proc_ops __net_initdata = { .init = ip_rt_do_proc_init, .exit = ip_rt_do_proc_exit, }; static int __init ip_rt_proc_init(void) { return register_pernet_subsys(&ip_rt_proc_ops); } #else static inline int ip_rt_proc_init(void) { return 0; } #endif /* CONFIG_PROC_FS */ static inline bool rt_is_expired(const struct rtable *rth) { return rth->rt_genid != rt_genid_ipv4(dev_net(rth->dst.dev)); } void rt_cache_flush(struct net *net) { rt_genid_bump_ipv4(net); } static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst, struct sk_buff *skb, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); struct net_device *dev = dst->dev; struct neighbour *n; rcu_read_lock(); if (likely(rt->rt_gw_family == AF_INET)) { n = ip_neigh_gw4(dev, rt->rt_gw4); } else if (rt->rt_gw_family == AF_INET6) { n = ip_neigh_gw6(dev, &rt->rt_gw6); } else { __be32 pkey; pkey = skb ? ip_hdr(skb)->daddr : *((__be32 *) daddr); n = ip_neigh_gw4(dev, pkey); } if (!IS_ERR(n) && !refcount_inc_not_zero(&n->refcnt)) n = NULL; rcu_read_unlock(); return n; } static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) { const struct rtable *rt = container_of(dst, struct rtable, dst); struct net_device *dev = dst->dev; const __be32 *pkey = daddr; if (rt->rt_gw_family == AF_INET) { pkey = (const __be32 *)&rt->rt_gw4; } else if (rt->rt_gw_family == AF_INET6) { return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6); } else if (!daddr || (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) { return; } __ipv4_confirm_neigh(dev, *(__force u32 *)pkey); } /* Hash tables of size 2048..262144 depending on RAM size. * Each bucket uses 8 bytes. */ static u32 ip_idents_mask __read_mostly; static atomic_t *ip_idents __read_mostly; static u32 *ip_tstamps __read_mostly; /* In order to protect privacy, we add a perturbation to identifiers * if one generator is seldom used. This makes hard for an attacker * to infer how many packets were sent between two points in time. */ static u32 ip_idents_reserve(u32 hash, int segs) { u32 bucket, old, now = (u32)jiffies; atomic_t *p_id; u32 *p_tstamp; u32 delta = 0; bucket = hash & ip_idents_mask; p_tstamp = ip_tstamps + bucket; p_id = ip_idents + bucket; old = READ_ONCE(*p_tstamp); if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = get_random_u32_below(now - old); /* If UBSAN reports an error there, please make sure your compiler * supports -fno-strict-overflow before reporting it that was a bug * in UBSAN, and it has been fixed in GCC-8. */ return atomic_add_return(segs + delta, p_id) - segs; } void __ip_select_ident(struct net *net, struct iphdr *iph, int segs) { u32 hash, id; /* Note the following code is not safe, but this is okay. */ if (unlikely(siphash_key_is_zero(&net->ipv4.ip_id_key))) get_random_bytes(&net->ipv4.ip_id_key, sizeof(net->ipv4.ip_id_key)); hash = siphash_3u32((__force u32)iph->daddr, (__force u32)iph->saddr, iph->protocol, &net->ipv4.ip_id_key); id = ip_idents_reserve(hash, segs); iph->id = htons(id); } EXPORT_SYMBOL(__ip_select_ident); static void __build_flow_key(const struct net *net, struct flowi4 *fl4, const struct sock *sk, const struct iphdr *iph, int oif, __u8 tos, u8 prot, u32 mark, int flow_flags) { __u8 scope = RT_SCOPE_UNIVERSE; if (sk) { oif = sk->sk_bound_dev_if; mark = READ_ONCE(sk->sk_mark); tos = ip_sock_rt_tos(sk); scope = ip_sock_rt_scope(sk); prot = inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW : sk->sk_protocol; } flowi4_init_output(fl4, oif, mark, tos & INET_DSCP_MASK, scope, prot, flow_flags, iph->daddr, iph->saddr, 0, 0, sock_net_uid(net, sk)); } static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb, const struct sock *sk) { const struct net *net = dev_net(skb->dev); const struct iphdr *iph = ip_hdr(skb); int oif = skb->dev->ifindex; u8 prot = iph->protocol; u32 mark = skb->mark; __u8 tos = iph->tos; __build_flow_key(net, fl4, sk, iph, oif, tos, prot, mark, 0); } static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); const struct ip_options_rcu *inet_opt; __be32 daddr = inet->inet_daddr; rcu_read_lock(); inet_opt = rcu_dereference(inet->inet_opt); if (inet_opt && inet_opt->opt.srr) daddr = inet_opt->opt.faddr; flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk), ip_sock_rt_scope(sk), inet_test_bit(HDRINCL, sk) ? IPPROTO_RAW : sk->sk_protocol, inet_sk_flowi_flags(sk), daddr, inet->inet_saddr, 0, 0, sk->sk_uid); rcu_read_unlock(); } static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk, const struct sk_buff *skb) { if (skb) build_skb_flow_key(fl4, skb, sk); else build_sk_flow_key(fl4, sk); } static DEFINE_SPINLOCK(fnhe_lock); static void fnhe_flush_routes(struct fib_nh_exception *fnhe) { struct rtable *rt; rt = rcu_dereference(fnhe->fnhe_rth_input); if (rt) { RCU_INIT_POINTER(fnhe->fnhe_rth_input, NULL); dst_dev_put(&rt->dst); dst_release(&rt->dst); } rt = rcu_dereference(fnhe->fnhe_rth_output); if (rt) { RCU_INIT_POINTER(fnhe->fnhe_rth_output, NULL); dst_dev_put(&rt->dst); dst_release(&rt->dst); } } static void fnhe_remove_oldest(struct fnhe_hash_bucket *hash) { struct fib_nh_exception __rcu **fnhe_p, **oldest_p; struct fib_nh_exception *fnhe, *oldest = NULL; for (fnhe_p = &hash->chain; ; fnhe_p = &fnhe->fnhe_next) { fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); if (!fnhe) break; if (!oldest || time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp)) { oldest = fnhe; oldest_p = fnhe_p; } } fnhe_flush_routes(oldest); *oldest_p = oldest->fnhe_next; kfree_rcu(oldest, rcu); } static u32 fnhe_hashfun(__be32 daddr) { static siphash_aligned_key_t fnhe_hash_key; u64 hval; net_get_random_once(&fnhe_hash_key, sizeof(fnhe_hash_key)); hval = siphash_1u32((__force u32)daddr, &fnhe_hash_key); return hash_64(hval, FNHE_HASH_SHIFT); } static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe) { rt->rt_pmtu = fnhe->fnhe_pmtu; rt->rt_mtu_locked = fnhe->fnhe_mtu_locked; rt->dst.expires = fnhe->fnhe_expires; if (fnhe->fnhe_gw) { rt->rt_flags |= RTCF_REDIRECTED; rt->rt_uses_gateway = 1; rt->rt_gw_family = AF_INET; rt->rt_gw4 = fnhe->fnhe_gw; } } static void update_or_create_fnhe(struct fib_nh_common *nhc, __be32 daddr, __be32 gw, u32 pmtu, bool lock, unsigned long expires) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; struct rtable *rt; u32 genid, hval; unsigned int i; int depth; genid = fnhe_genid(dev_net(nhc->nhc_dev)); hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); hash = rcu_dereference(nhc->nhc_exceptions); if (!hash) { hash = kcalloc(FNHE_HASH_SIZE, sizeof(*hash), GFP_ATOMIC); if (!hash) goto out_unlock; rcu_assign_pointer(nhc->nhc_exceptions, hash); } hash += hval; depth = 0; for (fnhe = rcu_dereference(hash->chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { if (fnhe->fnhe_daddr == daddr) break; depth++; } if (fnhe) { if (fnhe->fnhe_genid != genid) fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; if (pmtu) { fnhe->fnhe_pmtu = pmtu; fnhe->fnhe_mtu_locked = lock; } fnhe->fnhe_expires = max(1UL, expires); /* Update all cached dsts too */ rt = rcu_dereference(fnhe->fnhe_rth_input); if (rt) fill_route_from_fnhe(rt, fnhe); rt = rcu_dereference(fnhe->fnhe_rth_output); if (rt) fill_route_from_fnhe(rt, fnhe); } else { /* Randomize max depth to avoid some side channels attacks. */ int max_depth = FNHE_RECLAIM_DEPTH + get_random_u32_below(FNHE_RECLAIM_DEPTH); while (depth > max_depth) { fnhe_remove_oldest(hash); depth--; } fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC); if (!fnhe) goto out_unlock; fnhe->fnhe_next = hash->chain; fnhe->fnhe_genid = genid; fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; fnhe->fnhe_mtu_locked = lock; fnhe->fnhe_expires = max(1UL, expires); rcu_assign_pointer(hash->chain, fnhe); /* Exception created; mark the cached routes for the nexthop * stale, so anyone caching it rechecks if this exception * applies to them. */ rt = rcu_dereference(nhc->nhc_rth_input); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; for_each_possible_cpu(i) { struct rtable __rcu **prt; prt = per_cpu_ptr(nhc->nhc_pcpu_rth_output, i); rt = rcu_dereference(*prt); if (rt) rt->dst.obsolete = DST_OBSOLETE_KILL; } } fnhe->fnhe_stamp = jiffies; out_unlock: spin_unlock_bh(&fnhe_lock); } static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4, bool kill_route) { __be32 new_gw = icmp_hdr(skb)->un.gateway; __be32 old_gw = ip_hdr(skb)->saddr; struct net_device *dev = skb->dev; struct in_device *in_dev; struct fib_result res; struct neighbour *n; struct net *net; switch (icmp_hdr(skb)->code & 7) { case ICMP_REDIR_NET: case ICMP_REDIR_NETTOS: case ICMP_REDIR_HOST: case ICMP_REDIR_HOSTTOS: break; default: return; } if (rt->rt_gw_family != AF_INET || rt->rt_gw4 != old_gw) return; in_dev = __in_dev_get_rcu(dev); if (!in_dev) return; net = dev_net(dev); if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) || ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) || ipv4_is_zeronet(new_gw)) goto reject_redirect; if (!IN_DEV_SHARED_MEDIA(in_dev)) { if (!inet_addr_onlink(in_dev, new_gw, old_gw)) goto reject_redirect; if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev)) goto reject_redirect; } else { if (inet_addr_type(net, new_gw) != RTN_UNICAST) goto reject_redirect; } n = __ipv4_neigh_lookup(rt->dst.dev, (__force u32)new_gw); if (!n) n = neigh_create(&arp_tbl, &new_gw, rt->dst.dev); if (!IS_ERR(n)) { if (!(READ_ONCE(n->nud_state) & NUD_VALID)) { neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc; fib_select_path(net, &res, fl4, skb); nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); } if (kill_route) rt->dst.obsolete = DST_OBSOLETE_KILL; call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n); } neigh_release(n); } return; reject_redirect: #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev)) { const struct iphdr *iph = (const struct iphdr *) skb->data; __be32 daddr = iph->daddr; __be32 saddr = iph->saddr; net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n" " Advised path = %pI4 -> %pI4\n", &old_gw, dev->name, &new_gw, &saddr, &daddr); } #endif ; } static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb) { struct rtable *rt; struct flowi4 fl4; const struct iphdr *iph = (const struct iphdr *) skb->data; struct net *net = dev_net(skb->dev); int oif = skb->dev->ifindex; u8 prot = iph->protocol; u32 mark = skb->mark; __u8 tos = iph->tos; rt = dst_rtable(dst); __build_flow_key(net, &fl4, sk, iph, oif, tos, prot, mark, 0); __ip_do_redirect(rt, skb, &fl4, true); } static void ipv4_negative_advice(struct sock *sk, struct dst_entry *dst) { struct rtable *rt = dst_rtable(dst); if ((dst->obsolete > 0) || (rt->rt_flags & RTCF_REDIRECTED) || rt->dst.expires) sk_dst_reset(sk); } /* * Algorithm: * 1. The first ip_rt_redirect_number redirects are sent * with exponential backoff, then we stop sending them at all, * assuming that the host ignores our redirects. * 2. If we did not see packets requiring redirects * during ip_rt_redirect_silence, we assume that the host * forgot redirected route and start to send redirects again. * * This algorithm is much cheaper and more intelligent than dumb load limiting * in icmp.c. * * NOTE. Do not forget to inhibit load limiting for redirects (redundant) * and "frag. need" (breaks PMTU discovery) in icmp.c. */ void ip_rt_send_redirect(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct in_device *in_dev; struct inet_peer *peer; struct net *net; int log_martians; int vif; rcu_read_lock(); in_dev = __in_dev_get_rcu(rt->dst.dev); if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) { rcu_read_unlock(); return; } log_martians = IN_DEV_LOG_MARTIANS(in_dev); vif = l3mdev_master_ifindex_rcu(rt->dst.dev); rcu_read_unlock(); net = dev_net(rt->dst.dev); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif, 1); if (!peer) { icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt_nexthop(rt, ip_hdr(skb)->daddr)); return; } /* No redirected packets during ip_rt_redirect_silence; * reset the algorithm. */ if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) { peer->rate_tokens = 0; peer->n_redirects = 0; } /* Too many ignored redirects; do not send anything * set dst.rate_last to the last seen redirected packet. */ if (peer->n_redirects >= ip_rt_redirect_number) { peer->rate_last = jiffies; goto out_put_peer; } /* Check for load limit; set rate_last to the latest sent * redirect. */ if (peer->n_redirects == 0 || time_after(jiffies, (peer->rate_last + (ip_rt_redirect_load << peer->n_redirects)))) { __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->n_redirects; if (IS_ENABLED(CONFIG_IP_ROUTE_VERBOSE) && log_martians && peer->n_redirects == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), &ip_hdr(skb)->daddr, &gw); } out_put_peer: inet_putpeer(peer); } static int ip_error(struct sk_buff *skb) { struct rtable *rt = skb_rtable(skb); struct net_device *dev = skb->dev; struct in_device *in_dev; struct inet_peer *peer; unsigned long now; struct net *net; SKB_DR(reason); bool send; int code; if (netif_is_l3_master(skb->dev)) { dev = __dev_get_by_index(dev_net(skb->dev), IPCB(skb)->iif); if (!dev) goto out; } in_dev = __in_dev_get_rcu(dev); /* IP on this device is disabled. */ if (!in_dev) goto out; net = dev_net(rt->dst.dev); if (!IN_DEV_FORWARD(in_dev)) { switch (rt->dst.error) { case EHOSTUNREACH: SKB_DR_SET(reason, IP_INADDRERRORS); __IP_INC_STATS(net, IPSTATS_MIB_INADDRERRORS); break; case ENETUNREACH: SKB_DR_SET(reason, IP_INNOROUTES); __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; } goto out; } switch (rt->dst.error) { case EINVAL: default: goto out; case EHOSTUNREACH: code = ICMP_HOST_UNREACH; break; case ENETUNREACH: code = ICMP_NET_UNREACH; SKB_DR_SET(reason, IP_INNOROUTES); __IP_INC_STATS(net, IPSTATS_MIB_INNOROUTES); break; case EACCES: code = ICMP_PKT_FILTERED; break; } peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, l3mdev_master_ifindex(skb->dev), 1); send = true; if (peer) { now = jiffies; peer->rate_tokens += now - peer->rate_last; if (peer->rate_tokens > ip_rt_error_burst) peer->rate_tokens = ip_rt_error_burst; peer->rate_last = now; if (peer->rate_tokens >= ip_rt_error_cost) peer->rate_tokens -= ip_rt_error_cost; else send = false; inet_putpeer(peer); } if (send) icmp_send(skb, ICMP_DEST_UNREACH, code, 0); out: kfree_skb_reason(skb, reason); return 0; } static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { struct dst_entry *dst = &rt->dst; struct net *net = dev_net(dst->dev); struct fib_result res; bool lock = false; u32 old_mtu; if (ip_mtu_locked(dst)) return; old_mtu = ipv4_mtu(dst); if (old_mtu < mtu) return; if (mtu < net->ipv4.ip_rt_min_pmtu) { lock = true; mtu = min(old_mtu, net->ipv4.ip_rt_min_pmtu); } if (rt->rt_pmtu == mtu && !lock && time_before(jiffies, dst->expires - net->ipv4.ip_rt_mtu_expires / 2)) return; rcu_read_lock(); if (fib_lookup(net, fl4, &res, 0) == 0) { struct fib_nh_common *nhc; fib_select_path(net, &res, fl4, NULL); #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fib_info_num_path(res.fi) > 1) { int nhsel; for (nhsel = 0; nhsel < fib_info_num_path(res.fi); nhsel++) { nhc = fib_info_nhc(res.fi, nhsel); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); } rcu_read_unlock(); return; } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ nhc = FIB_RES_NHC(res); update_or_create_fnhe(nhc, fl4->daddr, 0, mtu, lock, jiffies + net->ipv4.ip_rt_mtu_expires); } rcu_read_unlock(); } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu, bool confirm_neigh) { struct rtable *rt = dst_rtable(dst); struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); /* Don't make lookup fail for bridged encapsulations */ if (skb && netif_is_any_bridge_port(skb->dev)) fl4.flowi4_oif = 0; __ip_rt_update_pmtu(rt, &fl4, mtu); } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, int oif, u8 protocol) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct flowi4 fl4; struct rtable *rt; u32 mark = IP4_REPLY_MARK(net, skb->mark); __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, mark, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_rt_update_pmtu(rt, &fl4, mtu); ip_rt_put(rt); } } EXPORT_SYMBOL_GPL(ipv4_update_pmtu); static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct flowi4 fl4; struct rtable *rt; __build_flow_key(sock_net(sk), &fl4, sk, iph, 0, 0, 0, 0, 0); if (!fl4.flowi4_mark) fl4.flowi4_mark = IP4_REPLY_MARK(sock_net(sk), skb->mark); rt = __ip_route_output_key(sock_net(sk), &fl4); if (!IS_ERR(rt)) { __ip_rt_update_pmtu(rt, &fl4, mtu); ip_rt_put(rt); } } void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct flowi4 fl4; struct rtable *rt; struct dst_entry *odst = NULL; bool new = false; struct net *net = sock_net(sk); bh_lock_sock(sk); if (!ip_sk_accept_pmtu(sk)) goto out; odst = sk_dst_get(sk); if (sock_owned_by_user(sk) || !odst) { __ipv4_sk_update_pmtu(skb, sk, mtu); goto out; } __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = dst_rtable(odst); if (odst->obsolete && !odst->ops->check(odst, 0)) { rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; new = true; } __ip_rt_update_pmtu(dst_rtable(xfrm_dst_path(&rt->dst)), &fl4, mtu); if (!dst_check(&rt->dst, 0)) { if (new) dst_release(&rt->dst); rt = ip_route_output_flow(sock_net(sk), &fl4, sk); if (IS_ERR(rt)) goto out; new = true; } if (new) sk_dst_set(sk, &rt->dst); out: bh_unlock_sock(sk); dst_release(odst); } EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu); void ipv4_redirect(struct sk_buff *skb, struct net *net, int oif, u8 protocol) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct flowi4 fl4; struct rtable *rt; __build_flow_key(net, &fl4, NULL, iph, oif, iph->tos, protocol, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); } } EXPORT_SYMBOL_GPL(ipv4_redirect); void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct flowi4 fl4; struct rtable *rt; struct net *net = sock_net(sk); __build_flow_key(net, &fl4, sk, iph, 0, 0, 0, 0, 0); rt = __ip_route_output_key(net, &fl4); if (!IS_ERR(rt)) { __ip_do_redirect(rt, skb, &fl4, false); ip_rt_put(rt); } } EXPORT_SYMBOL_GPL(ipv4_sk_redirect); INDIRECT_CALLABLE_SCOPE struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie) { struct rtable *rt = dst_rtable(dst); /* All IPV4 dsts are created with ->obsolete set to the value * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. * * When a PMTU/redirect information update invalidates a route, * this is indicated by setting obsolete to DST_OBSOLETE_KILL or * DST_OBSOLETE_DEAD. */ if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt)) return NULL; return dst; } EXPORT_INDIRECT_CALLABLE(ipv4_dst_check); static void ipv4_send_dest_unreach(struct sk_buff *skb) { struct net_device *dev; struct ip_options opt; int res; /* Recompile ip options since IPCB may not be valid anymore. * Also check we have a reasonable ipv4 header. */ if (!pskb_network_may_pull(skb, sizeof(struct iphdr)) || ip_hdr(skb)->version != 4 || ip_hdr(skb)->ihl < 5) return; memset(&opt, 0, sizeof(opt)); if (ip_hdr(skb)->ihl > 5) { if (!pskb_network_may_pull(skb, ip_hdr(skb)->ihl * 4)) return; opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr); rcu_read_lock(); dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev; res = __ip_options_compile(dev_net(dev), &opt, skb, NULL); rcu_read_unlock(); if (res) return; } __icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0, &opt); } static void ipv4_link_failure(struct sk_buff *skb) { struct rtable *rt; ipv4_send_dest_unreach(skb); rt = skb_rtable(skb); if (rt) dst_set_expires(&rt->dst, 0); } static int ip_rt_bug(struct net *net, struct sock *sk, struct sk_buff *skb) { pr_debug("%s: %pI4 -> %pI4, %s\n", __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr, skb->dev ? skb->dev->name : "?"); kfree_skb(skb); WARN_ON(1); return 0; } /* * We do not cache source address of outgoing interface, * because it is used only by IP RR, TS and SRR options, * so that it out of fast path. * * BTW remember: "addr" is allowed to be not aligned * in IP options! */ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) { __be32 src; if (rt_is_output_route(rt)) src = ip_hdr(skb)->saddr; else { struct fib_result res; struct iphdr *iph = ip_hdr(skb); struct flowi4 fl4 = { .daddr = iph->daddr, .saddr = iph->saddr, .flowi4_tos = inet_dscp_to_dsfield(ip4h_dscp(iph)), .flowi4_oif = rt->dst.dev->ifindex, .flowi4_iif = skb->dev->ifindex, .flowi4_mark = skb->mark, }; rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) src = fib_result_prefsrc(dev_net(rt->dst.dev), &res); else src = inet_select_addr(rt->dst.dev, rt_nexthop(rt, iph->daddr), RT_SCOPE_UNIVERSE); rcu_read_unlock(); } memcpy(addr, &src, 4); } #ifdef CONFIG_IP_ROUTE_CLASSID static void set_class_tag(struct rtable *rt, u32 tag) { if (!(rt->dst.tclassid & 0xFFFF)) rt->dst.tclassid |= tag & 0xFFFF; if (!(rt->dst.tclassid & 0xFFFF0000)) rt->dst.tclassid |= tag & 0xFFFF0000; } #endif static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { struct net *net = dev_net(dst->dev); unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, net->ipv4.ip_rt_min_advmss); return min(advmss, IPV4_MAX_PMTU - header_size); } INDIRECT_CALLABLE_SCOPE unsigned int ipv4_mtu(const struct dst_entry *dst) { return ip_dst_mtu_maybe_forward(dst, false); } EXPORT_INDIRECT_CALLABLE(ipv4_mtu); static void ip_del_fnhe(struct fib_nh_common *nhc, __be32 daddr) { struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe, __rcu **fnhe_p; u32 hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); hash = rcu_dereference_protected(nhc->nhc_exceptions, lockdep_is_held(&fnhe_lock)); hash += hval; fnhe_p = &hash->chain; fnhe = rcu_dereference_protected(*fnhe_p, lockdep_is_held(&fnhe_lock)); while (fnhe) { if (fnhe->fnhe_daddr == daddr) { rcu_assign_pointer(*fnhe_p, rcu_dereference_protected( fnhe->fnhe_next, lockdep_is_held(&fnhe_lock))); /* set fnhe_daddr to 0 to ensure it won't bind with * new dsts in rt_bind_exception(). */ fnhe->fnhe_daddr = 0; fnhe_flush_routes(fnhe); kfree_rcu(fnhe, rcu); break; } fnhe_p = &fnhe->fnhe_next; fnhe = rcu_dereference_protected(fnhe->fnhe_next, lockdep_is_held(&fnhe_lock)); } spin_unlock_bh(&fnhe_lock); } static struct fib_nh_exception *find_exception(struct fib_nh_common *nhc, __be32 daddr) { struct fnhe_hash_bucket *hash = rcu_dereference(nhc->nhc_exceptions); struct fib_nh_exception *fnhe; u32 hval; if (!hash) return NULL; hval = fnhe_hashfun(daddr); for (fnhe = rcu_dereference(hash[hval].chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { if (fnhe->fnhe_daddr == daddr) { if (fnhe->fnhe_expires && time_after(jiffies, fnhe->fnhe_expires)) { ip_del_fnhe(nhc, daddr); break; } return fnhe; } } return NULL; } /* MTU selection: * 1. mtu on route is locked - use it * 2. mtu from nexthop exception * 3. mtu from egress device */ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) { struct fib_nh_common *nhc = res->nhc; struct net_device *dev = nhc->nhc_dev; struct fib_info *fi = res->fi; u32 mtu = 0; if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) || fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU)) mtu = fi->fib_mtu; if (likely(!mtu)) { struct fib_nh_exception *fnhe; fnhe = find_exception(nhc, daddr); if (fnhe && !time_after_eq(jiffies, fnhe->fnhe_expires)) mtu = fnhe->fnhe_pmtu; } if (likely(!mtu)) mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu); } static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, __be32 daddr, const bool do_cache) { bool ret = false; spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { struct rtable __rcu **porig; struct rtable *orig; int genid = fnhe_genid(dev_net(rt->dst.dev)); if (rt_is_input_route(rt)) porig = &fnhe->fnhe_rth_input; else porig = &fnhe->fnhe_rth_output; orig = rcu_dereference(*porig); if (fnhe->fnhe_genid != genid) { fnhe->fnhe_genid = genid; fnhe->fnhe_gw = 0; fnhe->fnhe_pmtu = 0; fnhe->fnhe_expires = 0; fnhe->fnhe_mtu_locked = false; fnhe_flush_routes(fnhe); orig = NULL; } fill_route_from_fnhe(rt, fnhe); if (!rt->rt_gw4) { rt->rt_gw4 = daddr; rt->rt_gw_family = AF_INET; } if (do_cache) { dst_hold(&rt->dst); rcu_assign_pointer(*porig, rt); if (orig) { dst_dev_put(&orig->dst); dst_release(&orig->dst); } ret = true; } fnhe->fnhe_stamp = jiffies; } spin_unlock_bh(&fnhe_lock); return ret; } static bool rt_cache_route(struct fib_nh_common *nhc, struct rtable *rt) { struct rtable *orig, *prev, **p; bool ret = true; if (rt_is_input_route(rt)) { p = (struct rtable **)&nhc->nhc_rth_input; } else { p = (struct rtable **)raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } orig = *p; /* hold dst before doing cmpxchg() to avoid race condition * on this dst */ dst_hold(&rt->dst); prev = cmpxchg(p, orig, rt); if (prev == orig) { if (orig) { rt_add_uncached_list(orig); dst_release(&orig->dst); } } else { dst_release(&rt->dst); ret = false; } return ret; } struct uncached_list { spinlock_t lock; struct list_head head; }; static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); void rt_add_uncached_list(struct rtable *rt) { struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); rt->dst.rt_uncached_list = ul; spin_lock_bh(&ul->lock); list_add_tail(&rt->dst.rt_uncached, &ul->head); spin_unlock_bh(&ul->lock); } void rt_del_uncached_list(struct rtable *rt) { if (!list_empty(&rt->dst.rt_uncached)) { struct uncached_list *ul = rt->dst.rt_uncached_list; spin_lock_bh(&ul->lock); list_del_init(&rt->dst.rt_uncached); spin_unlock_bh(&ul->lock); } } static void ipv4_dst_destroy(struct dst_entry *dst) { ip_dst_metrics_put(dst); rt_del_uncached_list(dst_rtable(dst)); } void rt_flush_dev(struct net_device *dev) { struct rtable *rt, *safe; int cpu; for_each_possible_cpu(cpu) { struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); if (list_empty(&ul->head)) continue; spin_lock_bh(&ul->lock); list_for_each_entry_safe(rt, safe, &ul->head, dst.rt_uncached) { if (rt->dst.dev != dev) continue; rt->dst.dev = blackhole_netdev; netdev_ref_replace(dev, blackhole_netdev, &rt->dst.dev_tracker, GFP_ATOMIC); list_del_init(&rt->dst.rt_uncached); } spin_unlock_bh(&ul->lock); } } static bool rt_cache_valid(const struct rtable *rt) { return rt && rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && !rt_is_expired(rt); } static void rt_set_nexthop(struct rtable *rt, __be32 daddr, const struct fib_result *res, struct fib_nh_exception *fnhe, struct fib_info *fi, u16 type, u32 itag, const bool do_cache) { bool cached = false; if (fi) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); if (nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK) { rt->rt_uses_gateway = 1; rt->rt_gw_family = nhc->nhc_gw_family; /* only INET and INET6 are supported */ if (likely(nhc->nhc_gw_family == AF_INET)) rt->rt_gw4 = nhc->nhc_gw.ipv4; else rt->rt_gw6 = nhc->nhc_gw.ipv6; } ip_dst_init_metrics(&rt->dst, fi->fib_metrics); #ifdef CONFIG_IP_ROUTE_CLASSID if (nhc->nhc_family == AF_INET) { struct fib_nh *nh; nh = container_of(nhc, struct fib_nh, nh_common); rt->dst.tclassid = nh->nh_tclassid; } #endif rt->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (unlikely(fnhe)) cached = rt_bind_exception(rt, fnhe, daddr, do_cache); else if (do_cache) cached = rt_cache_route(nhc, rt); if (unlikely(!cached)) { /* Routes we intend to cache in nexthop exception or * FIB nexthop have the DST_NOCACHE bit clear. * However, if we are unsuccessful at storing this * route into the cache we really need to set it. */ if (!rt->rt_gw4) { rt->rt_gw_family = AF_INET; rt->rt_gw4 = daddr; } rt_add_uncached_list(rt); } } else rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID #ifdef CONFIG_IP_MULTIPLE_TABLES set_class_tag(rt, res->tclassid); #endif set_class_tag(rt, itag); #endif } struct rtable *rt_dst_alloc(struct net_device *dev, unsigned int flags, u16 type, bool noxfrm) { struct rtable *rt; rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, (noxfrm ? DST_NOXFRM : 0)); if (rt) { rt->rt_genid = rt_genid_ipv4(dev_net(dev)); rt->rt_flags = flags; rt->rt_type = type; rt->rt_is_input = 0; rt->rt_iif = 0; rt->rt_pmtu = 0; rt->rt_mtu_locked = 0; rt->rt_uses_gateway = 0; rt->rt_gw_family = 0; rt->rt_gw4 = 0; rt->dst.output = ip_output; if (flags & RTCF_LOCAL) rt->dst.input = ip_local_deliver; } return rt; } EXPORT_SYMBOL(rt_dst_alloc); struct rtable *rt_dst_clone(struct net_device *dev, struct rtable *rt) { struct rtable *new_rt; new_rt = dst_alloc(&ipv4_dst_ops, dev, DST_OBSOLETE_FORCE_CHK, rt->dst.flags); if (new_rt) { new_rt->rt_genid = rt_genid_ipv4(dev_net(dev)); new_rt->rt_flags = rt->rt_flags; new_rt->rt_type = rt->rt_type; new_rt->rt_is_input = rt->rt_is_input; new_rt->rt_iif = rt->rt_iif; new_rt->rt_pmtu = rt->rt_pmtu; new_rt->rt_mtu_locked = rt->rt_mtu_locked; new_rt->rt_gw_family = rt->rt_gw_family; if (rt->rt_gw_family == AF_INET) new_rt->rt_gw4 = rt->rt_gw4; else if (rt->rt_gw_family == AF_INET6) new_rt->rt_gw6 = rt->rt_gw6; new_rt->dst.input = rt->dst.input; new_rt->dst.output = rt->dst.output; new_rt->dst.error = rt->dst.error; new_rt->dst.lastuse = jiffies; new_rt->dst.lwtstate = lwtstate_get(rt->dst.lwtstate); } return new_rt; } EXPORT_SYMBOL(rt_dst_clone); /* called in rcu_read_lock() section */ enum skb_drop_reason ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, struct in_device *in_dev, u32 *itag) { enum skb_drop_reason reason; /* Primary sanity checks. */ if (!in_dev) return SKB_DROP_REASON_NOT_SPECIFIED; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) return SKB_DROP_REASON_IP_INVALID_SOURCE; if (skb->protocol != htons(ETH_P_IP)) return SKB_DROP_REASON_INVALID_PROTO; if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) return SKB_DROP_REASON_IP_LOCALNET; if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr) && ip_hdr(skb)->protocol != IPPROTO_IGMP) return SKB_DROP_REASON_IP_INVALID_SOURCE; } else { reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0, dev, in_dev, itag); if (reason) return reason; } return SKB_NOT_DROPPED_YET; } /* called in rcu_read_lock() section */ static enum skb_drop_reason ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, int our) { struct in_device *in_dev = __in_dev_get_rcu(dev); unsigned int flags = RTCF_MULTICAST; enum skb_drop_reason reason; struct rtable *rth; u32 itag = 0; reason = ip_mc_validate_source(skb, daddr, saddr, dscp, dev, in_dev, &itag); if (reason) return reason; if (our) flags |= RTCF_LOCAL; if (IN_DEV_ORCONF(in_dev, NOPOLICY)) IPCB(skb)->flags |= IPSKB_NOPOLICY; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, false); if (!rth) return SKB_DROP_REASON_NOMEM; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif rth->dst.output = ip_rt_bug; rth->rt_is_input= 1; #ifdef CONFIG_IP_MROUTE if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) rth->dst.input = ip_mr_input; #endif RT_CACHE_STAT_INC(in_slow_mc); skb_dst_drop(skb); skb_dst_set(skb, &rth->dst); return SKB_NOT_DROPPED_YET; } static void ip_handle_martian_source(struct net_device *dev, struct in_device *in_dev, struct sk_buff *skb, __be32 daddr, __be32 saddr) { RT_CACHE_STAT_INC(in_martian_src); #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) { /* * RFC1812 recommendation, if source is martian, * the only hint is MAC header. */ pr_warn("martian source %pI4 from %pI4, on dev %s\n", &daddr, &saddr, dev->name); if (dev->hard_header_len && skb_mac_header_was_set(skb)) { print_hex_dump(KERN_WARNING, "ll header: ", DUMP_PREFIX_OFFSET, 16, 1, skb_mac_header(skb), dev->hard_header_len, false); } } #endif } /* called in rcu_read_lock() section */ static enum skb_drop_reason __mkroute_input(struct sk_buff *skb, const struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, dscp_t dscp) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct fib_nh_common *nhc = FIB_RES_NHC(*res); struct net_device *dev = nhc->nhc_dev; struct fib_nh_exception *fnhe; struct rtable *rth; int err; struct in_device *out_dev; bool do_cache; u32 itag = 0; /* get a working reference to the output device */ out_dev = __in_dev_get_rcu(dev); if (!out_dev) { net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n"); return reason; } err = fib_validate_source(skb, saddr, daddr, dscp, FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); if (err < 0) { reason = -err; ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr, saddr); goto cleanup; } do_cache = res->fi && !itag; if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && skb->protocol == htons(ETH_P_IP)) { __be32 gw; gw = nhc->nhc_gw_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; if (IN_DEV_SHARED_MEDIA(out_dev) || inet_addr_onlink(out_dev, saddr, gw)) IPCB(skb)->flags |= IPSKB_DOREDIRECT; } if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is * invalid for proxy arp. DNAT routes are always valid. * * Proxy arp feature have been extended to allow, ARP * replies back to the same interface, to support * Private VLAN switch technologies. See arp.c. */ if (out_dev == in_dev && IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { reason = SKB_DROP_REASON_ARP_PVLAN_DISABLE; goto cleanup; } } if (IN_DEV_ORCONF(in_dev, NOPOLICY)) IPCB(skb)->flags |= IPSKB_NOPOLICY; fnhe = find_exception(nhc, daddr); if (do_cache) { if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); else rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; } } rth = rt_dst_alloc(out_dev->dev, 0, res->type, IN_DEV_ORCONF(out_dev, NOXFRM)); if (!rth) { reason = SKB_DROP_REASON_NOMEM; goto cleanup; } rth->rt_is_input = 1; RT_CACHE_STAT_INC(in_slow_tot); rth->dst.input = ip_forward; rt_set_nexthop(rth, daddr, res, fnhe, res->fi, res->type, itag, do_cache); lwtunnel_set_redirect(&rth->dst); skb_dst_set(skb, &rth->dst); out: reason = SKB_NOT_DROPPED_YET; cleanup: return reason; } #ifdef CONFIG_IP_ROUTE_MULTIPATH /* To make ICMP packets follow the right flow, the multipath hash is * calculated from the inner IP addresses. */ static void ip_multipath_l3_keys(const struct sk_buff *skb, struct flow_keys *hash_keys) { const struct iphdr *outer_iph = ip_hdr(skb); const struct iphdr *key_iph = outer_iph; const struct iphdr *inner_iph; const struct icmphdr *icmph; struct iphdr _inner_iph; struct icmphdr _icmph; if (likely(outer_iph->protocol != IPPROTO_ICMP)) goto out; if (unlikely((outer_iph->frag_off & htons(IP_OFFSET)) != 0)) goto out; icmph = skb_header_pointer(skb, outer_iph->ihl * 4, sizeof(_icmph), &_icmph); if (!icmph) goto out; if (!icmp_is_err(icmph->type)) goto out; inner_iph = skb_header_pointer(skb, outer_iph->ihl * 4 + sizeof(_icmph), sizeof(_inner_iph), &_inner_iph); if (!inner_iph) goto out; key_iph = inner_iph; out: hash_keys->addrs.v4addrs.src = key_iph->saddr; hash_keys->addrs.v4addrs.dst = key_iph->daddr; } static u32 fib_multipath_custom_hash_outer(const struct net *net, const struct sk_buff *skb, bool *p_has_inner) { u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) return 0; memset(&hash_keys, 0, sizeof(hash_keys)); skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) hash_keys.basic.ip_proto = keys.basic.ip_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) hash_keys.ports.src = keys.ports.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = keys.ports.dst; *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 fib_multipath_custom_hash_inner(const struct net *net, const struct sk_buff *skb, bool has_inner) { u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys keys, hash_keys; /* We assume the packet carries an encapsulation, but if none was * encountered during dissection of the outer flow, then there is no * point in calling the flow dissector again. */ if (!has_inner) return 0; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) return 0; memset(&hash_keys, 0, sizeof(hash_keys)); skb_flow_dissect_flow_keys(skb, &keys, 0); if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) return 0; if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) hash_keys.tags.flow_label = keys.tags.flow_label; } if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) hash_keys.basic.ip_proto = keys.basic.ip_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) hash_keys.ports.src = keys.ports.src; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) hash_keys.ports.dst = keys.ports.dst; return fib_multipath_hash_from_keys(net, &hash_keys); } static u32 fib_multipath_custom_hash_skb(const struct net *net, const struct sk_buff *skb) { u32 mhash, mhash_inner; bool has_inner = true; mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); return jhash_2words(mhash, mhash_inner, 0); } static u32 fib_multipath_custom_hash_fl4(const struct net *net, const struct flowi4 *fl4) { u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields); struct flow_keys hash_keys; if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) return 0; memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) hash_keys.addrs.v4addrs.src = fl4->saddr; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) hash_keys.addrs.v4addrs.dst = fl4->daddr; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) hash_keys.basic.ip_proto = fl4->flowi4_proto; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) hash_keys.ports.src = fl4->fl4_sport; if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) hash_keys.ports.dst = fl4->fl4_dport; return fib_multipath_hash_from_keys(net, &hash_keys); } /* if skb is set it will be used and fl4 can be NULL */ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys) { u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0; struct flow_keys hash_keys; u32 mhash = 0; switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) { case 0: memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; if (skb) { ip_multipath_l3_keys(skb, &hash_keys); } else { hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 1: /* skb is currently provided only when forwarding */ if (skb) { unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP; struct flow_keys keys; /* short-circuit if we already have L4 hash present */ if (skb->l4_hash) return skb_get_hash_raw(skb) >> 1; memset(&hash_keys, 0, sizeof(hash_keys)); if (!flkeys) { skb_flow_dissect_flow_keys(skb, &keys, flag); flkeys = &keys; } hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = flkeys->addrs.v4addrs.src; hash_keys.addrs.v4addrs.dst = flkeys->addrs.v4addrs.dst; hash_keys.ports.src = flkeys->ports.src; hash_keys.ports.dst = flkeys->ports.dst; hash_keys.basic.ip_proto = flkeys->basic.ip_proto; } else { memset(&hash_keys, 0, sizeof(hash_keys)); hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; hash_keys.ports.src = fl4->fl4_sport; hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); /* skb is currently provided only when forwarding */ if (skb) { struct flow_keys keys; skb_flow_dissect_flow_keys(skb, &keys, 0); /* Inner can be v4 or v6 */ if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; hash_keys.tags.flow_label = keys.tags.flow_label; hash_keys.basic.ip_proto = keys.basic.ip_proto; } else { /* Same as case 0 */ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; ip_multipath_l3_keys(skb, &hash_keys); } } else { /* Same as case 0 */ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } mhash = fib_multipath_hash_from_keys(net, &hash_keys); break; case 3: if (skb) mhash = fib_multipath_custom_hash_skb(net, skb); else mhash = fib_multipath_custom_hash_fl4(net, fl4); break; } if (multipath_hash) mhash = jhash_2words(mhash, multipath_hash, 0); return mhash >> 1; } #endif /* CONFIG_IP_ROUTE_MULTIPATH */ static enum skb_drop_reason ip_mkroute_input(struct sk_buff *skb, struct fib_result *res, struct in_device *in_dev, __be32 daddr, __be32 saddr, dscp_t dscp, struct flow_keys *hkeys) { #ifdef CONFIG_IP_ROUTE_MULTIPATH if (res->fi && fib_info_num_path(res->fi) > 1) { int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys); fib_select_multipath(res, h); IPCB(skb)->flags |= IPSKB_MULTIPATH; } #endif /* create a routing cache entry */ return __mkroute_input(skb, res, in_dev, daddr, saddr, dscp); } /* Implements all the saddr-related checks as ip_route_input_slow(), * assuming daddr is valid and the destination is not a local broadcast one. * Uses the provided hint instead of performing a route lookup. */ enum skb_drop_reason ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, const struct sk_buff *hint) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct in_device *in_dev = __in_dev_get_rcu(dev); struct rtable *rt = skb_rtable(hint); struct net *net = dev_net(dev); u32 tag = 0; if (!in_dev) return reason; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) { reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; } if (ipv4_is_zeronet(saddr)) { reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; } if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { reason = SKB_DROP_REASON_IP_LOCALNET; goto martian_source; } if (rt->rt_type != RTN_LOCAL) goto skip_validate_source; reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev, in_dev, &tag); if (reason) goto martian_source; skip_validate_source: skb_dst_copy(skb, hint); return SKB_NOT_DROPPED_YET; martian_source: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); return reason; } /* get device for dst_alloc with local routes */ static struct net_device *ip_rt_get_dev(struct net *net, const struct fib_result *res) { struct fib_nh_common *nhc = res->fi ? res->nhc : NULL; struct net_device *dev = NULL; if (nhc) dev = l3mdev_master_dev_rcu(nhc->nhc_dev); return dev ? : net->loopback_dev; } /* * NOTE. We drop all the packets that has local source * addresses, because every properly looped back packet * must have correct destination already attached by output routine. * Changes in the enforced policies must be applied also to * ip_route_use_hint(). * * Such approach solves two big problems: * 1. Not simplex devices are handled properly. * 2. IP spoofing attempts are filtered with 100% of guarantee. * called with rcu_read_lock() */ static enum skb_drop_reason ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, struct fib_result *res) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct in_device *in_dev = __in_dev_get_rcu(dev); struct flow_keys *flkeys = NULL, _flkeys; struct net *net = dev_net(dev); struct ip_tunnel_info *tun_info; int err = -EINVAL; unsigned int flags = 0; u32 itag = 0; struct rtable *rth; struct flowi4 fl4; bool do_cache = true; /* IP on this device is disabled. */ if (!in_dev) goto out; /* Check for the most weird martians, which can be not detected * by fib_lookup. */ tun_info = skb_tunnel_info(skb); if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) fl4.flowi4_tun_key.tun_id = tun_info->key.tun_id; else fl4.flowi4_tun_key.tun_id = 0; skb_dst_drop(skb); if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr)) { reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; } res->fi = NULL; res->table = NULL; if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0)) goto brd_input; /* Accept zero addresses only to limited broadcast; * I even do not know to fix it or not. Waiting for complains :-) */ if (ipv4_is_zeronet(saddr)) { reason = SKB_DROP_REASON_IP_INVALID_SOURCE; goto martian_source; } if (ipv4_is_zeronet(daddr)) { reason = SKB_DROP_REASON_IP_INVALID_DEST; goto martian_destination; } /* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(), * and call it once if daddr or/and saddr are loopback addresses */ if (ipv4_is_loopback(daddr)) { if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { reason = SKB_DROP_REASON_IP_LOCALNET; goto martian_destination; } } else if (ipv4_is_loopback(saddr)) { if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net)) { reason = SKB_DROP_REASON_IP_LOCALNET; goto martian_source; } } /* * Now we are ready to route packet. */ fl4.flowi4_l3mdev = 0; fl4.flowi4_oif = 0; fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = inet_dscp_to_dsfield(dscp); fl4.flowi4_scope = RT_SCOPE_UNIVERSE; fl4.flowi4_flags = 0; fl4.daddr = daddr; fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); fl4.flowi4_multipath_hash = 0; if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { flkeys = &_flkeys; } else { fl4.flowi4_proto = 0; fl4.fl4_sport = 0; fl4.fl4_dport = 0; } err = fib_lookup(net, &fl4, res, 0); if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) err = -EHOSTUNREACH; goto no_route; } if (res->type == RTN_BROADCAST) { if (IN_DEV_BFORWARD(in_dev)) goto make_route; /* not do cache if bc_forwarding is enabled */ if (IPV4_DEVCONF_ALL_RO(net, BC_FORWARDING)) do_cache = false; goto brd_input; } err = -EINVAL; if (res->type == RTN_LOCAL) { reason = fib_validate_source_reason(skb, saddr, daddr, dscp, 0, dev, in_dev, &itag); if (reason) goto martian_source; goto local_input; } if (!IN_DEV_FORWARD(in_dev)) { err = -EHOSTUNREACH; goto no_route; } if (res->type != RTN_UNICAST) { reason = SKB_DROP_REASON_IP_INVALID_DEST; goto martian_destination; } make_route: reason = ip_mkroute_input(skb, res, in_dev, daddr, saddr, dscp, flkeys); out: return reason; brd_input: if (skb->protocol != htons(ETH_P_IP)) { reason = SKB_DROP_REASON_INVALID_PROTO; goto out; } if (!ipv4_is_zeronet(saddr)) { reason = fib_validate_source_reason(skb, saddr, 0, dscp, 0, dev, in_dev, &itag); if (reason) goto martian_source; } flags |= RTCF_BROADCAST; res->type = RTN_BROADCAST; RT_CACHE_STAT_INC(in_brd); local_input: if (IN_DEV_ORCONF(in_dev, NOPOLICY)) IPCB(skb)->flags |= IPSKB_NOPOLICY; do_cache &= res->fi && !itag; if (do_cache) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); rth = rcu_dereference(nhc->nhc_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); reason = SKB_NOT_DROPPED_YET; goto out; } } rth = rt_dst_alloc(ip_rt_get_dev(net, res), flags | RTCF_LOCAL, res->type, false); if (!rth) goto e_nobufs; rth->dst.output= ip_rt_bug; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; #endif rth->rt_is_input = 1; RT_CACHE_STAT_INC(in_slow_tot); if (res->type == RTN_UNREACHABLE) { rth->dst.input= ip_error; rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } if (do_cache) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { WARN_ON(rth->dst.input == lwtunnel_input); rth->dst.lwtstate->orig_input = rth->dst.input; rth->dst.input = lwtunnel_input; } if (unlikely(!rt_cache_route(nhc, rth))) rt_add_uncached_list(rth); } skb_dst_set(skb, &rth->dst); reason = SKB_NOT_DROPPED_YET; goto out; no_route: RT_CACHE_STAT_INC(in_no_route); res->type = RTN_UNREACHABLE; res->fi = NULL; res->table = NULL; goto local_input; /* * Do not cache martian addresses: they should be logged (RFC1812) */ martian_destination: RT_CACHE_STAT_INC(in_martian_dst); #ifdef CONFIG_IP_ROUTE_VERBOSE if (IN_DEV_LOG_MARTIANS(in_dev)) net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n", &daddr, &saddr, dev->name); #endif e_nobufs: reason = SKB_DROP_REASON_NOMEM; goto out; martian_source: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); goto out; } /* called with rcu_read_lock held */ static enum skb_drop_reason ip_route_input_rcu(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev, struct fib_result *res) { /* Multicast recognition logic is moved from route cache to here. * The problem was that too many Ethernet cards have broken/missing * hardware multicast filters :-( As result the host on multicasting * network acquires a lot of useless route cache entries, sort of * SDR messages from all the world. Now we try to get rid of them. * Really, provided software IP multicast filter is organized * reasonably (at least, hashed), it does not result in a slowdown * comparing with route cache reject entries. * Note, that multicast routers are not affected, because * route cache entry is created eventually. */ if (ipv4_is_multicast(daddr)) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; struct in_device *in_dev = __in_dev_get_rcu(dev); int our = 0; if (!in_dev) return -EINVAL; our = ip_check_mc_rcu(in_dev, daddr, saddr, ip_hdr(skb)->protocol); /* check l3 master if no match yet */ if (!our && netif_is_l3_slave(dev)) { struct in_device *l3_in_dev; l3_in_dev = __in_dev_get_rcu(skb->dev); if (l3_in_dev) our = ip_check_mc_rcu(l3_in_dev, daddr, saddr, ip_hdr(skb)->protocol); } if (our #ifdef CONFIG_IP_MROUTE || (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev)) #endif ) { reason = ip_route_input_mc(skb, daddr, saddr, dscp, dev, our); } return reason; } return ip_route_input_slow(skb, daddr, saddr, dscp, dev, res); } enum skb_drop_reason ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr, dscp_t dscp, struct net_device *dev) { enum skb_drop_reason reason; struct fib_result res; rcu_read_lock(); reason = ip_route_input_rcu(skb, daddr, saddr, dscp, dev, &res); rcu_read_unlock(); return reason; } EXPORT_SYMBOL(ip_route_input_noref); /* called with rcu_read_lock() */ static struct rtable *__mkroute_output(const struct fib_result *res, const struct flowi4 *fl4, int orig_oif, struct net_device *dev_out, unsigned int flags) { struct fib_info *fi = res->fi; struct fib_nh_exception *fnhe; struct in_device *in_dev; u16 type = res->type; struct rtable *rth; bool do_cache; in_dev = __in_dev_get_rcu(dev_out); if (!in_dev) return ERR_PTR(-EINVAL); if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK) && !netif_is_l3_master(dev_out)) return ERR_PTR(-EINVAL); if (ipv4_is_lbcast(fl4->daddr)) type = RTN_BROADCAST; else if (ipv4_is_multicast(fl4->daddr)) type = RTN_MULTICAST; else if (ipv4_is_zeronet(fl4->daddr)) return ERR_PTR(-EINVAL); if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; do_cache = true; if (type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; fi = NULL; } else if (type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr, fl4->flowi4_proto)) flags &= ~RTCF_LOCAL; else do_cache = false; /* If multicast route do not exist use * default one, but do not gateway in this case. * Yes, it is hack. */ if (fi && res->prefixlen < 4) fi = NULL; } else if ((type == RTN_LOCAL) && (orig_oif != 0) && (orig_oif != dev_out->ifindex)) { /* For local routes that require a particular output interface * we do not want to cache the result. Caching the result * causes incorrect behaviour when there are multiple source * addresses on the interface, the end result being that if the * intended recipient is waiting on that interface for the * packet he won't receive it because it will be delivered on * the loopback interface and the IP_PKTINFO ipi_ifindex will * be set to the loopback interface as well. */ do_cache = false; } fnhe = NULL; do_cache &= fi != NULL; if (fi) { struct fib_nh_common *nhc = FIB_RES_NHC(*res); struct rtable __rcu **prth; fnhe = find_exception(nhc, fl4->daddr); if (!do_cache) goto add; if (fnhe) { prth = &fnhe->fnhe_rth_output; } else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && !(nhc->nhc_gw_family && nhc->nhc_scope == RT_SCOPE_LINK))) { do_cache = false; goto add; } prth = raw_cpu_ptr(nhc->nhc_pcpu_rth_output); } rth = rcu_dereference(*prth); if (rt_cache_valid(rth) && dst_hold_safe(&rth->dst)) return rth; } add: rth = rt_dst_alloc(dev_out, flags, type, IN_DEV_ORCONF(in_dev, NOXFRM)); if (!rth) return ERR_PTR(-ENOBUFS); rth->rt_iif = orig_oif; RT_CACHE_STAT_INC(out_slow_tot); if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) { if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) { rth->dst.output = ip_mc_output; RT_CACHE_STAT_INC(out_slow_mc); } #ifdef CONFIG_IP_MROUTE if (type == RTN_MULTICAST) { if (IN_DEV_MFORWARD(in_dev) && !ipv4_is_local_multicast(fl4->daddr)) { rth->dst.input = ip_mr_input; rth->dst.output = ip_mc_output; } } #endif } rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0, do_cache); lwtunnel_set_redirect(&rth->dst); return rth; } /* * Major route resolver routine. */ struct rtable *ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, const struct sk_buff *skb) { struct fib_result res = { .type = RTN_UNSPEC, .fi = NULL, .table = NULL, .tclassid = 0, }; struct rtable *rth; fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos &= INET_DSCP_MASK; rcu_read_lock(); rth = ip_route_output_key_hash_rcu(net, fl4, &res, skb); rcu_read_unlock(); return rth; } EXPORT_SYMBOL_GPL(ip_route_output_key_hash); struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, struct fib_result *res, const struct sk_buff *skb) { struct net_device *dev_out = NULL; int orig_oif = fl4->flowi4_oif; unsigned int flags = 0; struct rtable *rth; int err; if (fl4->saddr) { if (ipv4_is_multicast(fl4->saddr) || ipv4_is_lbcast(fl4->saddr) || ipv4_is_zeronet(fl4->saddr)) { rth = ERR_PTR(-EINVAL); goto out; } rth = ERR_PTR(-ENETUNREACH); /* I removed check for oif == dev_out->oif here. * It was wrong for two reasons: * 1. ip_dev_find(net, saddr) can return wrong iface, if saddr * is assigned to multiple interfaces. * 2. Moreover, we are allowed to send packets with saddr * of another iface. --ANK */ if (fl4->flowi4_oif == 0 && (ipv4_is_multicast(fl4->daddr) || ipv4_is_lbcast(fl4->daddr))) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ dev_out = __ip_dev_find(net, fl4->saddr, false); if (!dev_out) goto out; /* Special hack: user can direct multicasts * and limited broadcast via necessary interface * without fiddling with IP_MULTICAST_IF or IP_PKTINFO. * This hack is not just for fun, it allows * vic,vat and friends to work. * They bind socket to loopback, set ttl to zero * and expect that it will work. * From the viewpoint of routing cache they are broken, * because we are not allowed to build multicast path * with loopback source addr (look, routing cache * cannot know, that ttl is zero, so that packet * will not leave this host and route is valid). * Luckily, this hack is good workaround. */ fl4->flowi4_oif = dev_out->ifindex; goto make_route; } if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) { /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ if (!__ip_dev_find(net, fl4->saddr, false)) goto out; } } if (fl4->flowi4_oif) { dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif); rth = ERR_PTR(-ENODEV); if (!dev_out) goto out; /* RACE: Check return value of inet_select_addr instead. */ if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) { rth = ERR_PTR(-ENETUNREACH); goto out; } if (ipv4_is_local_multicast(fl4->daddr) || ipv4_is_lbcast(fl4->daddr) || fl4->flowi4_proto == IPPROTO_IGMP) { if (!fl4->saddr) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); goto make_route; } if (!fl4->saddr) { if (ipv4_is_multicast(fl4->daddr)) fl4->saddr = inet_select_addr(dev_out, 0, fl4->flowi4_scope); else if (!fl4->daddr) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } } if (!fl4->daddr) { fl4->daddr = fl4->saddr; if (!fl4->daddr) fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; fl4->flowi4_oif = LOOPBACK_IFINDEX; res->type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; } err = fib_lookup(net, fl4, res, 0); if (err) { res->fi = NULL; res->table = NULL; if (fl4->flowi4_oif && (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { /* Apparently, routing tables are wrong. Assume, * that the destination is on link. * * WHY? DW. * Because we are allowed to send to iface * even if it has NO routes and NO assigned * addresses. When oif is specified, routing * tables are looked up with only one purpose: * to catch if destination is gatewayed, rather than * direct. Moreover, if MSG_DONTROUTE is set, * we send packet, ignoring both routing tables * and ifaddr state. --ANK * * * We could make it even if oif is unknown, * likely IPv6, but we do not. */ if (fl4->saddr == 0) fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_LINK); res->type = RTN_UNICAST; goto make_route; } rth = ERR_PTR(err); goto out; } if (res->type == RTN_LOCAL) { if (!fl4->saddr) { if (res->fi->fib_prefsrc) fl4->saddr = res->fi->fib_prefsrc; else fl4->saddr = fl4->daddr; } /* L3 master device is the loopback for that domain */ dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? : net->loopback_dev; /* make sure orig_oif points to fib result device even * though packet rx/tx happens over loopback or l3mdev */ orig_oif = FIB_RES_OIF(*res); fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; } fib_select_path(net, res, fl4, skb); dev_out = FIB_RES_DEV(*res); make_route: rth = __mkroute_output(res, fl4, orig_oif, dev_out, flags); out: return rth; } static struct dst_ops ipv4_dst_blackhole_ops = { .family = AF_INET, .default_advmss = ipv4_default_advmss, .neigh_lookup = ipv4_neigh_lookup, .check = dst_blackhole_check, .cow_metrics = dst_blackhole_cow_metrics, .update_pmtu = dst_blackhole_update_pmtu, .redirect = dst_blackhole_redirect, .mtu = dst_blackhole_mtu, }; struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig) { struct rtable *ort = dst_rtable(dst_orig); struct rtable *rt; rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; new->__use = 1; new->input = dst_discard; new->output = dst_discard_out; new->dev = net->loopback_dev; netdev_hold(new->dev, &new->dev_tracker, GFP_ATOMIC); rt->rt_is_input = ort->rt_is_input; rt->rt_iif = ort->rt_iif; rt->rt_pmtu = ort->rt_pmtu; rt->rt_mtu_locked = ort->rt_mtu_locked; rt->rt_genid = rt_genid_ipv4(net); rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_uses_gateway = ort->rt_uses_gateway; rt->rt_gw_family = ort->rt_gw_family; if (rt->rt_gw_family == AF_INET) rt->rt_gw4 = ort->rt_gw4; else if (rt->rt_gw_family == AF_INET6) rt->rt_gw6 = ort->rt_gw6; } dst_release(dst_orig); return rt ? &rt->dst : ERR_PTR(-ENOMEM); } struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, const struct sock *sk) { struct rtable *rt = __ip_route_output_key(net, flp4); if (IS_ERR(rt)) return rt; if (flp4->flowi4_proto) { flp4->flowi4_oif = rt->dst.dev->ifindex; rt = dst_rtable(xfrm_lookup_route(net, &rt->dst, flowi4_to_flowi(flp4), sk, 0)); } return rt; } EXPORT_SYMBOL_GPL(ip_route_output_flow); /* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, struct rtable *rt, u32 table_id, dscp_t dscp, struct flowi4 *fl4, struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags) { struct rtmsg *r; struct nlmsghdr *nlh; unsigned long expires = 0; u32 error; u32 metrics[RTAX_MAX]; nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags); if (!nlh) return -EMSGSIZE; r = nlmsg_data(nlh); r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = inet_dscp_to_dsfield(dscp); r->rtm_table = table_id < 256 ? table_id : RT_TABLE_COMPAT; if (nla_put_u32(skb, RTA_TABLE, table_id)) goto nla_put_failure; r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; if (IPCB(skb)->flags & IPSKB_DOREDIRECT) r->rtm_flags |= RTCF_DOREDIRECT; if (nla_put_in_addr(skb, RTA_DST, dst)) goto nla_put_failure; if (src) { r->rtm_src_len = 32; if (nla_put_in_addr(skb, RTA_SRC, src)) goto nla_put_failure; } if (rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) goto nla_put_failure; if (rt->dst.lwtstate && lwtunnel_fill_encap(skb, rt->dst.lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0) goto nla_put_failure; #ifdef CONFIG_IP_ROUTE_CLASSID if (rt->dst.tclassid && nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid)) goto nla_put_failure; #endif if (fl4 && !rt_is_input_route(rt) && fl4->saddr != src) { if (nla_put_in_addr(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } if (rt->rt_uses_gateway) { if (rt->rt_gw_family == AF_INET && nla_put_in_addr(skb, RTA_GATEWAY, rt->rt_gw4)) { goto nla_put_failure; } else if (rt->rt_gw_family == AF_INET6) { int alen = sizeof(struct in6_addr); struct nlattr *nla; struct rtvia *via; nla = nla_reserve(skb, RTA_VIA, alen + 2); if (!nla) goto nla_put_failure; via = nla_data(nla); via->rtvia_family = AF_INET6; memcpy(via->rtvia_addr, &rt->rt_gw6, alen); } } expires = rt->dst.expires; if (expires) { unsigned long now = jiffies; if (time_before(now, expires)) expires -= now; else expires = 0; } memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); if (rt->rt_pmtu && expires) metrics[RTAX_MTU - 1] = rt->rt_pmtu; if (rt->rt_mtu_locked && expires) metrics[RTAX_LOCK - 1] |= BIT(RTAX_MTU); if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; if (fl4) { if (fl4->flowi4_mark && nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; if (!uid_eq(fl4->flowi4_uid, INVALID_UID) && nla_put_u32(skb, RTA_UID, from_kuid_munged(current_user_ns(), fl4->flowi4_uid))) goto nla_put_failure; if (rt_is_input_route(rt)) { #ifdef CONFIG_IP_MROUTE if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) && IPV4_DEVCONF_ALL_RO(net, MC_FORWARDING)) { int err = ipmr_get_route(net, skb, fl4->saddr, fl4->daddr, r, portid); if (err <= 0) { if (err == 0) return 0; goto nla_put_failure; } } else #endif if (nla_put_u32(skb, RTA_IIF, fl4->flowi4_iif)) goto nla_put_failure; } } error = rt->dst.error; if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) goto nla_put_failure; nlmsg_end(skb, nlh); return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; } static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fnhe_hash_bucket *bucket, int genid, int *fa_index, int fa_start, unsigned int flags) { int i; for (i = 0; i < FNHE_HASH_SIZE; i++) { struct fib_nh_exception *fnhe; for (fnhe = rcu_dereference(bucket[i].chain); fnhe; fnhe = rcu_dereference(fnhe->fnhe_next)) { struct rtable *rt; int err; if (*fa_index < fa_start) goto next; if (fnhe->fnhe_genid != genid) goto next; if (fnhe->fnhe_expires && time_after(jiffies, fnhe->fnhe_expires)) goto next; rt = rcu_dereference(fnhe->fnhe_rth_input); if (!rt) rt = rcu_dereference(fnhe->fnhe_rth_output); if (!rt) goto next; err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt, table_id, 0, NULL, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags); if (err) return err; next: (*fa_index)++; } } return 0; } int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fib_info *fi, int *fa_index, int fa_start, unsigned int flags) { struct net *net = sock_net(cb->skb->sk); int nhsel, genid = fnhe_genid(net); for (nhsel = 0; nhsel < fib_info_num_path(fi); nhsel++) { struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); struct fnhe_hash_bucket *bucket; int err; if (nhc->nhc_flags & RTNH_F_DEAD) continue; rcu_read_lock(); bucket = rcu_dereference(nhc->nhc_exceptions); err = 0; if (bucket) err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, genid, fa_index, fa_start, flags); rcu_read_unlock(); if (err) return err; } return 0; } static struct sk_buff *inet_rtm_getroute_build_skb(__be32 src, __be32 dst, u8 ip_proto, __be16 sport, __be16 dport) { struct sk_buff *skb; struct iphdr *iph; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return NULL; /* Reserve room for dummy headers, this skb can pass * through good chunk of routing engine. */ skb_reset_mac_header(skb); skb_reset_network_header(skb); skb->protocol = htons(ETH_P_IP); iph = skb_put(skb, sizeof(struct iphdr)); iph->protocol = ip_proto; iph->saddr = src; iph->daddr = dst; iph->version = 0x4; iph->frag_off = 0; iph->ihl = 0x5; skb_set_transport_header(skb, skb->len); switch (iph->protocol) { case IPPROTO_UDP: { struct udphdr *udph; udph = skb_put_zero(skb, sizeof(struct udphdr)); udph->source = sport; udph->dest = dport; udph->len = htons(sizeof(struct udphdr)); udph->check = 0; break; } case IPPROTO_TCP: { struct tcphdr *tcph; tcph = skb_put_zero(skb, sizeof(struct tcphdr)); tcph->source = sport; tcph->dest = dport; tcph->doff = sizeof(struct tcphdr) / 4; tcph->rst = 1; tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), src, dst, 0); break; } case IPPROTO_ICMP: { struct icmphdr *icmph; icmph = skb_put_zero(skb, sizeof(struct icmphdr)); icmph->type = ICMP_ECHO; icmph->code = 0; } } return skb; } static int inet_rtm_valid_getroute_req(struct sk_buff *skb, const struct nlmsghdr *nlh, struct nlattr **tb, struct netlink_ext_ack *extack) { struct rtmsg *rtm; int i, err; if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*rtm))) { NL_SET_ERR_MSG(extack, "ipv4: Invalid header for route get request"); return -EINVAL; } if (!netlink_strict_get_check(skb)) return nlmsg_parse_deprecated(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); rtm = nlmsg_data(nlh); if ((rtm->rtm_src_len && rtm->rtm_src_len != 32) || (rtm->rtm_dst_len && rtm->rtm_dst_len != 32) || rtm->rtm_table || rtm->rtm_protocol || rtm->rtm_scope || rtm->rtm_type) { NL_SET_ERR_MSG(extack, "ipv4: Invalid values in header for route get request"); return -EINVAL; } if (rtm->rtm_flags & ~(RTM_F_NOTIFY | RTM_F_LOOKUP_TABLE | RTM_F_FIB_MATCH)) { NL_SET_ERR_MSG(extack, "ipv4: Unsupported rtm_flags for route get request"); return -EINVAL; } err = nlmsg_parse_deprecated_strict(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy, extack); if (err) return err; if ((tb[RTA_SRC] && !rtm->rtm_src_len) || (tb[RTA_DST] && !rtm->rtm_dst_len)) { NL_SET_ERR_MSG(extack, "ipv4: rtm_src_len and rtm_dst_len must be 32 for IPv4"); return -EINVAL; } for (i = 0; i <= RTA_MAX; i++) { if (!tb[i]) continue; switch (i) { case RTA_IIF: case RTA_OIF: case RTA_SRC: case RTA_DST: case RTA_IP_PROTO: case RTA_SPORT: case RTA_DPORT: case RTA_MARK: case RTA_UID: break; default: NL_SET_ERR_MSG(extack, "ipv4: Unsupported attribute in route get request"); return -EINVAL; } } return 0; } static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX+1]; u32 table_id = RT_TABLE_MAIN; __be16 sport = 0, dport = 0; struct fib_result res = {}; u8 ip_proto = IPPROTO_UDP; struct rtable *rt = NULL; struct sk_buff *skb; struct rtmsg *rtm; struct flowi4 fl4 = {}; __be32 dst = 0; __be32 src = 0; kuid_t uid; u32 iif; int err; int mark; err = inet_rtm_valid_getroute_req(in_skb, nlh, tb, extack); if (err < 0) return err; rtm = nlmsg_data(nlh); src = nla_get_in_addr_default(tb[RTA_SRC], 0); dst = nla_get_in_addr_default(tb[RTA_DST], 0); iif = nla_get_u32_default(tb[RTA_IIF], 0); mark = nla_get_u32_default(tb[RTA_MARK], 0); if (tb[RTA_UID]) uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID])); else uid = (iif ? INVALID_UID : current_uid()); if (tb[RTA_IP_PROTO]) { err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO], &ip_proto, AF_INET, extack); if (err) return err; } if (tb[RTA_SPORT]) sport = nla_get_be16(tb[RTA_SPORT]); if (tb[RTA_DPORT]) dport = nla_get_be16(tb[RTA_DPORT]); skb = inet_rtm_getroute_build_skb(src, dst, ip_proto, sport, dport); if (!skb) return -ENOBUFS; fl4.daddr = dst; fl4.saddr = src; fl4.flowi4_tos = rtm->rtm_tos & INET_DSCP_MASK; fl4.flowi4_oif = nla_get_u32_default(tb[RTA_OIF], 0); fl4.flowi4_mark = mark; fl4.flowi4_uid = uid; if (sport) fl4.fl4_sport = sport; if (dport) fl4.fl4_dport = dport; fl4.flowi4_proto = ip_proto; rcu_read_lock(); if (iif) { struct net_device *dev; dev = dev_get_by_index_rcu(net, iif); if (!dev) { err = -ENODEV; goto errout_rcu; } fl4.flowi4_iif = iif; /* for rt_fill_info */ skb->dev = dev; skb->mark = mark; err = ip_route_input_rcu(skb, dst, src, inet_dsfield_to_dscp(rtm->rtm_tos), dev, &res) ? -EINVAL : 0; rt = skb_rtable(skb); if (err == 0 && rt->dst.error) err = -rt->dst.error; } else { fl4.flowi4_iif = LOOPBACK_IFINDEX; skb->dev = net->loopback_dev; rt = ip_route_output_key_hash_rcu(net, &fl4, &res, skb); err = 0; if (IS_ERR(rt)) err = PTR_ERR(rt); else skb_dst_set(skb, &rt->dst); } if (err) goto errout_rcu; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) table_id = res.table ? res.table->tb_id : 0; /* reset skb for netlink reply msg */ skb_trim(skb, 0); skb_reset_network_header(skb); skb_reset_transport_header(skb); skb_reset_mac_header(skb); if (rtm->rtm_flags & RTM_F_FIB_MATCH) { struct fib_rt_info fri; if (!res.fi) { err = fib_props[res.type].error; if (!err) err = -EHOSTUNREACH; goto errout_rcu; } fri.fi = res.fi; fri.tb_id = table_id; fri.dst = res.prefix; fri.dst_len = res.prefixlen; fri.dscp = res.dscp; fri.type = rt->rt_type; fri.offload = 0; fri.trap = 0; fri.offload_failed = 0; if (res.fa_head) { struct fib_alias *fa; hlist_for_each_entry_rcu(fa, res.fa_head, fa_list) { u8 slen = 32 - fri.dst_len; if (fa->fa_slen == slen && fa->tb_id == fri.tb_id && fa->fa_dscp == fri.dscp && fa->fa_info == res.fi && fa->fa_type == fri.type) { fri.offload = READ_ONCE(fa->offload); fri.trap = READ_ONCE(fa->trap); fri.offload_failed = READ_ONCE(fa->offload_failed); break; } } } err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, &fri, 0); } else { err = rt_fill_info(net, dst, src, rt, table_id, res.dscp, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0); } if (err < 0) goto errout_rcu; rcu_read_unlock(); err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout_free: return err; errout_rcu: rcu_read_unlock(); kfree_skb(skb); goto errout_free; } void ip_rt_multicast_event(struct in_device *in_dev) { rt_cache_flush(dev_net(in_dev->dev)); } #ifdef CONFIG_SYSCTL static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; static int ipv4_sysctl_rtcache_flush(const struct ctl_table *__ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = (struct net *)__ctl->extra1; if (write) { rt_cache_flush(net); fnhe_genid_bump(net); return 0; } return -EINVAL; } static struct ctl_table ipv4_route_table[] = { { .procname = "gc_thresh", .data = &ipv4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "max_size", .data = &ip_rt_max_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { /* Deprecated. Use gc_min_interval_ms */ .procname = "gc_min_interval", .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "gc_min_interval_ms", .data = &ip_rt_gc_min_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, }, { .procname = "gc_timeout", .data = &ip_rt_gc_timeout, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "gc_interval", .data = &ip_rt_gc_interval, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "redirect_load", .data = &ip_rt_redirect_load, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "redirect_number", .data = &ip_rt_redirect_number, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "redirect_silence", .data = &ip_rt_redirect_silence, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "error_cost", .data = &ip_rt_error_cost, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "error_burst", .data = &ip_rt_error_burst, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "gc_elasticity", .data = &ip_rt_gc_elasticity, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, }; static const char ipv4_route_flush_procname[] = "flush"; static struct ctl_table ipv4_route_netns_table[] = { { .procname = ipv4_route_flush_procname, .maxlen = sizeof(int), .mode = 0200, .proc_handler = ipv4_sysctl_rtcache_flush, }, { .procname = "min_pmtu", .data = &init_net.ipv4.ip_rt_min_pmtu, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = &ip_min_valid_pmtu, }, { .procname = "mtu_expires", .data = &init_net.ipv4.ip_rt_mtu_expires, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "min_adv_mss", .data = &init_net.ipv4.ip_rt_min_advmss, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, }; static __net_init int sysctl_route_net_init(struct net *net) { struct ctl_table *tbl; size_t table_size = ARRAY_SIZE(ipv4_route_netns_table); tbl = ipv4_route_netns_table; if (!net_eq(net, &init_net)) { int i; tbl = kmemdup(tbl, sizeof(ipv4_route_netns_table), GFP_KERNEL); if (!tbl) goto err_dup; /* Don't export non-whitelisted sysctls to unprivileged users */ if (net->user_ns != &init_user_ns) { if (tbl[0].procname != ipv4_route_flush_procname) table_size = 0; } /* Update the variables to point into the current struct net * except for the first element flush */ for (i = 1; i < table_size; i++) tbl[i].data += (void *)net - (void *)&init_net; } tbl[0].extra1 = net; net->ipv4.route_hdr = register_net_sysctl_sz(net, "net/ipv4/route", tbl, table_size); if (!net->ipv4.route_hdr) goto err_reg; return 0; err_reg: if (tbl != ipv4_route_netns_table) kfree(tbl); err_dup: return -ENOMEM; } static __net_exit void sysctl_route_net_exit(struct net *net) { const struct ctl_table *tbl; tbl = net->ipv4.route_hdr->ctl_table_arg; unregister_net_sysctl_table(net->ipv4.route_hdr); BUG_ON(tbl == ipv4_route_netns_table); kfree(tbl); } static __net_initdata struct pernet_operations sysctl_route_ops = { .init = sysctl_route_net_init, .exit = sysctl_route_net_exit, }; #endif static __net_init int netns_ip_rt_init(struct net *net) { /* Set default value for namespaceified sysctls */ net->ipv4.ip_rt_min_pmtu = DEFAULT_MIN_PMTU; net->ipv4.ip_rt_mtu_expires = DEFAULT_MTU_EXPIRES; net->ipv4.ip_rt_min_advmss = DEFAULT_MIN_ADVMSS; return 0; } static struct pernet_operations __net_initdata ip_rt_ops = { .init = netns_ip_rt_init, }; static __net_init int rt_genid_init(struct net *net) { atomic_set(&net->ipv4.rt_genid, 0); atomic_set(&net->fnhe_genid, 0); atomic_set(&net->ipv4.dev_addr_genid, get_random_u32()); return 0; } static __net_initdata struct pernet_operations rt_genid_ops = { .init = rt_genid_init, }; static int __net_init ipv4_inetpeer_init(struct net *net) { struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL); if (!bp) return -ENOMEM; inet_peer_base_init(bp); net->ipv4.peers = bp; return 0; } static void __net_exit ipv4_inetpeer_exit(struct net *net) { struct inet_peer_base *bp = net->ipv4.peers; net->ipv4.peers = NULL; inetpeer_invalidate_tree(bp); kfree(bp); } static __net_initdata struct pernet_operations ipv4_inetpeer_ops = { .init = ipv4_inetpeer_init, .exit = ipv4_inetpeer_exit, }; #ifdef CONFIG_IP_ROUTE_CLASSID struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; #endif /* CONFIG_IP_ROUTE_CLASSID */ static const struct rtnl_msg_handler ip_rt_rtnl_msg_handlers[] __initconst = { {.protocol = PF_INET, .msgtype = RTM_GETROUTE, .doit = inet_rtm_getroute, .flags = RTNL_FLAG_DOIT_UNLOCKED}, }; int __init ip_rt_init(void) { void *idents_hash; int cpu; /* For modern hosts, this will use 2 MB of memory */ idents_hash = alloc_large_system_hash("IP idents", sizeof(*ip_idents) + sizeof(*ip_tstamps), 0, 16, /* one bucket per 64 KB */ HASH_ZERO, NULL, &ip_idents_mask, 2048, 256*1024); ip_idents = idents_hash; get_random_bytes(ip_idents, (ip_idents_mask + 1) * sizeof(*ip_idents)); ip_tstamps = idents_hash + (ip_idents_mask + 1) * sizeof(*ip_idents); for_each_possible_cpu(cpu) { struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); INIT_LIST_HEAD(&ul->head); spin_lock_init(&ul->lock); } #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) panic("IP: failed to allocate ip_rt_acct\n"); #endif ipv4_dst_ops.kmem_cachep = KMEM_CACHE(rtable, SLAB_HWCACHE_ALIGN | SLAB_PANIC); ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; if (dst_entries_init(&ipv4_dst_ops) < 0) panic("IP: failed to allocate ipv4_dst_ops counter\n"); if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); ipv4_dst_ops.gc_thresh = ~0; ip_rt_max_size = INT_MAX; devinet_init(); ip_fib_init(); if (ip_rt_proc_init()) pr_err("Unable to create route proc files\n"); #ifdef CONFIG_XFRM xfrm_init(); xfrm4_init(); #endif rtnl_register_many(ip_rt_rtnl_msg_handlers); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); #endif register_pernet_subsys(&ip_rt_ops); register_pernet_subsys(&rt_genid_ops); register_pernet_subsys(&ipv4_inetpeer_ops); return 0; } #ifdef CONFIG_SYSCTL /* * We really need to sanitize the damn ipv4 init order, then all * this nonsense will go away. */ void __init ip_static_sysctl_init(void) { register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table); } #endif |
6927 120 6814 6928 6928 76 6924 76 6924 2 13 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* * linux/drivers/char/serial_core.h * * Copyright (C) 2000 Deep Blue Solutions Ltd. */ #ifndef LINUX_SERIAL_CORE_H #define LINUX_SERIAL_CORE_H #include <linux/bitops.h> #include <linux/compiler.h> #include <linux/console.h> #include <linux/interrupt.h> #include <linux/lockdep.h> #include <linux/printk.h> #include <linux/spinlock.h> #include <linux/sched.h> #include <linux/tty.h> #include <linux/mutex.h> #include <linux/sysrq.h> #include <uapi/linux/serial_core.h> #ifdef CONFIG_SERIAL_CORE_CONSOLE #define uart_console(port) \ ((port)->cons && (port)->cons->index == (port)->line) #else #define uart_console(port) ({ (void)port; 0; }) #endif struct uart_port; struct serial_struct; struct serial_port_device; struct device; struct gpio_desc; /** * struct uart_ops -- interface between serial_core and the driver * * This structure describes all the operations that can be done on the * physical hardware. * * @tx_empty: ``unsigned int ()(struct uart_port *port)`` * * This function tests whether the transmitter fifo and shifter for the * @port is empty. If it is empty, this function should return * %TIOCSER_TEMT, otherwise return 0. If the port does not support this * operation, then it should return %TIOCSER_TEMT. * * Locking: none. * Interrupts: caller dependent. * This call must not sleep * * @set_mctrl: ``void ()(struct uart_port *port, unsigned int mctrl)`` * * This function sets the modem control lines for @port to the state * described by @mctrl. The relevant bits of @mctrl are: * * - %TIOCM_RTS RTS signal. * - %TIOCM_DTR DTR signal. * - %TIOCM_OUT1 OUT1 signal. * - %TIOCM_OUT2 OUT2 signal. * - %TIOCM_LOOP Set the port into loopback mode. * * If the appropriate bit is set, the signal should be driven * active. If the bit is clear, the signal should be driven * inactive. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @get_mctrl: ``unsigned int ()(struct uart_port *port)`` * * Returns the current state of modem control inputs of @port. The state * of the outputs should not be returned, since the core keeps track of * their state. The state information should include: * * - %TIOCM_CAR state of DCD signal * - %TIOCM_CTS state of CTS signal * - %TIOCM_DSR state of DSR signal * - %TIOCM_RI state of RI signal * * The bit is set if the signal is currently driven active. If * the port does not support CTS, DCD or DSR, the driver should * indicate that the signal is permanently active. If RI is * not available, the signal should not be indicated as active. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @stop_tx: ``void ()(struct uart_port *port)`` * * Stop transmitting characters. This might be due to the CTS line * becoming inactive or the tty layer indicating we want to stop * transmission due to an %XOFF character. * * The driver should stop transmitting characters as soon as possible. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @start_tx: ``void ()(struct uart_port *port)`` * * Start transmitting characters. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @throttle: ``void ()(struct uart_port *port)`` * * Notify the serial driver that input buffers for the line discipline are * close to full, and it should somehow signal that no more characters * should be sent to the serial port. * This will be called only if hardware assisted flow control is enabled. * * Locking: serialized with @unthrottle() and termios modification by the * tty layer. * * @unthrottle: ``void ()(struct uart_port *port)`` * * Notify the serial driver that characters can now be sent to the serial * port without fear of overrunning the input buffers of the line * disciplines. * * This will be called only if hardware assisted flow control is enabled. * * Locking: serialized with @throttle() and termios modification by the * tty layer. * * @send_xchar: ``void ()(struct uart_port *port, char ch)`` * * Transmit a high priority character, even if the port is stopped. This * is used to implement XON/XOFF flow control and tcflow(). If the serial * driver does not implement this function, the tty core will append the * character to the circular buffer and then call start_tx() / stop_tx() * to flush the data out. * * Do not transmit if @ch == '\0' (%__DISABLED_CHAR). * * Locking: none. * Interrupts: caller dependent. * * @start_rx: ``void ()(struct uart_port *port)`` * * Start receiving characters. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @stop_rx: ``void ()(struct uart_port *port)`` * * Stop receiving characters; the @port is in the process of being closed. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @enable_ms: ``void ()(struct uart_port *port)`` * * Enable the modem status interrupts. * * This method may be called multiple times. Modem status interrupts * should be disabled when the @shutdown() method is called. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @break_ctl: ``void ()(struct uart_port *port, int ctl)`` * * Control the transmission of a break signal. If @ctl is nonzero, the * break signal should be transmitted. The signal should be terminated * when another call is made with a zero @ctl. * * Locking: caller holds tty_port->mutex * * @startup: ``int ()(struct uart_port *port)`` * * Grab any interrupt resources and initialise any low level driver state. * Enable the port for reception. It should not activate RTS nor DTR; * this will be done via a separate call to @set_mctrl(). * * This method will only be called when the port is initially opened. * * Locking: port_sem taken. * Interrupts: globally disabled. * * @shutdown: ``void ()(struct uart_port *port)`` * * Disable the @port, disable any break condition that may be in effect, * and free any interrupt resources. It should not disable RTS nor DTR; * this will have already been done via a separate call to @set_mctrl(). * * Drivers must not access @port->state once this call has completed. * * This method will only be called when there are no more users of this * @port. * * Locking: port_sem taken. * Interrupts: caller dependent. * * @flush_buffer: ``void ()(struct uart_port *port)`` * * Flush any write buffers, reset any DMA state and stop any ongoing DMA * transfers. * * This will be called whenever the @port->state->xmit circular buffer is * cleared. * * Locking: @port->lock taken. * Interrupts: locally disabled. * This call must not sleep * * @set_termios: ``void ()(struct uart_port *port, struct ktermios *new, * struct ktermios *old)`` * * Change the @port parameters, including word length, parity, stop bits. * Update @port->read_status_mask and @port->ignore_status_mask to * indicate the types of events we are interested in receiving. Relevant * ktermios::c_cflag bits are: * * - %CSIZE - word size * - %CSTOPB - 2 stop bits * - %PARENB - parity enable * - %PARODD - odd parity (when %PARENB is in force) * - %ADDRB - address bit (changed through uart_port::rs485_config()). * - %CREAD - enable reception of characters (if not set, still receive * characters from the port, but throw them away). * - %CRTSCTS - if set, enable CTS status change reporting. * - %CLOCAL - if not set, enable modem status change reporting. * * Relevant ktermios::c_iflag bits are: * * - %INPCK - enable frame and parity error events to be passed to the TTY * layer. * - %BRKINT / %PARMRK - both of these enable break events to be passed to * the TTY layer. * - %IGNPAR - ignore parity and framing errors. * - %IGNBRK - ignore break errors. If %IGNPAR is also set, ignore overrun * errors as well. * * The interaction of the ktermios::c_iflag bits is as follows (parity * error given as an example): * * ============ ======= ======= ========================================= * Parity error INPCK IGNPAR * ============ ======= ======= ========================================= * n/a 0 n/a character received, marked as %TTY_NORMAL * None 1 n/a character received, marked as %TTY_NORMAL * Yes 1 0 character received, marked as %TTY_PARITY * Yes 1 1 character discarded * ============ ======= ======= ========================================= * * Other flags may be used (eg, xon/xoff characters) if your hardware * supports hardware "soft" flow control. * * Locking: caller holds tty_port->mutex * Interrupts: caller dependent. * This call must not sleep * * @set_ldisc: ``void ()(struct uart_port *port, struct ktermios *termios)`` * * Notifier for discipline change. See * Documentation/driver-api/tty/tty_ldisc.rst. * * Locking: caller holds tty_port->mutex * * @pm: ``void ()(struct uart_port *port, unsigned int state, * unsigned int oldstate)`` * * Perform any power management related activities on the specified @port. * @state indicates the new state (defined by enum uart_pm_state), * @oldstate indicates the previous state. * * This function should not be used to grab any resources. * * This will be called when the @port is initially opened and finally * closed, except when the @port is also the system console. This will * occur even if %CONFIG_PM is not set. * * Locking: none. * Interrupts: caller dependent. * * @type: ``const char *()(struct uart_port *port)`` * * Return a pointer to a string constant describing the specified @port, * or return %NULL, in which case the string 'unknown' is substituted. * * Locking: none. * Interrupts: caller dependent. * * @release_port: ``void ()(struct uart_port *port)`` * * Release any memory and IO region resources currently in use by the * @port. * * Locking: none. * Interrupts: caller dependent. * * @request_port: ``int ()(struct uart_port *port)`` * * Request any memory and IO region resources required by the port. If any * fail, no resources should be registered when this function returns, and * it should return -%EBUSY on failure. * * Locking: none. * Interrupts: caller dependent. * * @config_port: ``void ()(struct uart_port *port, int type)`` * * Perform any autoconfiguration steps required for the @port. @type * contains a bit mask of the required configuration. %UART_CONFIG_TYPE * indicates that the port requires detection and identification. * @port->type should be set to the type found, or %PORT_UNKNOWN if no * port was detected. * * %UART_CONFIG_IRQ indicates autoconfiguration of the interrupt signal, * which should be probed using standard kernel autoprobing techniques. * This is not necessary on platforms where ports have interrupts * internally hard wired (eg, system on a chip implementations). * * Locking: none. * Interrupts: caller dependent. * * @verify_port: ``int ()(struct uart_port *port, * struct serial_struct *serinfo)`` * * Verify the new serial port information contained within @serinfo is * suitable for this port type. * * Locking: none. * Interrupts: caller dependent. * * @ioctl: ``int ()(struct uart_port *port, unsigned int cmd, * unsigned long arg)`` * * Perform any port specific IOCTLs. IOCTL commands must be defined using * the standard numbering system found in <asm/ioctl.h>. * * Locking: none. * Interrupts: caller dependent. * * @poll_init: ``int ()(struct uart_port *port)`` * * Called by kgdb to perform the minimal hardware initialization needed to * support @poll_put_char() and @poll_get_char(). Unlike @startup(), this * should not request interrupts. * * Locking: %tty_mutex and tty_port->mutex taken. * Interrupts: n/a. * * @poll_put_char: ``void ()(struct uart_port *port, unsigned char ch)`` * * Called by kgdb to write a single character @ch directly to the serial * @port. It can and should block until there is space in the TX FIFO. * * Locking: none. * Interrupts: caller dependent. * This call must not sleep * * @poll_get_char: ``int ()(struct uart_port *port)`` * * Called by kgdb to read a single character directly from the serial * port. If data is available, it should be returned; otherwise the * function should return %NO_POLL_CHAR immediately. * * Locking: none. * Interrupts: caller dependent. * This call must not sleep */ struct uart_ops { unsigned int (*tx_empty)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int mctrl); unsigned int (*get_mctrl)(struct uart_port *); void (*stop_tx)(struct uart_port *); void (*start_tx)(struct uart_port *); void (*throttle)(struct uart_port *); void (*unthrottle)(struct uart_port *); void (*send_xchar)(struct uart_port *, char ch); void (*stop_rx)(struct uart_port *); void (*start_rx)(struct uart_port *); void (*enable_ms)(struct uart_port *); void (*break_ctl)(struct uart_port *, int ctl); int (*startup)(struct uart_port *); void (*shutdown)(struct uart_port *); void (*flush_buffer)(struct uart_port *); void (*set_termios)(struct uart_port *, struct ktermios *new, const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); void (*pm)(struct uart_port *, unsigned int state, unsigned int oldstate); const char *(*type)(struct uart_port *); void (*release_port)(struct uart_port *); int (*request_port)(struct uart_port *); void (*config_port)(struct uart_port *, int); int (*verify_port)(struct uart_port *, struct serial_struct *); int (*ioctl)(struct uart_port *, unsigned int, unsigned long); #ifdef CONFIG_CONSOLE_POLL int (*poll_init)(struct uart_port *); void (*poll_put_char)(struct uart_port *, unsigned char); int (*poll_get_char)(struct uart_port *); #endif }; #define NO_POLL_CHAR 0x00ff0000 #define UART_CONFIG_TYPE (1 << 0) #define UART_CONFIG_IRQ (1 << 1) struct uart_icount { __u32 cts; __u32 dsr; __u32 rng; __u32 dcd; __u32 rx; __u32 tx; __u32 frame; __u32 overrun; __u32 parity; __u32 brk; __u32 buf_overrun; }; typedef u64 __bitwise upf_t; typedef unsigned int __bitwise upstat_t; struct uart_port { spinlock_t lock; /* port lock */ unsigned long iobase; /* in/out[bwl] */ unsigned char __iomem *membase; /* read/write[bwl] */ unsigned int (*serial_in)(struct uart_port *, int); void (*serial_out)(struct uart_port *, int, int); void (*set_termios)(struct uart_port *, struct ktermios *new, const struct ktermios *old); void (*set_ldisc)(struct uart_port *, struct ktermios *); unsigned int (*get_mctrl)(struct uart_port *); void (*set_mctrl)(struct uart_port *, unsigned int); unsigned int (*get_divisor)(struct uart_port *, unsigned int baud, unsigned int *frac); void (*set_divisor)(struct uart_port *, unsigned int baud, unsigned int quot, unsigned int quot_frac); int (*startup)(struct uart_port *port); void (*shutdown)(struct uart_port *port); void (*throttle)(struct uart_port *port); void (*unthrottle)(struct uart_port *port); int (*handle_irq)(struct uart_port *); void (*pm)(struct uart_port *, unsigned int state, unsigned int old); void (*handle_break)(struct uart_port *); int (*rs485_config)(struct uart_port *, struct ktermios *termios, struct serial_rs485 *rs485); int (*iso7816_config)(struct uart_port *, struct serial_iso7816 *iso7816); unsigned int ctrl_id; /* optional serial core controller id */ unsigned int port_id; /* optional serial core port id */ unsigned int irq; /* irq number */ unsigned long irqflags; /* irq flags */ unsigned int uartclk; /* base uart clock */ unsigned int fifosize; /* tx fifo size */ unsigned char x_char; /* xon/xoff char */ unsigned char regshift; /* reg offset shift */ unsigned char iotype; /* io access style */ #define UPIO_UNKNOWN ((unsigned char)~0U) /* UCHAR_MAX */ #define UPIO_PORT (SERIAL_IO_PORT) /* 8b I/O port access */ #define UPIO_HUB6 (SERIAL_IO_HUB6) /* Hub6 ISA card */ #define UPIO_MEM (SERIAL_IO_MEM) /* driver-specific */ #define UPIO_MEM32 (SERIAL_IO_MEM32) /* 32b little endian */ #define UPIO_AU (SERIAL_IO_AU) /* Au1x00 and RT288x type IO */ #define UPIO_TSI (SERIAL_IO_TSI) /* Tsi108/109 type IO */ #define UPIO_MEM32BE (SERIAL_IO_MEM32BE) /* 32b big endian */ #define UPIO_MEM16 (SERIAL_IO_MEM16) /* 16b little endian */ unsigned char quirks; /* internal quirks */ /* internal quirks must be updated while holding port mutex */ #define UPQ_NO_TXEN_TEST BIT(0) unsigned int read_status_mask; /* driver specific */ unsigned int ignore_status_mask; /* driver specific */ struct uart_state *state; /* pointer to parent state */ struct uart_icount icount; /* statistics */ struct console *cons; /* struct console, if any */ /* flags must be updated while holding port mutex */ upf_t flags; /* * These flags must be equivalent to the flags defined in * include/uapi/linux/tty_flags.h which are the userspace definitions * assigned from the serial_struct flags in uart_set_info() * [for bit definitions in the UPF_CHANGE_MASK] * * Bits [0..ASYNCB_LAST_USER] are userspace defined/visible/changeable * The remaining bits are serial-core specific and not modifiable by * userspace. */ #ifdef CONFIG_HAS_IOPORT #define UPF_FOURPORT ((__force upf_t) ASYNC_FOURPORT /* 1 */ ) #else #define UPF_FOURPORT 0 #endif #define UPF_SAK ((__force upf_t) ASYNC_SAK /* 2 */ ) #define UPF_SPD_HI ((__force upf_t) ASYNC_SPD_HI /* 4 */ ) #define UPF_SPD_VHI ((__force upf_t) ASYNC_SPD_VHI /* 5 */ ) #define UPF_SPD_CUST ((__force upf_t) ASYNC_SPD_CUST /* 0x0030 */ ) #define UPF_SPD_WARP ((__force upf_t) ASYNC_SPD_WARP /* 0x1010 */ ) #define UPF_SPD_MASK ((__force upf_t) ASYNC_SPD_MASK /* 0x1030 */ ) #define UPF_SKIP_TEST ((__force upf_t) ASYNC_SKIP_TEST /* 6 */ ) #define UPF_AUTO_IRQ ((__force upf_t) ASYNC_AUTO_IRQ /* 7 */ ) #define UPF_HARDPPS_CD ((__force upf_t) ASYNC_HARDPPS_CD /* 11 */ ) #define UPF_SPD_SHI ((__force upf_t) ASYNC_SPD_SHI /* 12 */ ) #define UPF_LOW_LATENCY ((__force upf_t) ASYNC_LOW_LATENCY /* 13 */ ) #define UPF_BUGGY_UART ((__force upf_t) ASYNC_BUGGY_UART /* 14 */ ) #define UPF_MAGIC_MULTIPLIER ((__force upf_t) ASYNC_MAGIC_MULTIPLIER /* 16 */ ) #define UPF_NO_THRE_TEST ((__force upf_t) BIT_ULL(19)) /* Port has hardware-assisted h/w flow control */ #define UPF_AUTO_CTS ((__force upf_t) BIT_ULL(20)) #define UPF_AUTO_RTS ((__force upf_t) BIT_ULL(21)) #define UPF_HARD_FLOW ((__force upf_t) (UPF_AUTO_CTS | UPF_AUTO_RTS)) /* Port has hardware-assisted s/w flow control */ #define UPF_SOFT_FLOW ((__force upf_t) BIT_ULL(22)) #define UPF_CONS_FLOW ((__force upf_t) BIT_ULL(23)) #define UPF_SHARE_IRQ ((__force upf_t) BIT_ULL(24)) #define UPF_EXAR_EFR ((__force upf_t) BIT_ULL(25)) #define UPF_BUG_THRE ((__force upf_t) BIT_ULL(26)) /* The exact UART type is known and should not be probed. */ #define UPF_FIXED_TYPE ((__force upf_t) BIT_ULL(27)) #define UPF_BOOT_AUTOCONF ((__force upf_t) BIT_ULL(28)) #define UPF_FIXED_PORT ((__force upf_t) BIT_ULL(29)) #define UPF_DEAD ((__force upf_t) BIT_ULL(30)) #define UPF_IOREMAP ((__force upf_t) BIT_ULL(31)) #define UPF_FULL_PROBE ((__force upf_t) BIT_ULL(32)) #define __UPF_CHANGE_MASK 0x17fff #define UPF_CHANGE_MASK ((__force upf_t) __UPF_CHANGE_MASK) #define UPF_USR_MASK ((__force upf_t) (UPF_SPD_MASK|UPF_LOW_LATENCY)) #if __UPF_CHANGE_MASK > ASYNC_FLAGS #error Change mask not equivalent to userspace-visible bit defines #endif /* * Must hold termios_rwsem, port mutex and port lock to change; * can hold any one lock to read. */ upstat_t status; #define UPSTAT_CTS_ENABLE ((__force upstat_t) (1 << 0)) #define UPSTAT_DCD_ENABLE ((__force upstat_t) (1 << 1)) #define UPSTAT_AUTORTS ((__force upstat_t) (1 << 2)) #define UPSTAT_AUTOCTS ((__force upstat_t) (1 << 3)) #define UPSTAT_AUTOXOFF ((__force upstat_t) (1 << 4)) #define UPSTAT_SYNC_FIFO ((__force upstat_t) (1 << 5)) bool hw_stopped; /* sw-assisted CTS flow state */ unsigned int mctrl; /* current modem ctrl settings */ unsigned int frame_time; /* frame timing in ns */ unsigned int type; /* port type */ const struct uart_ops *ops; unsigned int custom_divisor; unsigned int line; /* port index */ unsigned int minor; resource_size_t mapbase; /* for ioremap */ resource_size_t mapsize; struct device *dev; /* serial port physical parent device */ struct serial_port_device *port_dev; /* serial core port device */ unsigned long sysrq; /* sysrq timeout */ u8 sysrq_ch; /* char for sysrq */ unsigned char has_sysrq; unsigned char sysrq_seq; /* index in sysrq_toggle_seq */ unsigned char hub6; /* this should be in the 8250 driver */ unsigned char suspended; unsigned char console_reinit; const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; struct serial_rs485 rs485_supported; /* Supported mask for serial_rs485 */ struct gpio_desc *rs485_term_gpio; /* enable RS485 bus termination */ struct gpio_desc *rs485_rx_during_tx_gpio; /* Output GPIO that sets the state of RS485 RX during TX */ struct serial_iso7816 iso7816; void *private_data; /* generic platform data pointer */ }; /* * Only for console->device_lock()/_unlock() callbacks and internal * port lock wrapper synchronization. */ static inline void __uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); } /* * Only for console->device_lock()/_unlock() callbacks and internal * port lock wrapper synchronization. */ static inline void __uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { spin_unlock_irqrestore(&up->lock, flags); } /** * uart_port_set_cons - Safely set the @cons field for a uart * @up: The uart port to set * @con: The new console to set to * * This function must be used to set @up->cons. It uses the port lock to * synchronize with the port lock wrappers in order to ensure that the console * cannot change or disappear while another context is holding the port lock. */ static inline void uart_port_set_cons(struct uart_port *up, struct console *con) { unsigned long flags; __uart_port_lock_irqsave(up, &flags); up->cons = con; __uart_port_unlock_irqrestore(up, flags); } /* Only for internal port lock wrapper usage. */ static inline bool __uart_port_using_nbcon(struct uart_port *up) { lockdep_assert_held_once(&up->lock); if (likely(!uart_console(up))) return false; /* * @up->cons is only modified under the port lock. Therefore it is * certain that it cannot disappear here. * * @up->cons->node is added/removed from the console list under the * port lock. Therefore it is certain that the registration status * cannot change here, thus @up->cons->flags can be read directly. */ if (hlist_unhashed_lockless(&up->cons->node) || !(up->cons->flags & CON_NBCON) || !up->cons->write_atomic) { return false; } return true; } /* Only for internal port lock wrapper usage. */ static inline bool __uart_port_nbcon_try_acquire(struct uart_port *up) { if (!__uart_port_using_nbcon(up)) return true; return nbcon_device_try_acquire(up->cons); } /* Only for internal port lock wrapper usage. */ static inline void __uart_port_nbcon_acquire(struct uart_port *up) { if (!__uart_port_using_nbcon(up)) return; while (!nbcon_device_try_acquire(up->cons)) cpu_relax(); } /* Only for internal port lock wrapper usage. */ static inline void __uart_port_nbcon_release(struct uart_port *up) { if (!__uart_port_using_nbcon(up)) return; nbcon_device_release(up->cons); } /** * uart_port_lock - Lock the UART port * @up: Pointer to UART port structure */ static inline void uart_port_lock(struct uart_port *up) { spin_lock(&up->lock); __uart_port_nbcon_acquire(up); } /** * uart_port_lock_irq - Lock the UART port and disable interrupts * @up: Pointer to UART port structure */ static inline void uart_port_lock_irq(struct uart_port *up) { spin_lock_irq(&up->lock); __uart_port_nbcon_acquire(up); } /** * uart_port_lock_irqsave - Lock the UART port, save and disable interrupts * @up: Pointer to UART port structure * @flags: Pointer to interrupt flags storage */ static inline void uart_port_lock_irqsave(struct uart_port *up, unsigned long *flags) { spin_lock_irqsave(&up->lock, *flags); __uart_port_nbcon_acquire(up); } /** * uart_port_trylock - Try to lock the UART port * @up: Pointer to UART port structure * * Returns: True if lock was acquired, false otherwise */ static inline bool uart_port_trylock(struct uart_port *up) { if (!spin_trylock(&up->lock)) return false; if (!__uart_port_nbcon_try_acquire(up)) { spin_unlock(&up->lock); return false; } return true; } /** * uart_port_trylock_irqsave - Try to lock the UART port, save and disable interrupts * @up: Pointer to UART port structure * @flags: Pointer to interrupt flags storage * * Returns: True if lock was acquired, false otherwise */ static inline bool uart_port_trylock_irqsave(struct uart_port *up, unsigned long *flags) { if (!spin_trylock_irqsave(&up->lock, *flags)) return false; if (!__uart_port_nbcon_try_acquire(up)) { spin_unlock_irqrestore(&up->lock, *flags); return false; } return true; } /** * uart_port_unlock - Unlock the UART port * @up: Pointer to UART port structure */ static inline void uart_port_unlock(struct uart_port *up) { __uart_port_nbcon_release(up); spin_unlock(&up->lock); } /** * uart_port_unlock_irq - Unlock the UART port and re-enable interrupts * @up: Pointer to UART port structure */ static inline void uart_port_unlock_irq(struct uart_port *up) { __uart_port_nbcon_release(up); spin_unlock_irq(&up->lock); } /** * uart_port_unlock_irqrestore - Unlock the UART port, restore interrupts * @up: Pointer to UART port structure * @flags: The saved interrupt flags for restore */ static inline void uart_port_unlock_irqrestore(struct uart_port *up, unsigned long flags) { __uart_port_nbcon_release(up); spin_unlock_irqrestore(&up->lock, flags); } static inline int serial_port_in(struct uart_port *up, int offset) { return up->serial_in(up, offset); } static inline void serial_port_out(struct uart_port *up, int offset, int value) { up->serial_out(up, offset, value); } /** * enum uart_pm_state - power states for UARTs * @UART_PM_STATE_ON: UART is powered, up and operational * @UART_PM_STATE_OFF: UART is powered off * @UART_PM_STATE_UNDEFINED: sentinel */ enum uart_pm_state { UART_PM_STATE_ON = 0, UART_PM_STATE_OFF = 3, /* number taken from ACPI */ UART_PM_STATE_UNDEFINED, }; /* * This is the state information which is persistent across opens. */ struct uart_state { struct tty_port port; enum uart_pm_state pm_state; atomic_t refcount; wait_queue_head_t remove_wait; struct uart_port *uart_port; }; #define UART_XMIT_SIZE PAGE_SIZE /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 /** * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars * @up: uart_port structure describing the port * @chars: number of characters sent * * This function advances the tail of circular xmit buffer by the number of * @chars transmitted and handles accounting of transmitted bytes (into * @up's icount.tx). */ static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars) { struct tty_port *tport = &up->state->port; kfifo_skip_count(&tport->xmit_fifo, chars); up->icount.tx += chars; } static inline unsigned int uart_fifo_out(struct uart_port *up, unsigned char *buf, unsigned int chars) { struct tty_port *tport = &up->state->port; chars = kfifo_out(&tport->xmit_fifo, buf, chars); up->icount.tx += chars; return chars; } static inline unsigned int uart_fifo_get(struct uart_port *up, unsigned char *ch) { struct tty_port *tport = &up->state->port; unsigned int chars; chars = kfifo_get(&tport->xmit_fifo, ch); up->icount.tx += chars; return chars; } struct module; struct tty_driver; struct uart_driver { struct module *owner; const char *driver_name; const char *dev_name; int major; int minor; int nr; struct console *cons; /* * these are private; the low level driver should not * touch these; they should be initialised to NULL */ struct uart_state *state; struct tty_driver *tty_driver; }; void uart_write_wakeup(struct uart_port *port); /** * enum UART_TX_FLAGS -- flags for uart_port_tx_flags() * * @UART_TX_NOSTOP: don't call port->ops->stop_tx() on empty buffer */ enum UART_TX_FLAGS { UART_TX_NOSTOP = BIT(0), }; #define __uart_port_tx(uport, ch, flags, tx_ready, put_char, tx_done, \ for_test, for_post) \ ({ \ struct uart_port *__port = (uport); \ struct tty_port *__tport = &__port->state->port; \ unsigned int pending; \ \ for (; (for_test) && (tx_ready); (for_post), __port->icount.tx++) { \ if (__port->x_char) { \ (ch) = __port->x_char; \ (put_char); \ __port->x_char = 0; \ continue; \ } \ \ if (uart_tx_stopped(__port)) \ break; \ \ if (!kfifo_get(&__tport->xmit_fifo, &(ch))) \ break; \ \ (put_char); \ } \ \ (tx_done); \ \ pending = kfifo_len(&__tport->xmit_fifo); \ if (pending < WAKEUP_CHARS) { \ uart_write_wakeup(__port); \ \ if (!((flags) & UART_TX_NOSTOP) && pending == 0) \ __port->ops->stop_tx(__port); \ } \ \ pending; \ }) /** * uart_port_tx_limited -- transmit helper for uart_port with count limiting * @port: uart port * @ch: variable to store a character to be written to the HW * @count: a limit of characters to send * @tx_ready: can HW accept more data function * @put_char: function to write a character * @tx_done: function to call after the loop is done * * This helper transmits characters from the xmit buffer to the hardware using * @put_char(). It does so until @count characters are sent and while @tx_ready * evaluates to true. * * Returns: the number of characters in the xmit buffer when done. * * The expression in macro parameters shall be designed as follows: * * **tx_ready:** should evaluate to true if the HW can accept more data to * be sent. This parameter can be %true, which means the HW is always ready. * * **put_char:** shall write @ch to the device of @port. * * **tx_done:** when the write loop is done, this can perform arbitrary * action before potential invocation of ops->stop_tx() happens. If the * driver does not need to do anything, use e.g. ({}). * * For all of them, @port->lock is held, interrupts are locally disabled and * the expressions must not sleep. */ #define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ __uart_port_tx(port, ch, 0, tx_ready, put_char, tx_done, __count, \ __count--); \ }) /** * uart_port_tx_limited_flags -- transmit helper for uart_port with count limiting with flags * @port: uart port * @ch: variable to store a character to be written to the HW * @flags: %UART_TX_NOSTOP or similar * @count: a limit of characters to send * @tx_ready: can HW accept more data function * @put_char: function to write a character * @tx_done: function to call after the loop is done * * See uart_port_tx_limited() for more details. */ #define uart_port_tx_limited_flags(port, ch, flags, count, tx_ready, put_char, tx_done) ({ \ unsigned int __count = (count); \ __uart_port_tx(port, ch, flags, tx_ready, put_char, tx_done, __count, \ __count--); \ }) /** * uart_port_tx -- transmit helper for uart_port * @port: uart port * @ch: variable to store a character to be written to the HW * @tx_ready: can HW accept more data function * @put_char: function to write a character * * See uart_port_tx_limited() for more details. */ #define uart_port_tx(port, ch, tx_ready, put_char) \ __uart_port_tx(port, ch, 0, tx_ready, put_char, ({}), true, ({})) /** * uart_port_tx_flags -- transmit helper for uart_port with flags * @port: uart port * @ch: variable to store a character to be written to the HW * @flags: %UART_TX_NOSTOP or similar * @tx_ready: can HW accept more data function * @put_char: function to write a character * * See uart_port_tx_limited() for more details. */ #define uart_port_tx_flags(port, ch, flags, tx_ready, put_char) \ __uart_port_tx(port, ch, flags, tx_ready, put_char, ({}), true, ({})) /* * Baud rate helpers. */ void uart_update_timeout(struct uart_port *port, unsigned int cflag, unsigned int baud); unsigned int uart_get_baud_rate(struct uart_port *port, struct ktermios *termios, const struct ktermios *old, unsigned int min, unsigned int max); unsigned int uart_get_divisor(struct uart_port *port, unsigned int baud); /* * Calculates FIFO drain time. */ static inline unsigned long uart_fifo_timeout(struct uart_port *port) { u64 fifo_timeout = (u64)READ_ONCE(port->frame_time) * port->fifosize; /* Add .02 seconds of slop */ fifo_timeout += 20 * NSEC_PER_MSEC; return max(nsecs_to_jiffies(fifo_timeout), 1UL); } /* Base timer interval for polling */ static inline unsigned long uart_poll_timeout(struct uart_port *port) { unsigned long timeout = uart_fifo_timeout(port); return timeout > 6 ? (timeout / 2 - 2) : 1; } /* * Console helpers. */ struct earlycon_device { struct console *con; struct uart_port port; char options[32]; /* e.g., 115200n8 */ unsigned int baud; }; struct earlycon_id { char name[15]; char name_term; /* In case compiler didn't '\0' term name */ char compatible[128]; int (*setup)(struct earlycon_device *, const char *options); }; extern const struct earlycon_id __earlycon_table[]; extern const struct earlycon_id __earlycon_table_end[]; #if defined(CONFIG_SERIAL_EARLYCON) && !defined(MODULE) #define EARLYCON_USED_OR_UNUSED __used #else #define EARLYCON_USED_OR_UNUSED __maybe_unused #endif #define OF_EARLYCON_DECLARE(_name, compat, fn) \ static const struct earlycon_id __UNIQUE_ID(__earlycon_##_name) \ EARLYCON_USED_OR_UNUSED __section("__earlycon_table") \ __aligned(__alignof__(struct earlycon_id)) \ = { .name = __stringify(_name), \ .compatible = compat, \ .setup = fn } #define EARLYCON_DECLARE(_name, fn) OF_EARLYCON_DECLARE(_name, "", fn) int of_setup_earlycon(const struct earlycon_id *match, unsigned long node, const char *options); #ifdef CONFIG_SERIAL_EARLYCON extern bool earlycon_acpi_spcr_enable __initdata; int setup_earlycon(char *buf); #else static const bool earlycon_acpi_spcr_enable EARLYCON_USED_OR_UNUSED; static inline int setup_earlycon(char *buf) { return 0; } #endif /* Variant of uart_console_registered() when the console_list_lock is held. */ static inline bool uart_console_registered_locked(struct uart_port *port) { return uart_console(port) && console_is_registered_locked(port->cons); } static inline bool uart_console_registered(struct uart_port *port) { return uart_console(port) && console_is_registered(port->cons); } struct uart_port *uart_get_console(struct uart_port *ports, int nr, struct console *c); int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr, char **options); void uart_parse_options(const char *options, int *baud, int *parity, int *bits, int *flow); int uart_set_options(struct uart_port *port, struct console *co, int baud, int parity, int bits, int flow); struct tty_driver *uart_console_device(struct console *co, int *index); void uart_console_write(struct uart_port *port, const char *s, unsigned int count, void (*putchar)(struct uart_port *, unsigned char)); /* * Port/driver registration/removal */ int uart_register_driver(struct uart_driver *uart); void uart_unregister_driver(struct uart_driver *uart); int uart_add_one_port(struct uart_driver *reg, struct uart_port *port); void uart_remove_one_port(struct uart_driver *reg, struct uart_port *port); int uart_read_port_properties(struct uart_port *port); int uart_read_and_validate_port_properties(struct uart_port *port); bool uart_match_port(const struct uart_port *port1, const struct uart_port *port2); /* * Power Management */ int uart_suspend_port(struct uart_driver *reg, struct uart_port *port); int uart_resume_port(struct uart_driver *reg, struct uart_port *port); static inline int uart_tx_stopped(struct uart_port *port) { struct tty_struct *tty = port->state->port.tty; if ((tty && tty->flow.stopped) || port->hw_stopped) return 1; return 0; } static inline bool uart_cts_enabled(struct uart_port *uport) { return !!(uport->status & UPSTAT_CTS_ENABLE); } static inline bool uart_softcts_mode(struct uart_port *uport) { upstat_t mask = UPSTAT_CTS_ENABLE | UPSTAT_AUTOCTS; return ((uport->status & mask) == UPSTAT_CTS_ENABLE); } /* * The following are helper functions for the low level drivers. */ void uart_handle_dcd_change(struct uart_port *uport, bool active); void uart_handle_cts_change(struct uart_port *uport, bool active); void uart_insert_char(struct uart_port *port, unsigned int status, unsigned int overrun, u8 ch, u8 flag); void uart_xchar_out(struct uart_port *uport, int offset); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL #define SYSRQ_TIMEOUT (HZ * 5) bool uart_try_toggle_sysrq(struct uart_port *port, u8 ch); static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { handle_sysrq(ch); port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { if (!port->sysrq) return 0; if (ch && time_before(jiffies, port->sysrq)) { if (sysrq_mask()) { port->sysrq_ch = ch; port->sysrq = 0; return 1; } if (uart_try_toggle_sysrq(port, ch)) return 1; } port->sysrq = 0; return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { u8 sysrq_ch; if (!port->has_sysrq) { uart_port_unlock(port); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; uart_port_unlock(port); if (sysrq_ch) handle_sysrq(sysrq_ch); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { u8 sysrq_ch; if (!port->has_sysrq) { uart_port_unlock_irqrestore(port, flags); return; } sysrq_ch = port->sysrq_ch; port->sysrq_ch = 0; uart_port_unlock_irqrestore(port, flags); if (sysrq_ch) handle_sysrq(sysrq_ch); } #else /* CONFIG_MAGIC_SYSRQ_SERIAL */ static inline int uart_handle_sysrq_char(struct uart_port *port, u8 ch) { return 0; } static inline int uart_prepare_sysrq_char(struct uart_port *port, u8 ch) { return 0; } static inline void uart_unlock_and_check_sysrq(struct uart_port *port) { uart_port_unlock(port); } static inline void uart_unlock_and_check_sysrq_irqrestore(struct uart_port *port, unsigned long flags) { uart_port_unlock_irqrestore(port, flags); } #endif /* CONFIG_MAGIC_SYSRQ_SERIAL */ /* * We do the SysRQ and SAK checking like this... */ static inline int uart_handle_break(struct uart_port *port) { struct uart_state *state = port->state; if (port->handle_break) port->handle_break(port); #ifdef CONFIG_MAGIC_SYSRQ_SERIAL if (port->has_sysrq && uart_console(port)) { if (!port->sysrq) { port->sysrq = jiffies + SYSRQ_TIMEOUT; return 1; } port->sysrq = 0; } #endif if (port->flags & UPF_SAK) do_SAK(state->port.tty); return 0; } /* * UART_ENABLE_MS - determine if port should enable modem status irqs */ #define UART_ENABLE_MS(port,cflag) ((port)->flags & UPF_HARDPPS_CD || \ (cflag) & CRTSCTS || \ !((cflag) & CLOCAL)) int uart_get_rs485_mode(struct uart_port *port); #endif /* LINUX_SERIAL_CORE_H */ |
249 399 996 247 999 1 985 13 13 536 1 246 248 249 241 593 385 384 2 385 385 386 368 2 366 250 325 324 250 249 243 243 382 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 | // SPDX-License-Identifier: GPL-2.0 /* * linux/fs/char_dev.c * * Copyright (C) 1991, 1992 Linus Torvalds */ #include <linux/init.h> #include <linux/fs.h> #include <linux/kdev_t.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/major.h> #include <linux/errno.h> #include <linux/module.h> #include <linux/seq_file.h> #include <linux/kobject.h> #include <linux/kobj_map.h> #include <linux/cdev.h> #include <linux/mutex.h> #include <linux/backing-dev.h> #include <linux/tty.h> #include "internal.h" static struct kobj_map *cdev_map __ro_after_init; static DEFINE_MUTEX(chrdevs_lock); #define CHRDEV_MAJOR_HASH_SIZE 255 static struct char_device_struct { struct char_device_struct *next; unsigned int major; unsigned int baseminor; int minorct; char name[64]; struct cdev *cdev; /* will die */ } *chrdevs[CHRDEV_MAJOR_HASH_SIZE]; /* index in the above */ static inline int major_to_index(unsigned major) { return major % CHRDEV_MAJOR_HASH_SIZE; } #ifdef CONFIG_PROC_FS void chrdev_show(struct seq_file *f, off_t offset) { struct char_device_struct *cd; mutex_lock(&chrdevs_lock); for (cd = chrdevs[major_to_index(offset)]; cd; cd = cd->next) { if (cd->major == offset) seq_printf(f, "%3d %s\n", cd->major, cd->name); } mutex_unlock(&chrdevs_lock); } #endif /* CONFIG_PROC_FS */ static int find_dynamic_major(void) { int i; struct char_device_struct *cd; for (i = ARRAY_SIZE(chrdevs)-1; i >= CHRDEV_MAJOR_DYN_END; i--) { if (chrdevs[i] == NULL) return i; } for (i = CHRDEV_MAJOR_DYN_EXT_START; i >= CHRDEV_MAJOR_DYN_EXT_END; i--) { for (cd = chrdevs[major_to_index(i)]; cd; cd = cd->next) if (cd->major == i) break; if (cd == NULL) return i; } return -EBUSY; } /* * Register a single major with a specified minor range. * * If major == 0 this function will dynamically allocate an unused major. * If major > 0 this function will attempt to reserve the range of minors * with given major. * */ static struct char_device_struct * __register_chrdev_region(unsigned int major, unsigned int baseminor, int minorct, const char *name) { struct char_device_struct *cd, *curr, *prev = NULL; int ret; int i; if (major >= CHRDEV_MAJOR_MAX) { pr_err("CHRDEV \"%s\" major requested (%u) is greater than the maximum (%u)\n", name, major, CHRDEV_MAJOR_MAX-1); return ERR_PTR(-EINVAL); } if (minorct > MINORMASK + 1 - baseminor) { pr_err("CHRDEV \"%s\" minor range requested (%u-%u) is out of range of maximum range (%u-%u) for a single major\n", name, baseminor, baseminor + minorct - 1, 0, MINORMASK); return ERR_PTR(-EINVAL); } cd = kzalloc(sizeof(struct char_device_struct), GFP_KERNEL); if (cd == NULL) return ERR_PTR(-ENOMEM); mutex_lock(&chrdevs_lock); if (major == 0) { ret = find_dynamic_major(); if (ret < 0) { pr_err("CHRDEV \"%s\" dynamic allocation region is full\n", name); goto out; } major = ret; } ret = -EBUSY; i = major_to_index(major); for (curr = chrdevs[i]; curr; prev = curr, curr = curr->next) { if (curr->major < major) continue; if (curr->major > major) break; if (curr->baseminor + curr->minorct <= baseminor) continue; if (curr->baseminor >= baseminor + minorct) break; goto out; } cd->major = major; cd->baseminor = baseminor; cd->minorct = minorct; strscpy(cd->name, name, sizeof(cd->name)); if (!prev) { cd->next = curr; chrdevs[i] = cd; } else { cd->next = prev->next; prev->next = cd; } mutex_unlock(&chrdevs_lock); return cd; out: mutex_unlock(&chrdevs_lock); kfree(cd); return ERR_PTR(ret); } static struct char_device_struct * __unregister_chrdev_region(unsigned major, unsigned baseminor, int minorct) { struct char_device_struct *cd = NULL, **cp; int i = major_to_index(major); mutex_lock(&chrdevs_lock); for (cp = &chrdevs[i]; *cp; cp = &(*cp)->next) if ((*cp)->major == major && (*cp)->baseminor == baseminor && (*cp)->minorct == minorct) break; if (*cp) { cd = *cp; *cp = cd->next; } mutex_unlock(&chrdevs_lock); return cd; } /** * register_chrdev_region() - register a range of device numbers * @from: the first in the desired range of device numbers; must include * the major number. * @count: the number of consecutive device numbers required * @name: the name of the device or driver. * * Return value is zero on success, a negative error code on failure. */ int register_chrdev_region(dev_t from, unsigned count, const char *name) { struct char_device_struct *cd; dev_t to = from + count; dev_t n, next; for (n = from; n < to; n = next) { next = MKDEV(MAJOR(n)+1, 0); if (next > to) next = to; cd = __register_chrdev_region(MAJOR(n), MINOR(n), next - n, name); if (IS_ERR(cd)) goto fail; } return 0; fail: to = n; for (n = from; n < to; n = next) { next = MKDEV(MAJOR(n)+1, 0); kfree(__unregister_chrdev_region(MAJOR(n), MINOR(n), next - n)); } return PTR_ERR(cd); } /** * alloc_chrdev_region() - register a range of char device numbers * @dev: output parameter for first assigned number * @baseminor: first of the requested range of minor numbers * @count: the number of minor numbers required * @name: the name of the associated device or driver * * Allocates a range of char device numbers. The major number will be * chosen dynamically, and returned (along with the first minor number) * in @dev. Returns zero or a negative error code. */ int alloc_chrdev_region(dev_t *dev, unsigned baseminor, unsigned count, const char *name) { struct char_device_struct *cd; cd = __register_chrdev_region(0, baseminor, count, name); if (IS_ERR(cd)) return PTR_ERR(cd); *dev = MKDEV(cd->major, cd->baseminor); return 0; } /** * __register_chrdev() - create and register a cdev occupying a range of minors * @major: major device number or 0 for dynamic allocation * @baseminor: first of the requested range of minor numbers * @count: the number of minor numbers required * @name: name of this range of devices * @fops: file operations associated with this devices * * If @major == 0 this functions will dynamically allocate a major and return * its number. * * If @major > 0 this function will attempt to reserve a device with the given * major number and will return zero on success. * * Returns a -ve errno on failure. * * The name of this device has nothing to do with the name of the device in * /dev. It only helps to keep track of the different owners of devices. If * your module name has only one type of devices it's ok to use e.g. the name * of the module here. */ int __register_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name, const struct file_operations *fops) { struct char_device_struct *cd; struct cdev *cdev; int err = -ENOMEM; cd = __register_chrdev_region(major, baseminor, count, name); if (IS_ERR(cd)) return PTR_ERR(cd); cdev = cdev_alloc(); if (!cdev) goto out2; cdev->owner = fops->owner; cdev->ops = fops; kobject_set_name(&cdev->kobj, "%s", name); err = cdev_add(cdev, MKDEV(cd->major, baseminor), count); if (err) goto out; cd->cdev = cdev; return major ? 0 : cd->major; out: kobject_put(&cdev->kobj); out2: kfree(__unregister_chrdev_region(cd->major, baseminor, count)); return err; } /** * unregister_chrdev_region() - unregister a range of device numbers * @from: the first in the range of numbers to unregister * @count: the number of device numbers to unregister * * This function will unregister a range of @count device numbers, * starting with @from. The caller should normally be the one who * allocated those numbers in the first place... */ void unregister_chrdev_region(dev_t from, unsigned count) { dev_t to = from + count; dev_t n, next; for (n = from; n < to; n = next) { next = MKDEV(MAJOR(n)+1, 0); if (next > to) next = to; kfree(__unregister_chrdev_region(MAJOR(n), MINOR(n), next - n)); } } /** * __unregister_chrdev - unregister and destroy a cdev * @major: major device number * @baseminor: first of the range of minor numbers * @count: the number of minor numbers this cdev is occupying * @name: name of this range of devices * * Unregister and destroy the cdev occupying the region described by * @major, @baseminor and @count. This function undoes what * __register_chrdev() did. */ void __unregister_chrdev(unsigned int major, unsigned int baseminor, unsigned int count, const char *name) { struct char_device_struct *cd; cd = __unregister_chrdev_region(major, baseminor, count); if (cd && cd->cdev) cdev_del(cd->cdev); kfree(cd); } static DEFINE_SPINLOCK(cdev_lock); static struct kobject *cdev_get(struct cdev *p) { struct module *owner = p->owner; struct kobject *kobj; if (!try_module_get(owner)) return NULL; kobj = kobject_get_unless_zero(&p->kobj); if (!kobj) module_put(owner); return kobj; } void cdev_put(struct cdev *p) { if (p) { struct module *owner = p->owner; kobject_put(&p->kobj); module_put(owner); } } /* * Called every time a character special file is opened */ static int chrdev_open(struct inode *inode, struct file *filp) { const struct file_operations *fops; struct cdev *p; struct cdev *new = NULL; int ret = 0; spin_lock(&cdev_lock); p = inode->i_cdev; if (!p) { struct kobject *kobj; int idx; spin_unlock(&cdev_lock); kobj = kobj_lookup(cdev_map, inode->i_rdev, &idx); if (!kobj) return -ENXIO; new = container_of(kobj, struct cdev, kobj); spin_lock(&cdev_lock); /* Check i_cdev again in case somebody beat us to it while we dropped the lock. */ p = inode->i_cdev; if (!p) { inode->i_cdev = p = new; list_add(&inode->i_devices, &p->list); new = NULL; } else if (!cdev_get(p)) ret = -ENXIO; } else if (!cdev_get(p)) ret = -ENXIO; spin_unlock(&cdev_lock); cdev_put(new); if (ret) return ret; ret = -ENXIO; fops = fops_get(p->ops); if (!fops) goto out_cdev_put; replace_fops(filp, fops); if (filp->f_op->open) { ret = filp->f_op->open(inode, filp); if (ret) goto out_cdev_put; } return 0; out_cdev_put: cdev_put(p); return ret; } void cd_forget(struct inode *inode) { spin_lock(&cdev_lock); list_del_init(&inode->i_devices); inode->i_cdev = NULL; inode->i_mapping = &inode->i_data; spin_unlock(&cdev_lock); } static void cdev_purge(struct cdev *cdev) { spin_lock(&cdev_lock); while (!list_empty(&cdev->list)) { struct inode *inode; inode = container_of(cdev->list.next, struct inode, i_devices); list_del_init(&inode->i_devices); inode->i_cdev = NULL; } spin_unlock(&cdev_lock); } /* * Dummy default file-operations: the only thing this does * is contain the open that then fills in the correct operations * depending on the special file... */ const struct file_operations def_chr_fops = { .open = chrdev_open, .llseek = noop_llseek, }; static struct kobject *exact_match(dev_t dev, int *part, void *data) { struct cdev *p = data; return &p->kobj; } static int exact_lock(dev_t dev, void *data) { struct cdev *p = data; return cdev_get(p) ? 0 : -1; } /** * cdev_add() - add a char device to the system * @p: the cdev structure for the device * @dev: the first device number for which this device is responsible * @count: the number of consecutive minor numbers corresponding to this * device * * cdev_add() adds the device represented by @p to the system, making it * live immediately. A negative error code is returned on failure. */ int cdev_add(struct cdev *p, dev_t dev, unsigned count) { int error; p->dev = dev; p->count = count; if (WARN_ON(dev == WHITEOUT_DEV)) { error = -EBUSY; goto err; } error = kobj_map(cdev_map, dev, count, NULL, exact_match, exact_lock, p); if (error) goto err; kobject_get(p->kobj.parent); return 0; err: kfree_const(p->kobj.name); p->kobj.name = NULL; return error; } /** * cdev_set_parent() - set the parent kobject for a char device * @p: the cdev structure * @kobj: the kobject to take a reference to * * cdev_set_parent() sets a parent kobject which will be referenced * appropriately so the parent is not freed before the cdev. This * should be called before cdev_add. */ void cdev_set_parent(struct cdev *p, struct kobject *kobj) { WARN_ON(!kobj->state_initialized); p->kobj.parent = kobj; } /** * cdev_device_add() - add a char device and it's corresponding * struct device, linkink * @dev: the device structure * @cdev: the cdev structure * * cdev_device_add() adds the char device represented by @cdev to the system, * just as cdev_add does. It then adds @dev to the system using device_add * The dev_t for the char device will be taken from the struct device which * needs to be initialized first. This helper function correctly takes a * reference to the parent device so the parent will not get released until * all references to the cdev are released. * * This helper uses dev->devt for the device number. If it is not set * it will not add the cdev and it will be equivalent to device_add. * * This function should be used whenever the struct cdev and the * struct device are members of the same structure whose lifetime is * managed by the struct device. * * NOTE: Callers must assume that userspace was able to open the cdev and * can call cdev fops callbacks at any time, even if this function fails. */ int cdev_device_add(struct cdev *cdev, struct device *dev) { int rc = 0; if (dev->devt) { cdev_set_parent(cdev, &dev->kobj); rc = cdev_add(cdev, dev->devt, 1); if (rc) return rc; } rc = device_add(dev); if (rc && dev->devt) cdev_del(cdev); return rc; } /** * cdev_device_del() - inverse of cdev_device_add * @cdev: the cdev structure * @dev: the device structure * * cdev_device_del() is a helper function to call cdev_del and device_del. * It should be used whenever cdev_device_add is used. * * If dev->devt is not set it will not remove the cdev and will be equivalent * to device_del. * * NOTE: This guarantees that associated sysfs callbacks are not running * or runnable, however any cdevs already open will remain and their fops * will still be callable even after this function returns. */ void cdev_device_del(struct cdev *cdev, struct device *dev) { device_del(dev); if (dev->devt) cdev_del(cdev); } static void cdev_unmap(dev_t dev, unsigned count) { kobj_unmap(cdev_map, dev, count); } /** * cdev_del() - remove a cdev from the system * @p: the cdev structure to be removed * * cdev_del() removes @p from the system, possibly freeing the structure * itself. * * NOTE: This guarantees that cdev device will no longer be able to be * opened, however any cdevs already open will remain and their fops will * still be callable even after cdev_del returns. */ void cdev_del(struct cdev *p) { cdev_unmap(p->dev, p->count); kobject_put(&p->kobj); } static void cdev_default_release(struct kobject *kobj) { struct cdev *p = container_of(kobj, struct cdev, kobj); struct kobject *parent = kobj->parent; cdev_purge(p); kobject_put(parent); } static void cdev_dynamic_release(struct kobject *kobj) { struct cdev *p = container_of(kobj, struct cdev, kobj); struct kobject *parent = kobj->parent; cdev_purge(p); kfree(p); kobject_put(parent); } static struct kobj_type ktype_cdev_default = { .release = cdev_default_release, }; static struct kobj_type ktype_cdev_dynamic = { .release = cdev_dynamic_release, }; /** * cdev_alloc() - allocate a cdev structure * * Allocates and returns a cdev structure, or NULL on failure. */ struct cdev *cdev_alloc(void) { struct cdev *p = kzalloc(sizeof(struct cdev), GFP_KERNEL); if (p) { INIT_LIST_HEAD(&p->list); kobject_init(&p->kobj, &ktype_cdev_dynamic); } return p; } /** * cdev_init() - initialize a cdev structure * @cdev: the structure to initialize * @fops: the file_operations for this device * * Initializes @cdev, remembering @fops, making it ready to add to the * system with cdev_add(). */ void cdev_init(struct cdev *cdev, const struct file_operations *fops) { memset(cdev, 0, sizeof *cdev); INIT_LIST_HEAD(&cdev->list); kobject_init(&cdev->kobj, &ktype_cdev_default); cdev->ops = fops; } static struct kobject *base_probe(dev_t dev, int *part, void *data) { if (request_module("char-major-%d-%d", MAJOR(dev), MINOR(dev)) > 0) /* Make old-style 2.4 aliases work */ request_module("char-major-%d", MAJOR(dev)); return NULL; } void __init chrdev_init(void) { cdev_map = kobj_map_init(base_probe, &chrdevs_lock); } /* Let modules do char dev stuff */ EXPORT_SYMBOL(register_chrdev_region); EXPORT_SYMBOL(unregister_chrdev_region); EXPORT_SYMBOL(alloc_chrdev_region); EXPORT_SYMBOL(cdev_init); EXPORT_SYMBOL(cdev_alloc); EXPORT_SYMBOL(cdev_del); EXPORT_SYMBOL(cdev_add); EXPORT_SYMBOL(cdev_set_parent); EXPORT_SYMBOL(cdev_device_add); EXPORT_SYMBOL(cdev_device_del); EXPORT_SYMBOL(__register_chrdev); EXPORT_SYMBOL(__unregister_chrdev); |
3 3 3 3 3 2 2 2 2 1 1 1 3 3 3 3 3 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 | // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/cls_basic.c Basic Packet Classifier. * * Authors: Thomas Graf <tgraf@suug.ch> */ #include <linux/module.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/idr.h> #include <linux/percpu.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <net/tc_wrapper.h> struct basic_head { struct list_head flist; struct idr handle_idr; struct rcu_head rcu; }; struct basic_filter { u32 handle; struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_result res; struct tcf_proto *tp; struct list_head link; struct tc_basic_pcnt __percpu *pf; struct rcu_work rwork; }; TC_INDIRECT_SCOPE int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { int r; struct basic_head *head = rcu_dereference_bh(tp->root); struct basic_filter *f; list_for_each_entry_rcu(f, &head->flist, link) { __this_cpu_inc(f->pf->rcnt); if (!tcf_em_tree_match(skb, &f->ematches, NULL)) continue; __this_cpu_inc(f->pf->rhit); *res = f->res; r = tcf_exts_exec(skb, &f->exts, res); if (r < 0) continue; return r; } return -1; } static void *basic_get(struct tcf_proto *tp, u32 handle) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { if (f->handle == handle) { return f; } } return NULL; } static int basic_init(struct tcf_proto *tp) { struct basic_head *head; head = kzalloc(sizeof(*head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->flist); idr_init(&head->handle_idr); rcu_assign_pointer(tp->root, head); return 0; } static void __basic_delete_filter(struct basic_filter *f) { tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); tcf_exts_put_net(&f->exts); free_percpu(f->pf); kfree(f); } static void basic_delete_filter_work(struct work_struct *work) { struct basic_filter *f = container_of(to_rcu_work(work), struct basic_filter, rwork); rtnl_lock(); __basic_delete_filter(f); rtnl_unlock(); } static void basic_destroy(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f, *n; list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); idr_remove(&head->handle_idr, f->handle); if (tcf_exts_get_net(&f->exts)) tcf_queue_work(&f->rwork, basic_delete_filter_work); else __basic_delete_filter(f); } idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); } static int basic_delete(struct tcf_proto *tp, void *arg, bool *last, bool rtnl_held, struct netlink_ext_ack *extack) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f = arg; list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); idr_remove(&head->handle_idr, f->handle); tcf_exts_get_net(&f->exts); tcf_queue_work(&f->rwork, basic_delete_filter_work); *last = list_empty(&head->flist); return 0; } static const struct nla_policy basic_policy[TCA_BASIC_MAX + 1] = { [TCA_BASIC_CLASSID] = { .type = NLA_U32 }, [TCA_BASIC_EMATCHES] = { .type = NLA_NESTED }, }; static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct basic_filter *f, unsigned long base, struct nlattr **tb, struct nlattr *est, u32 flags, struct netlink_ext_ack *extack) { int err; err = tcf_exts_validate(net, tp, tb, est, &f->exts, flags, extack); if (err < 0) return err; err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &f->ematches); if (err < 0) return err; if (tb[TCA_BASIC_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]); tcf_bind_filter(tp, &f->res, base); } f->tp = tp; return 0; } static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, void **arg, u32 flags, struct netlink_ext_ack *extack) { int err; struct basic_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_BASIC_MAX + 1]; struct basic_filter *fold = (struct basic_filter *) *arg; struct basic_filter *fnew; if (tca[TCA_OPTIONS] == NULL) return -EINVAL; err = nla_parse_nested_deprecated(tb, TCA_BASIC_MAX, tca[TCA_OPTIONS], basic_policy, NULL); if (err < 0) return err; if (fold != NULL) { if (handle && fold->handle != handle) return -EINVAL; } fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); if (!fnew) return -ENOBUFS; err = tcf_exts_init(&fnew->exts, net, TCA_BASIC_ACT, TCA_BASIC_POLICE); if (err < 0) goto errout; if (!handle) { handle = 1; err = idr_alloc_u32(&head->handle_idr, fnew, &handle, INT_MAX, GFP_KERNEL); } else if (!fold) { err = idr_alloc_u32(&head->handle_idr, fnew, &handle, handle, GFP_KERNEL); } if (err) goto errout; fnew->handle = handle; fnew->pf = alloc_percpu(struct tc_basic_pcnt); if (!fnew->pf) { err = -ENOMEM; goto errout; } err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], flags, extack); if (err < 0) { if (!fold) idr_remove(&head->handle_idr, fnew->handle); goto errout; } *arg = fnew; if (fold) { idr_replace(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); tcf_exts_get_net(&fold->exts); tcf_queue_work(&fold->rwork, basic_delete_filter_work); } else { list_add_rcu(&fnew->link, &head->flist); } return 0; errout: free_percpu(fnew->pf); tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg, bool rtnl_held) { struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { if (!tc_cls_stats_dump(tp, arg, f)) break; } } static void basic_bind_class(void *fh, u32 classid, unsigned long cl, void *q, unsigned long base) { struct basic_filter *f = fh; tc_cls_bind_class(classid, cl, q, &f->res, base); } static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t, bool rtnl_held) { struct tc_basic_pcnt gpf = {}; struct basic_filter *f = fh; struct nlattr *nest; int cpu; if (f == NULL) return skb->len; t->tcm_handle = f->handle; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; if (f->res.classid && nla_put_u32(skb, TCA_BASIC_CLASSID, f->res.classid)) goto nla_put_failure; for_each_possible_cpu(cpu) { struct tc_basic_pcnt *pf = per_cpu_ptr(f->pf, cpu); gpf.rcnt += pf->rcnt; gpf.rhit += pf->rhit; } if (nla_put_64bit(skb, TCA_BASIC_PCNT, sizeof(struct tc_basic_pcnt), &gpf, TCA_BASIC_PAD)) goto nla_put_failure; if (tcf_exts_dump(skb, &f->exts) < 0 || tcf_em_tree_dump(skb, &f->ematches, TCA_BASIC_EMATCHES) < 0) goto nla_put_failure; nla_nest_end(skb, nest); if (tcf_exts_dump_stats(skb, &f->exts) < 0) goto nla_put_failure; return skb->len; nla_put_failure: nla_nest_cancel(skb, nest); return -1; } static struct tcf_proto_ops cls_basic_ops __read_mostly = { .kind = "basic", .classify = basic_classify, .init = basic_init, .destroy = basic_destroy, .get = basic_get, .change = basic_change, .delete = basic_delete, .walk = basic_walk, .dump = basic_dump, .bind_class = basic_bind_class, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_CLS("basic"); static int __init init_basic(void) { return register_tcf_proto_ops(&cls_basic_ops); } static void __exit exit_basic(void) { unregister_tcf_proto_ops(&cls_basic_ops); } module_init(init_basic) module_exit(exit_basic) MODULE_DESCRIPTION("TC basic classifier"); MODULE_LICENSE("GPL"); |
1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 | /* * Compressed rom filesystem for Linux. * * Copyright (C) 1999 Linus Torvalds. * * This file is released under the GPL. */ /* * These are the VFS interfaces to the compressed rom filesystem. * The actual compression is based on zlib, see the other files. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/pagemap.h> #include <linux/pfn_t.h> #include <linux/ramfs.h> #include <linux/init.h> #include <linux/string.h> #include <linux/blkdev.h> #include <linux/mtd/mtd.h> #include <linux/mtd/super.h> #include <linux/fs_context.h> #include <linux/slab.h> #include <linux/vfs.h> #include <linux/mutex.h> #include <uapi/linux/cramfs_fs.h> #include <linux/uaccess.h> #include "internal.h" /* * cramfs super-block data in memory */ struct cramfs_sb_info { unsigned long magic; unsigned long size; unsigned long blocks; unsigned long files; unsigned long flags; void *linear_virt_addr; resource_size_t linear_phys_addr; size_t mtd_point_size; }; static inline struct cramfs_sb_info *CRAMFS_SB(struct super_block *sb) { return sb->s_fs_info; } static const struct super_operations cramfs_ops; static const struct inode_operations cramfs_dir_inode_operations; static const struct file_operations cramfs_directory_operations; static const struct file_operations cramfs_physmem_fops; static const struct address_space_operations cramfs_aops; static DEFINE_MUTEX(read_mutex); /* These macros may change in future, to provide better st_ino semantics. */ #define OFFSET(x) ((x)->i_ino) static unsigned long cramino(const struct cramfs_inode *cino, unsigned int offset) { if (!cino->offset) return offset + 1; if (!cino->size) return offset + 1; /* * The file mode test fixes buggy mkcramfs implementations where * cramfs_inode->offset is set to a non zero value for entries * which did not contain data, like devices node and fifos. */ switch (cino->mode & S_IFMT) { case S_IFREG: case S_IFDIR: case S_IFLNK: return cino->offset << 2; default: break; } return offset + 1; } static struct inode *get_cramfs_inode(struct super_block *sb, const struct cramfs_inode *cramfs_inode, unsigned int offset) { struct inode *inode; static struct timespec64 zerotime; inode = iget_locked(sb, cramino(cramfs_inode, offset)); if (!inode) return ERR_PTR(-ENOMEM); if (!(inode->i_state & I_NEW)) return inode; switch (cramfs_inode->mode & S_IFMT) { case S_IFREG: inode->i_fop = &generic_ro_fops; inode->i_data.a_ops = &cramfs_aops; if (IS_ENABLED(CONFIG_CRAMFS_MTD) && CRAMFS_SB(sb)->flags & CRAMFS_FLAG_EXT_BLOCK_POINTERS && CRAMFS_SB(sb)->linear_phys_addr) inode->i_fop = &cramfs_physmem_fops; break; case S_IFDIR: inode->i_op = &cramfs_dir_inode_operations; inode->i_fop = &cramfs_directory_operations; break; case S_IFLNK: inode->i_op = &page_symlink_inode_operations; inode_nohighmem(inode); inode->i_data.a_ops = &cramfs_aops; break; default: init_special_inode(inode, cramfs_inode->mode, old_decode_dev(cramfs_inode->size)); } inode->i_mode = cramfs_inode->mode; i_uid_write(inode, cramfs_inode->uid); i_gid_write(inode, cramfs_inode->gid); /* if the lower 2 bits are zero, the inode contains data */ if (!(inode->i_ino & 3)) { inode->i_size = cramfs_inode->size; inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; } /* Struct copy intentional */ inode_set_mtime_to_ts(inode, inode_set_atime_to_ts(inode, inode_set_ctime_to_ts(inode, zerotime))); /* inode->i_nlink is left 1 - arguably wrong for directories, but it's the best we can do without reading the directory contents. 1 yields the right result in GNU find, even without -noleaf option. */ unlock_new_inode(inode); return inode; } /* * We have our own block cache: don't fill up the buffer cache * with the rom-image, because the way the filesystem is set * up the accesses should be fairly regular and cached in the * page cache and dentry tree anyway.. * * This also acts as a way to guarantee contiguous areas of up to * BLKS_PER_BUF*PAGE_SIZE, so that the caller doesn't need to * worry about end-of-buffer issues even when decompressing a full * page cache. * * Note: This is all optimized away at compile time when * CONFIG_CRAMFS_BLOCKDEV=n. */ #define READ_BUFFERS (2) /* NEXT_BUFFER(): Loop over [0..(READ_BUFFERS-1)]. */ #define NEXT_BUFFER(_ix) ((_ix) ^ 1) /* * BLKS_PER_BUF_SHIFT should be at least 2 to allow for "compressed" * data that takes up more space than the original and with unlucky * alignment. */ #define BLKS_PER_BUF_SHIFT (2) #define BLKS_PER_BUF (1 << BLKS_PER_BUF_SHIFT) #define BUFFER_SIZE (BLKS_PER_BUF*PAGE_SIZE) static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE]; static unsigned buffer_blocknr[READ_BUFFERS]; static struct super_block *buffer_dev[READ_BUFFERS]; static int next_buffer; /* * Populate our block cache and return a pointer to it. */ static void *cramfs_blkdev_read(struct super_block *sb, unsigned int offset, unsigned int len) { struct address_space *mapping = sb->s_bdev->bd_mapping; struct file_ra_state ra = {}; struct page *pages[BLKS_PER_BUF]; unsigned i, blocknr, buffer; unsigned long devsize; char *data; if (!len) return NULL; blocknr = offset >> PAGE_SHIFT; offset &= PAGE_SIZE - 1; /* Check if an existing buffer already has the data.. */ for (i = 0; i < READ_BUFFERS; i++) { unsigned int blk_offset; if (buffer_dev[i] != sb) continue; if (blocknr < buffer_blocknr[i]) continue; blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_SHIFT; blk_offset += offset; if (blk_offset > BUFFER_SIZE || blk_offset + len > BUFFER_SIZE) continue; return read_buffers[i] + blk_offset; } devsize = bdev_nr_bytes(sb->s_bdev) >> PAGE_SHIFT; /* Ok, read in BLKS_PER_BUF pages completely first. */ file_ra_state_init(&ra, mapping); page_cache_sync_readahead(mapping, &ra, NULL, blocknr, BLKS_PER_BUF); for (i = 0; i < BLKS_PER_BUF; i++) { struct page *page = NULL; if (blocknr + i < devsize) { page = read_mapping_page(mapping, blocknr + i, NULL); /* synchronous error? */ if (IS_ERR(page)) page = NULL; } pages[i] = page; } buffer = next_buffer; next_buffer = NEXT_BUFFER(buffer); buffer_blocknr[buffer] = blocknr; buffer_dev[buffer] = sb; data = read_buffers[buffer]; for (i = 0; i < BLKS_PER_BUF; i++) { struct page *page = pages[i]; if (page) { memcpy_from_page(data, page, 0, PAGE_SIZE); put_page(page); } else memset(data, 0, PAGE_SIZE); data += PAGE_SIZE; } return read_buffers[buffer] + offset; } /* * Return a pointer to the linearly addressed cramfs image in memory. */ static void *cramfs_direct_read(struct super_block *sb, unsigned int offset, unsigned int len) { struct cramfs_sb_info *sbi = CRAMFS_SB(sb); if (!len) return NULL; if (len > sbi->size || offset > sbi->size - len) return page_address(ZERO_PAGE(0)); return sbi->linear_virt_addr + offset; } /* * Returns a pointer to a buffer containing at least LEN bytes of * filesystem starting at byte offset OFFSET into the filesystem. */ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned int len) { struct cramfs_sb_info *sbi = CRAMFS_SB(sb); if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sbi->linear_virt_addr) return cramfs_direct_read(sb, offset, len); else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV)) return cramfs_blkdev_read(sb, offset, len); else return NULL; } /* * For a mapping to be possible, we need a range of uncompressed and * contiguous blocks. Return the offset for the first block and number of * valid blocks for which that is true, or zero otherwise. */ static u32 cramfs_get_block_range(struct inode *inode, u32 pgoff, u32 *pages) { struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb); int i; u32 *blockptrs, first_block_addr; /* * We can dereference memory directly here as this code may be * reached only when there is a direct filesystem image mapping * available in memory. */ blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode) + pgoff * 4); first_block_addr = blockptrs[0] & ~CRAMFS_BLK_FLAGS; i = 0; do { u32 block_off = i * (PAGE_SIZE >> CRAMFS_BLK_DIRECT_PTR_SHIFT); u32 expect = (first_block_addr + block_off) | CRAMFS_BLK_FLAG_DIRECT_PTR | CRAMFS_BLK_FLAG_UNCOMPRESSED; if (blockptrs[i] != expect) { pr_debug("range: block %d/%d got %#x expects %#x\n", pgoff+i, pgoff + *pages - 1, blockptrs[i], expect); if (i == 0) return 0; break; } } while (++i < *pages); *pages = i; return first_block_addr << CRAMFS_BLK_DIRECT_PTR_SHIFT; } #ifdef CONFIG_MMU /* * Return true if the last page of a file in the filesystem image contains * some other data that doesn't belong to that file. It is assumed that the * last block is CRAMFS_BLK_FLAG_DIRECT_PTR | CRAMFS_BLK_FLAG_UNCOMPRESSED * (verified by cramfs_get_block_range() and directly accessible in memory. */ static bool cramfs_last_page_is_shared(struct inode *inode) { struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb); u32 partial, last_page, blockaddr, *blockptrs; char *tail_data; partial = offset_in_page(inode->i_size); if (!partial) return false; last_page = inode->i_size >> PAGE_SHIFT; blockptrs = (u32 *)(sbi->linear_virt_addr + OFFSET(inode)); blockaddr = blockptrs[last_page] & ~CRAMFS_BLK_FLAGS; blockaddr <<= CRAMFS_BLK_DIRECT_PTR_SHIFT; tail_data = sbi->linear_virt_addr + blockaddr + partial; return memchr_inv(tail_data, 0, PAGE_SIZE - partial) ? true : false; } static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); struct cramfs_sb_info *sbi = CRAMFS_SB(inode->i_sb); unsigned int pages, max_pages, offset; unsigned long address, pgoff = vma->vm_pgoff; char *bailout_reason; int ret; ret = generic_file_readonly_mmap(file, vma); if (ret) return ret; /* * Now try to pre-populate ptes for this vma with a direct * mapping avoiding memory allocation when possible. */ /* Could COW work here? */ bailout_reason = "vma is writable"; if (vma->vm_flags & VM_WRITE) goto bailout; max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; bailout_reason = "beyond file limit"; if (pgoff >= max_pages) goto bailout; pages = min(vma_pages(vma), max_pages - pgoff); offset = cramfs_get_block_range(inode, pgoff, &pages); bailout_reason = "unsuitable block layout"; if (!offset) goto bailout; address = sbi->linear_phys_addr + offset; bailout_reason = "data is not page aligned"; if (!PAGE_ALIGNED(address)) goto bailout; /* Don't map the last page if it contains some other data */ if (pgoff + pages == max_pages && cramfs_last_page_is_shared(inode)) { pr_debug("mmap: %pD: last page is shared\n", file); pages--; } if (!pages) { bailout_reason = "no suitable block remaining"; goto bailout; } if (pages == vma_pages(vma)) { /* * The entire vma is mappable. remap_pfn_range() will * make it distinguishable from a non-direct mapping * in /proc/<pid>/maps by substituting the file offset * with the actual physical address. */ ret = remap_pfn_range(vma, vma->vm_start, address >> PAGE_SHIFT, pages * PAGE_SIZE, vma->vm_page_prot); } else { /* * Let's create a mixed map if we can't map it all. * The normal paging machinery will take care of the * unpopulated ptes via cramfs_read_folio(). */ int i; vm_flags_set(vma, VM_MIXEDMAP); for (i = 0; i < pages && !ret; i++) { vm_fault_t vmf; unsigned long off = i * PAGE_SIZE; pfn_t pfn = phys_to_pfn_t(address + off, PFN_DEV); vmf = vmf_insert_mixed(vma, vma->vm_start + off, pfn); if (vmf & VM_FAULT_ERROR) ret = vm_fault_to_errno(vmf, 0); } } if (!ret) pr_debug("mapped %pD[%lu] at 0x%08lx (%u/%lu pages) " "to vma 0x%08lx, page_prot 0x%llx\n", file, pgoff, address, pages, vma_pages(vma), vma->vm_start, (unsigned long long)pgprot_val(vma->vm_page_prot)); return ret; bailout: pr_debug("%pD[%lu]: direct mmap impossible: %s\n", file, pgoff, bailout_reason); /* Didn't manage any direct map, but normal paging is still possible */ return 0; } #else /* CONFIG_MMU */ static int cramfs_physmem_mmap(struct file *file, struct vm_area_struct *vma) { return is_nommu_shared_mapping(vma->vm_flags) ? 0 : -ENOSYS; } static unsigned long cramfs_physmem_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct cramfs_sb_info *sbi = CRAMFS_SB(sb); unsigned int pages, block_pages, max_pages, offset; pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; max_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; if (pgoff >= max_pages || pages > max_pages - pgoff) return -EINVAL; block_pages = pages; offset = cramfs_get_block_range(inode, pgoff, &block_pages); if (!offset || block_pages != pages) return -ENOSYS; addr = sbi->linear_phys_addr + offset; pr_debug("get_unmapped for %pD ofs %#lx siz %lu at 0x%08lx\n", file, pgoff*PAGE_SIZE, len, addr); return addr; } static unsigned int cramfs_physmem_mmap_capabilities(struct file *file) { return NOMMU_MAP_COPY | NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_EXEC; } #endif /* CONFIG_MMU */ static const struct file_operations cramfs_physmem_fops = { .llseek = generic_file_llseek, .read_iter = generic_file_read_iter, .splice_read = filemap_splice_read, .mmap = cramfs_physmem_mmap, #ifndef CONFIG_MMU .get_unmapped_area = cramfs_physmem_get_unmapped_area, .mmap_capabilities = cramfs_physmem_mmap_capabilities, #endif }; static void cramfs_kill_sb(struct super_block *sb) { struct cramfs_sb_info *sbi = CRAMFS_SB(sb); generic_shutdown_super(sb); if (IS_ENABLED(CONFIG_CRAMFS_MTD) && sb->s_mtd) { if (sbi && sbi->mtd_point_size) mtd_unpoint(sb->s_mtd, 0, sbi->mtd_point_size); put_mtd_device(sb->s_mtd); sb->s_mtd = NULL; } else if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV) && sb->s_bdev) { sync_blockdev(sb->s_bdev); bdev_fput(sb->s_bdev_file); } kfree(sbi); } static int cramfs_reconfigure(struct fs_context *fc) { sync_filesystem(fc->root->d_sb); fc->sb_flags |= SB_RDONLY; return 0; } static int cramfs_read_super(struct super_block *sb, struct fs_context *fc, struct cramfs_super *super) { struct cramfs_sb_info *sbi = CRAMFS_SB(sb); unsigned long root_offset; bool silent = fc->sb_flags & SB_SILENT; /* We don't know the real size yet */ sbi->size = PAGE_SIZE; /* Read the first block and get the superblock from it */ mutex_lock(&read_mutex); memcpy(super, cramfs_read(sb, 0, sizeof(*super)), sizeof(*super)); mutex_unlock(&read_mutex); /* Do sanity checks on the superblock */ if (super->magic != CRAMFS_MAGIC) { /* check for wrong endianness */ if (super->magic == CRAMFS_MAGIC_WEND) { if (!silent) errorfc(fc, "wrong endianness"); return -EINVAL; } /* check at 512 byte offset */ mutex_lock(&read_mutex); memcpy(super, cramfs_read(sb, 512, sizeof(*super)), sizeof(*super)); mutex_unlock(&read_mutex); if (super->magic != CRAMFS_MAGIC) { if (super->magic == CRAMFS_MAGIC_WEND && !silent) errorfc(fc, "wrong endianness"); else if (!silent) errorfc(fc, "wrong magic"); return -EINVAL; } } /* get feature flags first */ if (super->flags & ~CRAMFS_SUPPORTED_FLAGS) { errorfc(fc, "unsupported filesystem features"); return -EINVAL; } /* Check that the root inode is in a sane state */ if (!S_ISDIR(super->root.mode)) { errorfc(fc, "root is not a directory"); return -EINVAL; } /* correct strange, hard-coded permissions of mkcramfs */ super->root.mode |= 0555; root_offset = super->root.offset << 2; if (super->flags & CRAMFS_FLAG_FSID_VERSION_2) { sbi->size = super->size; sbi->blocks = super->fsid.blocks; sbi->files = super->fsid.files; } else { sbi->size = 1<<28; sbi->blocks = 0; sbi->files = 0; } sbi->magic = super->magic; sbi->flags = super->flags; if (root_offset == 0) infofc(fc, "empty filesystem"); else if (!(super->flags & CRAMFS_FLAG_SHIFTED_ROOT_OFFSET) && ((root_offset != sizeof(struct cramfs_super)) && (root_offset != 512 + sizeof(struct cramfs_super)))) { errorfc(fc, "bad root offset %lu", root_offset); return -EINVAL; } return 0; } static int cramfs_finalize_super(struct super_block *sb, struct cramfs_inode *cramfs_root) { struct inode *root; /* Set it all up.. */ sb->s_flags |= SB_RDONLY; sb->s_time_min = 0; sb->s_time_max = 0; sb->s_op = &cramfs_ops; root = get_cramfs_inode(sb, cramfs_root, 0); if (IS_ERR(root)) return PTR_ERR(root); sb->s_root = d_make_root(root); if (!sb->s_root) return -ENOMEM; return 0; } static int cramfs_blkdev_fill_super(struct super_block *sb, struct fs_context *fc) { struct cramfs_sb_info *sbi; struct cramfs_super super; int i, err; sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; /* Invalidate the read buffers on mount: think disk change.. */ for (i = 0; i < READ_BUFFERS; i++) buffer_blocknr[i] = -1; err = cramfs_read_super(sb, fc, &super); if (err) return err; return cramfs_finalize_super(sb, &super.root); } static int cramfs_mtd_fill_super(struct super_block *sb, struct fs_context *fc) { struct cramfs_sb_info *sbi; struct cramfs_super super; int err; sbi = kzalloc(sizeof(struct cramfs_sb_info), GFP_KERNEL); if (!sbi) return -ENOMEM; sb->s_fs_info = sbi; /* Map only one page for now. Will remap it when fs size is known. */ err = mtd_point(sb->s_mtd, 0, PAGE_SIZE, &sbi->mtd_point_size, &sbi->linear_virt_addr, &sbi->linear_phys_addr); if (err || sbi->mtd_point_size != PAGE_SIZE) { pr_err("unable to get direct memory access to mtd:%s\n", sb->s_mtd->name); return err ? : -ENODATA; } pr_info("checking physical address %pap for linear cramfs image\n", &sbi->linear_phys_addr); err = cramfs_read_super(sb, fc, &super); if (err) return err; /* Remap the whole filesystem now */ pr_info("linear cramfs image on mtd:%s appears to be %lu KB in size\n", sb->s_mtd->name, sbi->size/1024); mtd_unpoint(sb->s_mtd, 0, PAGE_SIZE); err = mtd_point(sb->s_mtd, 0, sbi->size, &sbi->mtd_point_size, &sbi->linear_virt_addr, &sbi->linear_phys_addr); if (err || sbi->mtd_point_size != sbi->size) { pr_err("unable to get direct memory access to mtd:%s\n", sb->s_mtd->name); return err ? : -ENODATA; } return cramfs_finalize_super(sb, &super.root); } static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf) { struct super_block *sb = dentry->d_sb; u64 id = 0; if (sb->s_bdev) id = huge_encode_dev(sb->s_bdev->bd_dev); else if (sb->s_dev) id = huge_encode_dev(sb->s_dev); buf->f_type = CRAMFS_MAGIC; buf->f_bsize = PAGE_SIZE; buf->f_blocks = CRAMFS_SB(sb)->blocks; buf->f_bfree = 0; buf->f_bavail = 0; buf->f_files = CRAMFS_SB(sb)->files; buf->f_ffree = 0; buf->f_fsid = u64_to_fsid(id); buf->f_namelen = CRAMFS_MAXPATHLEN; return 0; } /* * Read a cramfs directory entry. */ static int cramfs_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; char *buf; unsigned int offset; /* Offset within the thing. */ if (ctx->pos >= inode->i_size) return 0; offset = ctx->pos; /* Directory entries are always 4-byte aligned */ if (offset & 3) return -EINVAL; buf = kmalloc(CRAMFS_MAXPATHLEN, GFP_KERNEL); if (!buf) return -ENOMEM; while (offset < inode->i_size) { struct cramfs_inode *de; unsigned long nextoffset; char *name; ino_t ino; umode_t mode; int namelen; mutex_lock(&read_mutex); de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN); name = (char *)(de+1); /* * Namelengths on disk are shifted by two * and the name padded out to 4-byte boundaries * with zeroes. */ namelen = de->namelen << 2; memcpy(buf, name, namelen); ino = cramino(de, OFFSET(inode) + offset); mode = de->mode; mutex_unlock(&read_mutex); nextoffset = offset + sizeof(*de) + namelen; for (;;) { if (!namelen) { kfree(buf); return -EIO; } if (buf[namelen-1]) break; namelen--; } if (!dir_emit(ctx, buf, namelen, ino, mode >> 12)) break; ctx->pos = offset = nextoffset; } kfree(buf); return 0; } /* * Lookup and fill in the inode data.. */ static struct dentry *cramfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { unsigned int offset = 0; struct inode *inode = NULL; int sorted; mutex_lock(&read_mutex); sorted = CRAMFS_SB(dir->i_sb)->flags & CRAMFS_FLAG_SORTED_DIRS; while (offset < dir->i_size) { struct cramfs_inode *de; char *name; int namelen, retval; int dir_off = OFFSET(dir) + offset; de = cramfs_read(dir->i_sb, dir_off, sizeof(*de)+CRAMFS_MAXPATHLEN); name = (char *)(de+1); /* Try to take advantage of sorted directories */ if (sorted && (dentry->d_name.name[0] < name[0])) break; namelen = de->namelen << 2; offset += sizeof(*de) + namelen; /* Quick check that the name is roughly the right length */ if (((dentry->d_name.len + 3) & ~3) != namelen) continue; for (;;) { if (!namelen) { inode = ERR_PTR(-EIO); goto out; } if (name[namelen-1]) break; namelen--; } if (namelen != dentry->d_name.len) continue; retval = memcmp(dentry->d_name.name, name, namelen); if (retval > 0) continue; if (!retval) { inode = get_cramfs_inode(dir->i_sb, de, dir_off); break; } /* else (retval < 0) */ if (sorted) break; } out: mutex_unlock(&read_mutex); return d_splice_alias(inode, dentry); } static int cramfs_read_folio(struct file *file, struct folio *folio) { struct inode *inode = folio->mapping->host; u32 maxblock; int bytes_filled; void *pgdata; bool success = false; maxblock = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; bytes_filled = 0; pgdata = kmap_local_folio(folio, 0); if (folio->index < maxblock) { struct super_block *sb = inode->i_sb; u32 blkptr_offset = OFFSET(inode) + folio->index * 4; u32 block_ptr, block_start, block_len; bool uncompressed, direct; mutex_lock(&read_mutex); block_ptr = *(u32 *) cramfs_read(sb, blkptr_offset, 4); uncompressed = (block_ptr & CRAMFS_BLK_FLAG_UNCOMPRESSED); direct = (block_ptr & CRAMFS_BLK_FLAG_DIRECT_PTR); block_ptr &= ~CRAMFS_BLK_FLAGS; if (direct) { /* * The block pointer is an absolute start pointer, * shifted by 2 bits. The size is included in the * first 2 bytes of the data block when compressed, * or PAGE_SIZE otherwise. */ block_start = block_ptr << CRAMFS_BLK_DIRECT_PTR_SHIFT; if (uncompressed) { block_len = PAGE_SIZE; /* if last block: cap to file length */ if (folio->index == maxblock - 1) block_len = offset_in_page(inode->i_size); } else { block_len = *(u16 *) cramfs_read(sb, block_start, 2); block_start += 2; } } else { /* * The block pointer indicates one past the end of * the current block (start of next block). If this * is the first block then it starts where the block * pointer table ends, otherwise its start comes * from the previous block's pointer. */ block_start = OFFSET(inode) + maxblock * 4; if (folio->index) block_start = *(u32 *) cramfs_read(sb, blkptr_offset - 4, 4); /* Beware... previous ptr might be a direct ptr */ if (unlikely(block_start & CRAMFS_BLK_FLAG_DIRECT_PTR)) { /* See comments on earlier code. */ u32 prev_start = block_start; block_start = prev_start & ~CRAMFS_BLK_FLAGS; block_start <<= CRAMFS_BLK_DIRECT_PTR_SHIFT; if (prev_start & CRAMFS_BLK_FLAG_UNCOMPRESSED) { block_start += PAGE_SIZE; } else { block_len = *(u16 *) cramfs_read(sb, block_start, 2); block_start += 2 + block_len; } } block_start &= ~CRAMFS_BLK_FLAGS; block_len = block_ptr - block_start; } if (block_len == 0) ; /* hole */ else if (unlikely(block_len > 2*PAGE_SIZE || (uncompressed && block_len > PAGE_SIZE))) { mutex_unlock(&read_mutex); pr_err("bad data blocksize %u\n", block_len); goto err; } else if (uncompressed) { memcpy(pgdata, cramfs_read(sb, block_start, block_len), block_len); bytes_filled = block_len; } else { bytes_filled = cramfs_uncompress_block(pgdata, PAGE_SIZE, cramfs_read(sb, block_start, block_len), block_len); } mutex_unlock(&read_mutex); if (unlikely(bytes_filled < 0)) goto err; } memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled); flush_dcache_folio(folio); success = true; err: kunmap_local(pgdata); folio_end_read(folio, success); return 0; } static const struct address_space_operations cramfs_aops = { .read_folio = cramfs_read_folio }; /* * Our operations: */ /* * A directory can only readdir */ static const struct file_operations cramfs_directory_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, .iterate_shared = cramfs_readdir, }; static const struct inode_operations cramfs_dir_inode_operations = { .lookup = cramfs_lookup, }; static const struct super_operations cramfs_ops = { .statfs = cramfs_statfs, }; static int cramfs_get_tree(struct fs_context *fc) { int ret = -ENOPROTOOPT; if (IS_ENABLED(CONFIG_CRAMFS_MTD)) { ret = get_tree_mtd(fc, cramfs_mtd_fill_super); if (!ret) return 0; } if (IS_ENABLED(CONFIG_CRAMFS_BLOCKDEV)) ret = get_tree_bdev(fc, cramfs_blkdev_fill_super); return ret; } static const struct fs_context_operations cramfs_context_ops = { .get_tree = cramfs_get_tree, .reconfigure = cramfs_reconfigure, }; /* * Set up the filesystem mount context. */ static int cramfs_init_fs_context(struct fs_context *fc) { fc->ops = &cramfs_context_ops; return 0; } static struct file_system_type cramfs_fs_type = { .owner = THIS_MODULE, .name = "cramfs", .init_fs_context = cramfs_init_fs_context, .kill_sb = cramfs_kill_sb, .fs_flags = FS_REQUIRES_DEV, }; MODULE_ALIAS_FS("cramfs"); static int __init init_cramfs_fs(void) { int rv; rv = cramfs_uncompress_init(); if (rv < 0) return rv; rv = register_filesystem(&cramfs_fs_type); if (rv < 0) cramfs_uncompress_exit(); return rv; } static void __exit exit_cramfs_fs(void) { cramfs_uncompress_exit(); unregister_filesystem(&cramfs_fs_type); } module_init(init_cramfs_fs) module_exit(exit_cramfs_fs) MODULE_DESCRIPTION("Compressed ROM file system support"); MODULE_LICENSE("GPL"); |
76 76 6 69 1 7 142 142 141 1 1 19 19 19 19 19 8 8 8 8 8 26 26 26 33 15 1 15 3 1 14 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 | // SPDX-License-Identifier: GPL-2.0-only #include <linux/module.h> #include <linux/errno.h> #include <linux/socket.h> #include <linux/kernel.h> #include <net/dst_metadata.h> #include <net/udp.h> #include <net/udp_tunnel.h> #include <net/inet_dscp.h> int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { int err; struct socket *sock = NULL; struct sockaddr_in udp_addr; err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; if (cfg->bind_ifindex) { err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true); if (err < 0) goto error; } udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->local_ip; udp_addr.sin_port = cfg->local_udp_port; err = kernel_bind(sock, (struct sockaddr *)&udp_addr, sizeof(udp_addr)); if (err < 0) goto error; if (cfg->peer_udp_port) { udp_addr.sin_family = AF_INET; udp_addr.sin_addr = cfg->peer_ip; udp_addr.sin_port = cfg->peer_udp_port; err = kernel_connect(sock, (struct sockaddr *)&udp_addr, sizeof(udp_addr), 0); if (err < 0) goto error; } sock->sk->sk_no_check_tx = !cfg->use_udp_checksums; *sockp = sock; return 0; error: if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); } *sockp = NULL; return err; } EXPORT_SYMBOL(udp_sock_create4); void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *cfg) { struct sock *sk = sock->sk; /* Disable multicast loopback */ inet_clear_bit(MC_LOOP, sk); /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ inet_inc_convert_csum(sk); rcu_assign_sk_user_data(sk, cfg->sk_user_data); udp_sk(sk)->encap_type = cfg->encap_type; udp_sk(sk)->encap_rcv = cfg->encap_rcv; udp_sk(sk)->encap_err_rcv = cfg->encap_err_rcv; udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup; udp_sk(sk)->encap_destroy = cfg->encap_destroy; udp_sk(sk)->gro_receive = cfg->gro_receive; udp_sk(sk)->gro_complete = cfg->gro_complete; udp_tunnel_encap_enable(sk); } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, unsigned short type) { struct sock *sk = sock->sk; struct udp_tunnel_info ti; ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; udp_tunnel_nic_add_port(dev, &ti); } EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port); void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, unsigned short type) { struct sock *sk = sock->sk; struct udp_tunnel_info ti; ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; udp_tunnel_nic_del_port(dev, &ti); } EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port); /* Notify netdevs that UDP port started listening */ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct udp_tunnel_info ti; struct net_device *dev; ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; rcu_read_lock(); for_each_netdev_rcu(net, dev) { udp_tunnel_nic_add_port(dev, &ti); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(udp_tunnel_notify_add_rx_port); /* Notify netdevs that UDP port is no more listening */ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct udp_tunnel_info ti; struct net_device *dev; ti.type = type; ti.sa_family = sk->sk_family; ti.port = inet_sk(sk)->inet_sport; rcu_read_lock(); for_each_netdev_rcu(net, dev) { udp_tunnel_nic_del_port(dev, &ti); } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(udp_tunnel_notify_del_rx_port); void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, bool xnet, bool nocheck) { struct udphdr *uh; __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); uh = udp_hdr(skb); uh->dest = dst_port; uh->source = src_port; uh->len = htons(skb->len); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); udp_set_csum(nocheck, skb, src, dst, skb->len); iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); void udp_tunnel_sock_release(struct socket *sock) { rcu_assign_sk_user_data(sock->sk, NULL); synchronize_rcu(); kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); } EXPORT_SYMBOL_GPL(udp_tunnel_sock_release); struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb, unsigned short family, const unsigned long *flags, __be64 tunnel_id, int md_size) { struct metadata_dst *tun_dst; struct ip_tunnel_info *info; if (family == AF_INET) tun_dst = ip_tun_rx_dst(skb, flags, tunnel_id, md_size); else tun_dst = ipv6_tun_rx_dst(skb, flags, tunnel_id, md_size); if (!tun_dst) return NULL; info = &tun_dst->u.tun_info; info->key.tp_src = udp_hdr(skb)->source; info->key.tp_dst = udp_hdr(skb)->dest; if (udp_hdr(skb)->check) __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags); return tun_dst; } EXPORT_SYMBOL_GPL(udp_tun_rx_dst); struct rtable *udp_tunnel_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, int oif, __be32 *saddr, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 tos, struct dst_cache *dst_cache) { struct rtable *rt = NULL; struct flowi4 fl4; #ifdef CONFIG_DST_CACHE if (dst_cache) { rt = dst_cache_get_ip4(dst_cache, saddr); if (rt) return rt; } #endif memset(&fl4, 0, sizeof(fl4)); fl4.flowi4_mark = skb->mark; fl4.flowi4_proto = IPPROTO_UDP; fl4.flowi4_oif = oif; fl4.daddr = key->u.ipv4.dst; fl4.saddr = key->u.ipv4.src; fl4.fl4_dport = dport; fl4.fl4_sport = sport; fl4.flowi4_tos = tos & INET_DSCP_MASK; fl4.flowi4_flags = key->flow_flags; rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) { netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); return ERR_PTR(-ENETUNREACH); } if (rt->dst.dev == dev) { /* is this necessary? */ netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); ip_rt_put(rt); return ERR_PTR(-ELOOP); } #ifdef CONFIG_DST_CACHE if (dst_cache) dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); #endif *saddr = fl4.saddr; return rt; } EXPORT_SYMBOL_GPL(udp_tunnel_dst_lookup); MODULE_DESCRIPTION("IPv4 Foo over UDP tunnel driver"); MODULE_LICENSE("GPL"); |
177 50 25 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 | /* SPDX-License-Identifier: GPL-2.0 */ /* * Wound/Wait Mutexes: blocking mutual exclusion locks with deadlock avoidance * * Original mutex implementation started by Ingo Molnar: * * Copyright (C) 2004, 2005, 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> * * Wait/Die implementation: * Copyright (C) 2013 Canonical Ltd. * Choice of algorithm: * Copyright (C) 2018 WMWare Inc. * * This file contains the main data structure and API definitions. */ #ifndef __LINUX_WW_MUTEX_H #define __LINUX_WW_MUTEX_H #include <linux/mutex.h> #include <linux/rtmutex.h> #if defined(CONFIG_DEBUG_MUTEXES) || \ (defined(CONFIG_PREEMPT_RT) && defined(CONFIG_DEBUG_RT_MUTEXES)) #define DEBUG_WW_MUTEXES #endif #ifndef CONFIG_PREEMPT_RT #define WW_MUTEX_BASE mutex #define ww_mutex_base_init(l,n,k) __mutex_init(l,n,k) #define ww_mutex_base_is_locked(b) mutex_is_locked((b)) #else #define WW_MUTEX_BASE rt_mutex #define ww_mutex_base_init(l,n,k) __rt_mutex_init(l,n,k) #define ww_mutex_base_is_locked(b) rt_mutex_base_is_locked(&(b)->rtmutex) #endif struct ww_class { atomic_long_t stamp; struct lock_class_key acquire_key; struct lock_class_key mutex_key; const char *acquire_name; const char *mutex_name; unsigned int is_wait_die; }; struct ww_mutex { struct WW_MUTEX_BASE base; struct ww_acquire_ctx *ctx; #ifdef DEBUG_WW_MUTEXES struct ww_class *ww_class; #endif }; struct ww_acquire_ctx { struct task_struct *task; unsigned long stamp; unsigned int acquired; unsigned short wounded; unsigned short is_wait_die; #ifdef DEBUG_WW_MUTEXES unsigned int done_acquire; struct ww_class *ww_class; void *contending_lock; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; /** * @first_lock_dep_map: fake lockdep_map for first locked ww_mutex. * * lockdep requires the lockdep_map for the first locked ww_mutex * in a ww transaction to remain in memory until all ww_mutexes of * the transaction have been unlocked. Ensure this by keeping a * fake locked ww_mutex lockdep map between ww_acquire_init() and * ww_acquire_fini(). */ struct lockdep_map first_lock_dep_map; #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH unsigned int deadlock_inject_interval; unsigned int deadlock_inject_countdown; #endif }; #define __WW_CLASS_INITIALIZER(ww_class, _is_wait_die) \ { .stamp = ATOMIC_LONG_INIT(0) \ , .acquire_name = #ww_class "_acquire" \ , .mutex_name = #ww_class "_mutex" \ , .is_wait_die = _is_wait_die } #define DEFINE_WD_CLASS(classname) \ struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 1) #define DEFINE_WW_CLASS(classname) \ struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 0) /** * ww_mutex_init - initialize the w/w mutex * @lock: the mutex to be initialized * @ww_class: the w/w class the mutex should belong to * * Initialize the w/w mutex to unlocked state and associate it with the given * class. Static define macro for w/w mutex is not provided and this function * is the only way to properly initialize the w/w mutex. * * It is not allowed to initialize an already locked mutex. */ static inline void ww_mutex_init(struct ww_mutex *lock, struct ww_class *ww_class) { ww_mutex_base_init(&lock->base, ww_class->mutex_name, &ww_class->mutex_key); lock->ctx = NULL; #ifdef DEBUG_WW_MUTEXES lock->ww_class = ww_class; #endif } /** * ww_acquire_init - initialize a w/w acquire context * @ctx: w/w acquire context to initialize * @ww_class: w/w class of the context * * Initializes an context to acquire multiple mutexes of the given w/w class. * * Context-based w/w mutex acquiring can be done in any order whatsoever within * a given lock class. Deadlocks will be detected and handled with the * wait/die logic. * * Mixing of context-based w/w mutex acquiring and single w/w mutex locking can * result in undetected deadlocks and is so forbidden. Mixing different contexts * for the same w/w class when acquiring mutexes can also result in undetected * deadlocks, and is hence also forbidden. Both types of abuse will be caught by * enabling CONFIG_PROVE_LOCKING. * * Nesting of acquire contexts for _different_ w/w classes is possible, subject * to the usual locking rules between different lock classes. * * An acquire context must be released with ww_acquire_fini by the same task * before the memory is freed. It is recommended to allocate the context itself * on the stack. */ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, struct ww_class *ww_class) { ctx->task = current; ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp); ctx->acquired = 0; ctx->wounded = false; ctx->is_wait_die = ww_class->is_wait_die; #ifdef DEBUG_WW_MUTEXES ctx->ww_class = ww_class; ctx->done_acquire = 0; ctx->contending_lock = NULL; #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC debug_check_no_locks_freed((void *)ctx, sizeof(*ctx)); lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, &ww_class->acquire_key, 0); lockdep_init_map(&ctx->first_lock_dep_map, ww_class->mutex_name, &ww_class->mutex_key, 0); mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_); mutex_acquire_nest(&ctx->first_lock_dep_map, 0, 0, &ctx->dep_map, _RET_IP_); #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH ctx->deadlock_inject_interval = 1; ctx->deadlock_inject_countdown = ctx->stamp & 0xf; #endif } /** * ww_acquire_done - marks the end of the acquire phase * @ctx: the acquire context * * Marks the end of the acquire phase, any further w/w mutex lock calls using * this context are forbidden. * * Calling this function is optional, it is just useful to document w/w mutex * code and clearly designated the acquire phase from actually using the locked * data structures. */ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) { #ifdef DEBUG_WW_MUTEXES lockdep_assert_held(ctx); DEBUG_LOCKS_WARN_ON(ctx->done_acquire); ctx->done_acquire = 1; #endif } /** * ww_acquire_fini - releases a w/w acquire context * @ctx: the acquire context to free * * Releases a w/w acquire context. This must be called _after_ all acquired w/w * mutexes have been released with ww_mutex_unlock. */ static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { #ifdef CONFIG_DEBUG_LOCK_ALLOC mutex_release(&ctx->first_lock_dep_map, _THIS_IP_); mutex_release(&ctx->dep_map, _THIS_IP_); #endif #ifdef DEBUG_WW_MUTEXES DEBUG_LOCKS_WARN_ON(ctx->acquired); if (!IS_ENABLED(CONFIG_PROVE_LOCKING)) /* * lockdep will normally handle this, * but fail without anyway */ ctx->done_acquire = 1; if (!IS_ENABLED(CONFIG_DEBUG_LOCK_ALLOC)) /* ensure ww_acquire_fini will still fail if called twice */ ctx->acquired = ~0U; #endif } /** * ww_mutex_lock - acquire the w/w mutex * @lock: the mutex to be acquired * @ctx: w/w acquire context, or NULL to acquire only a single lock. * * Lock the w/w mutex exclusively for this task. * * Deadlocks within a given w/w class of locks are detected and handled with the * wait/die algorithm. If the lock isn't immediately available this function * will either sleep until it is (wait case). Or it selects the current context * for backing off by returning -EDEADLK (die case). Trying to acquire the * same lock with the same context twice is also detected and signalled by * returning -EALREADY. Returns 0 if the mutex was successfully acquired. * * In the die case the caller must release all currently held w/w mutexes for * the given context and then wait for this contending lock to be available by * calling ww_mutex_lock_slow. Alternatively callers can opt to not acquire this * lock and proceed with trying to acquire further w/w mutexes (e.g. when * scanning through lru lists trying to free resources). * * The mutex must later on be released by the same task that * acquired it. The task may not exit without first unlocking the mutex. Also, * kernel memory where the mutex resides must not be freed with the mutex still * locked. The mutex must first be initialized (or statically defined) before it * can be locked. memset()-ing the mutex to 0 is not allowed. The mutex must be * of the same w/w lock class as was used to initialize the acquire context. * * A mutex acquired with this function must be released with ww_mutex_unlock. */ extern int /* __must_check */ ww_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_interruptible - acquire the w/w mutex, interruptible * @lock: the mutex to be acquired * @ctx: w/w acquire context * * Lock the w/w mutex exclusively for this task. * * Deadlocks within a given w/w class of locks are detected and handled with the * wait/die algorithm. If the lock isn't immediately available this function * will either sleep until it is (wait case). Or it selects the current context * for backing off by returning -EDEADLK (die case). Trying to acquire the * same lock with the same context twice is also detected and signalled by * returning -EALREADY. Returns 0 if the mutex was successfully acquired. If a * signal arrives while waiting for the lock then this function returns -EINTR. * * In the die case the caller must release all currently held w/w mutexes for * the given context and then wait for this contending lock to be available by * calling ww_mutex_lock_slow_interruptible. Alternatively callers can opt to * not acquire this lock and proceed with trying to acquire further w/w mutexes * (e.g. when scanning through lru lists trying to free resources). * * The mutex must later on be released by the same task that * acquired it. The task may not exit without first unlocking the mutex. Also, * kernel memory where the mutex resides must not be freed with the mutex still * locked. The mutex must first be initialized (or statically defined) before it * can be locked. memset()-ing the mutex to 0 is not allowed. The mutex must be * of the same w/w lock class as was used to initialize the acquire context. * * A mutex acquired with this function must be released with ww_mutex_unlock. */ extern int __must_check ww_mutex_lock_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); /** * ww_mutex_lock_slow - slowpath acquiring of the w/w mutex * @lock: the mutex to be acquired * @ctx: w/w acquire context * * Acquires a w/w mutex with the given context after a die case. This function * will sleep until the lock becomes available. * * The caller must have released all w/w mutexes already acquired with the * context and then call this function on the contended lock. * * Afterwards the caller may continue to (re)acquire the other w/w mutexes it * needs with ww_mutex_lock. Note that the -EALREADY return code from * ww_mutex_lock can be used to avoid locking this contended mutex twice. * * It is forbidden to call this function with any other w/w mutexes associated * with the context held. It is forbidden to call this on anything else than the * contending mutex. * * Note that the slowpath lock acquiring can also be done by calling * ww_mutex_lock directly. This function here is simply to help w/w mutex * locking code readability by clearly denoting the slowpath. */ static inline void ww_mutex_lock_slow(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { int ret; #ifdef DEBUG_WW_MUTEXES DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); #endif ret = ww_mutex_lock(lock, ctx); (void)ret; } /** * ww_mutex_lock_slow_interruptible - slowpath acquiring of the w/w mutex, interruptible * @lock: the mutex to be acquired * @ctx: w/w acquire context * * Acquires a w/w mutex with the given context after a die case. This function * will sleep until the lock becomes available and returns 0 when the lock has * been acquired. If a signal arrives while waiting for the lock then this * function returns -EINTR. * * The caller must have released all w/w mutexes already acquired with the * context and then call this function on the contended lock. * * Afterwards the caller may continue to (re)acquire the other w/w mutexes it * needs with ww_mutex_lock. Note that the -EALREADY return code from * ww_mutex_lock can be used to avoid locking this contended mutex twice. * * It is forbidden to call this function with any other w/w mutexes associated * with the given context held. It is forbidden to call this on anything else * than the contending mutex. * * Note that the slowpath lock acquiring can also be done by calling * ww_mutex_lock_interruptible directly. This function here is simply to help * w/w mutex locking code readability by clearly denoting the slowpath. */ static inline int __must_check ww_mutex_lock_slow_interruptible(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { #ifdef DEBUG_WW_MUTEXES DEBUG_LOCKS_WARN_ON(!ctx->contending_lock); #endif return ww_mutex_lock_interruptible(lock, ctx); } extern void ww_mutex_unlock(struct ww_mutex *lock); extern int __must_check ww_mutex_trylock(struct ww_mutex *lock, struct ww_acquire_ctx *ctx); /*** * ww_mutex_destroy - mark a w/w mutex unusable * @lock: the mutex to be destroyed * * This function marks the mutex uninitialized, and any subsequent * use of the mutex is forbidden. The mutex must not be locked when * this function is called. */ static inline void ww_mutex_destroy(struct ww_mutex *lock) { #ifndef CONFIG_PREEMPT_RT mutex_destroy(&lock->base); #endif } /** * ww_mutex_is_locked - is the w/w mutex locked * @lock: the mutex to be queried * * Returns 1 if the mutex is locked, 0 if unlocked. */ static inline bool ww_mutex_is_locked(struct ww_mutex *lock) { return ww_mutex_base_is_locked(&lock->base); } #endif |
123 38 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_FS_STRUCT_H #define _LINUX_FS_STRUCT_H #include <linux/path.h> #include <linux/spinlock.h> #include <linux/seqlock.h> struct fs_struct { int users; spinlock_t lock; seqcount_spinlock_t seq; int umask; int in_exec; struct path root, pwd; } __randomize_layout; extern struct kmem_cache *fs_cachep; extern void exit_fs(struct task_struct *); extern void set_fs_root(struct fs_struct *, const struct path *); extern void set_fs_pwd(struct fs_struct *, const struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); extern void free_fs_struct(struct fs_struct *); extern int unshare_fs_struct(void); static inline void get_fs_root(struct fs_struct *fs, struct path *root) { spin_lock(&fs->lock); *root = fs->root; path_get(root); spin_unlock(&fs->lock); } static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd) { spin_lock(&fs->lock); *pwd = fs->pwd; path_get(pwd); spin_unlock(&fs->lock); } extern bool current_chrooted(void); #endif /* _LINUX_FS_STRUCT_H */ |
5 1 4 1 4 4 5 6 1 3 1 3 1 1 1 1 2 2 2 2 3 3 1 4 4 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Glue Code for assembler optimized version of Camellia * * Copyright (c) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> * * Camellia parts based on code by: * Copyright (C) 2006 NTT (Nippon Telegraph and Telephone Corporation) */ #include <linux/unaligned.h> #include <linux/crypto.h> #include <linux/init.h> #include <linux/module.h> #include <linux/types.h> #include <crypto/algapi.h> #include "camellia.h" #include "ecb_cbc_helpers.h" /* regular block cipher functions */ asmlinkage void __camellia_enc_blk(const void *ctx, u8 *dst, const u8 *src, bool xor); EXPORT_SYMBOL_GPL(__camellia_enc_blk); asmlinkage void camellia_dec_blk(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_dec_blk); /* 2-way parallel cipher functions */ asmlinkage void __camellia_enc_blk_2way(const void *ctx, u8 *dst, const u8 *src, bool xor); EXPORT_SYMBOL_GPL(__camellia_enc_blk_2way); asmlinkage void camellia_dec_blk_2way(const void *ctx, u8 *dst, const u8 *src); EXPORT_SYMBOL_GPL(camellia_dec_blk_2way); static void camellia_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { camellia_enc_blk(crypto_tfm_ctx(tfm), dst, src); } static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { camellia_dec_blk(crypto_tfm_ctx(tfm), dst, src); } /* camellia sboxes */ __visible const u64 camellia_sp10011110[256] = { 0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL, 0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL, 0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL, 0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL, 0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL, 0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL, 0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL, 0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL, 0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL, 0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL, 0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL, 0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL, 0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL, 0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL, 0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL, 0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL, 0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL, 0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL, 0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL, 0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL, 0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL, 0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL, 0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL, 0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL, 0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL, 0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL, 0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL, 0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL, 0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL, 0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 0xb70000b7b7b7b700ULL, 0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL, 0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL, 0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL, 0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL, 0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL, 0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL, 0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL, 0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL, 0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL, 0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL, 0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL, 0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL, 0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL, 0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL, 0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL, 0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL, 0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL, 0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL, 0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL, 0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL, 0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL, 0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL, 0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL, 0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL, 0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL, 0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL, 0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL, 0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL, 0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL, 0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL, 0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL, 0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL, 0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL, 0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL, 0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL, 0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL, 0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL, 0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL, 0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL, 0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL, 0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL, 0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL, 0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL, 0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL, 0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL, 0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL, 0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL, 0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL, 0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL, 0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 0xa40000a4a4a4a400ULL, 0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL, 0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL, 0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL, 0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL, 0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL, 0x9e00009e9e9e9e00ULL, }; __visible const u64 camellia_sp22000222[256] = { 0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL, 0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL, 0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL, 0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL, 0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL, 0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL, 0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL, 0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL, 0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL, 0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL, 0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL, 0x7171000000717171ULL, 0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL, 0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL, 0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL, 0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL, 0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL, 0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL, 0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL, 0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL, 0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL, 0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL, 0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL, 0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL, 0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL, 0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL, 0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL, 0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL, 0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL, 0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL, 0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL, 0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL, 0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL, 0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL, 0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL, 0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL, 0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL, 0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL, 0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL, 0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL, 0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL, 0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL, 0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL, 0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL, 0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL, 0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL, 0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL, 0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL, 0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL, 0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL, 0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL, 0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL, 0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL, 0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL, 0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL, 0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL, 0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL, 0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL, 0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL, 0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL, 0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL, 0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL, 0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL, 0xdede000000dededeULL, 0x9696000000969696ULL, 0x2626000000262626ULL, 0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL, 0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL, 0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL, 0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL, 0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL, 0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL, 0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL, 0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL, 0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL, 0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL, 0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL, 0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL, 0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL, 0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL, 0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL, 0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL, 0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL, 0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL, 0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL, 0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL, 0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL, 0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL, 0x3d3d0000003d3d3dULL, }; __visible const u64 camellia_sp03303033[256] = { 0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL, 0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL, 0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL, 0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL, 0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL, 0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL, 0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL, 0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL, 0x00f6f600f600f6f6ULL, 0x0007070007000707ULL, 0x00a7a700a700a7a7ULL, 0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL, 0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL, 0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL, 0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL, 0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL, 0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL, 0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL, 0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL, 0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL, 0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL, 0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL, 0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL, 0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL, 0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL, 0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL, 0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL, 0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL, 0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL, 0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL, 0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL, 0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL, 0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL, 0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL, 0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL, 0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL, 0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL, 0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL, 0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL, 0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL, 0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL, 0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL, 0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL, 0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL, 0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL, 0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL, 0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL, 0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL, 0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL, 0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL, 0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL, 0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL, 0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL, 0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL, 0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL, 0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL, 0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL, 0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL, 0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL, 0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL, 0x00dfdf00df00dfdfULL, 0x0071710071007171ULL, 0x0029290029002929ULL, 0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL, 0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL, 0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL, 0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL, 0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL, 0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL, 0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL, 0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL, 0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL, 0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL, 0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL, 0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL, 0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL, 0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL, 0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL, 0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL, 0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL, 0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL, 0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL, 0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL, 0x001c1c001c001c1cULL, 0x00f8f800f800f8f8ULL, 0x0052520052005252ULL, 0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL, 0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL, 0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL, 0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL, 0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL, 0x004f4f004f004f4fULL, }; __visible const u64 camellia_sp00444404[256] = { 0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL, 0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL, 0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL, 0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL, 0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL, 0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL, 0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL, 0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL, 0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL, 0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL, 0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL, 0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL, 0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL, 0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL, 0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL, 0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL, 0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL, 0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL, 0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL, 0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL, 0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL, 0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL, 0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL, 0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL, 0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL, 0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL, 0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL, 0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL, 0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL, 0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL, 0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL, 0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL, 0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL, 0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL, 0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL, 0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL, 0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL, 0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL, 0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL, 0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL, 0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL, 0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL, 0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL, 0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL, 0x0000858585850085ULL, 0x0000353535350035ULL, 0x00000c0c0c0c000cULL, 0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL, 0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL, 0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL, 0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL, 0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL, 0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL, 0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL, 0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL, 0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL, 0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL, 0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL, 0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL, 0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL, 0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL, 0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL, 0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL, 0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL, 0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL, 0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL, 0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL, 0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL, 0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL, 0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL, 0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL, 0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL, 0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL, 0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL, 0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL, 0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL, 0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL, 0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL, 0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL, 0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL, 0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL, 0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL, 0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL, 0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL, 0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL, 0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL, 0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL, 0x00009e9e9e9e009eULL, }; __visible const u64 camellia_sp02220222[256] = { 0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL, 0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL, 0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL, 0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL, 0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL, 0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL, 0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL, 0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL, 0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL, 0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL, 0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL, 0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL, 0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL, 0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL, 0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL, 0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL, 0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL, 0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL, 0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL, 0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL, 0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL, 0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL, 0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL, 0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL, 0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL, 0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL, 0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL, 0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL, 0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL, 0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL, 0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL, 0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL, 0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL, 0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL, 0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL, 0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL, 0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL, 0x0044444400444444ULL, 0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL, 0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL, 0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL, 0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL, 0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL, 0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL, 0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL, 0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL, 0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL, 0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL, 0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL, 0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL, 0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL, 0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL, 0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL, 0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL, 0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL, 0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL, 0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL, 0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL, 0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL, 0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL, 0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL, 0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL, 0x0076767600767676ULL, 0x0003030300030303ULL, 0x002d2d2d002d2d2dULL, 0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL, 0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL, 0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL, 0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL, 0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL, 0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL, 0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL, 0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL, 0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL, 0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL, 0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL, 0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL, 0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL, 0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL, 0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL, 0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL, 0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL, 0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL, 0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL, 0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL, 0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL, 0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL, 0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL, 0x003d3d3d003d3d3dULL, }; __visible const u64 camellia_sp30333033[256] = { 0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL, 0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL, 0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL, 0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL, 0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL, 0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL, 0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL, 0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL, 0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL, 0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL, 0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL, 0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL, 0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL, 0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL, 0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL, 0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL, 0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL, 0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL, 0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL, 0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL, 0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL, 0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL, 0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL, 0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL, 0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL, 0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL, 0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 0xef00efefef00efefULL, 0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL, 0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL, 0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL, 0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL, 0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL, 0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL, 0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL, 0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL, 0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL, 0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL, 0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL, 0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL, 0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL, 0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL, 0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL, 0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL, 0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL, 0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL, 0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL, 0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL, 0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL, 0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL, 0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL, 0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL, 0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL, 0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL, 0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL, 0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL, 0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL, 0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL, 0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL, 0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL, 0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL, 0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL, 0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL, 0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL, 0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL, 0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL, 0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL, 0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL, 0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL, 0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL, 0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL, 0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL, 0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL, 0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL, 0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL, 0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL, 0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL, 0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL, 0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL, 0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL, 0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL, 0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL, 0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL, 0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL, 0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL, 0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL, 0x4f004f4f4f004f4fULL, }; __visible const u64 camellia_sp44044404[256] = { 0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL, 0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL, 0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL, 0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL, 0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL, 0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL, 0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL, 0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL, 0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL, 0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL, 0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL, 0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL, 0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL, 0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL, 0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL, 0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL, 0x1414001414140014ULL, 0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL, 0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL, 0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL, 0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL, 0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL, 0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL, 0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL, 0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL, 0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL, 0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL, 0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL, 0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL, 0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL, 0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL, 0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL, 0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL, 0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL, 0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL, 0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL, 0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL, 0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL, 0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL, 0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL, 0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL, 0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL, 0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL, 0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL, 0xecec00ececec00ecULL, 0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL, 0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL, 0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL, 0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL, 0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL, 0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL, 0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL, 0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL, 0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL, 0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL, 0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL, 0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL, 0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL, 0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL, 0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL, 0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL, 0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL, 0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL, 0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL, 0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL, 0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL, 0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL, 0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL, 0xffff00ffffff00ffULL, 0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL, 0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL, 0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL, 0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL, 0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL, 0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL, 0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL, 0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL, 0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL, 0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL, 0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL, 0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL, 0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL, 0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL, 0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL, 0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL, 0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL, 0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL, 0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL, 0x9e9e009e9e9e009eULL, }; __visible const u64 camellia_sp11101110[256] = { 0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL, 0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL, 0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL, 0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL, 0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL, 0x4141410041414100ULL, 0x2323230023232300ULL, 0xefefef00efefef00ULL, 0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL, 0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL, 0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 0x4f4f4f004f4f4f00ULL, 0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL, 0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL, 0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL, 0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL, 0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL, 0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL, 0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL, 0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL, 0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL, 0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL, 0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL, 0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL, 0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL, 0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL, 0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL, 0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL, 0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL, 0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL, 0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL, 0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL, 0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL, 0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL, 0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL, 0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL, 0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL, 0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL, 0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL, 0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL, 0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL, 0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL, 0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL, 0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL, 0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL, 0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL, 0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL, 0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL, 0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL, 0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL, 0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL, 0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL, 0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL, 0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL, 0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL, 0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL, 0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL, 0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL, 0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 0x3333330033333300ULL, 0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL, 0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL, 0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL, 0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL, 0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL, 0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL, 0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL, 0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL, 0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL, 0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL, 0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL, 0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL, 0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL, 0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL, 0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL, 0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL, 0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL, 0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL, 0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL, 0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL, 0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL, 0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL, 0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL, 0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL, 0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL, 0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL, 0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL, 0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL, 0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL, 0x9e9e9e009e9e9e00ULL, }; /* key constants */ #define CAMELLIA_SIGMA1L (0xA09E667FL) #define CAMELLIA_SIGMA1R (0x3BCC908BL) #define CAMELLIA_SIGMA2L (0xB67AE858L) #define CAMELLIA_SIGMA2R (0x4CAA73B2L) #define CAMELLIA_SIGMA3L (0xC6EF372FL) #define CAMELLIA_SIGMA3R (0xE94F82BEL) #define CAMELLIA_SIGMA4L (0x54FF53A5L) #define CAMELLIA_SIGMA4R (0xF1D36F1CL) #define CAMELLIA_SIGMA5L (0x10E527FAL) #define CAMELLIA_SIGMA5R (0xDE682D1DL) #define CAMELLIA_SIGMA6L (0xB05688C2L) #define CAMELLIA_SIGMA6R (0xB3E6C1FDL) /* macros */ #define ROLDQ(l, r, bits) ({ \ u64 t = l; \ l = (l << bits) | (r >> (64 - bits)); \ r = (r << bits) | (t >> (64 - bits)); \ }) #define CAMELLIA_F(x, kl, kr, y) ({ \ u64 ii = x ^ (((u64)kl << 32) | kr); \ y = camellia_sp11101110[(uint8_t)ii]; \ y ^= camellia_sp44044404[(uint8_t)(ii >> 8)]; \ ii >>= 16; \ y ^= camellia_sp30333033[(uint8_t)ii]; \ y ^= camellia_sp02220222[(uint8_t)(ii >> 8)]; \ ii >>= 16; \ y ^= camellia_sp00444404[(uint8_t)ii]; \ y ^= camellia_sp03303033[(uint8_t)(ii >> 8)]; \ ii >>= 16; \ y ^= camellia_sp22000222[(uint8_t)ii]; \ y ^= camellia_sp10011110[(uint8_t)(ii >> 8)]; \ y = ror64(y, 32); \ }) #define SET_SUBKEY_LR(INDEX, sRL) (subkey[(INDEX)] = ror64((sRL), 32)) static void camellia_setup_tail(u64 *subkey, u64 *subRL, int max) { u64 kw4, tt; u32 dw, tl, tr; /* absorb kw2 to other subkeys */ /* round 2 */ subRL[3] ^= subRL[1]; /* round 4 */ subRL[5] ^= subRL[1]; /* round 6 */ subRL[7] ^= subRL[1]; subRL[1] ^= (subRL[1] & ~subRL[9]) << 32; /* modified for FLinv(kl2) */ dw = (subRL[1] & subRL[9]) >> 32; subRL[1] ^= rol32(dw, 1); /* round 8 */ subRL[11] ^= subRL[1]; /* round 10 */ subRL[13] ^= subRL[1]; /* round 12 */ subRL[15] ^= subRL[1]; subRL[1] ^= (subRL[1] & ~subRL[17]) << 32; /* modified for FLinv(kl4) */ dw = (subRL[1] & subRL[17]) >> 32; subRL[1] ^= rol32(dw, 1); /* round 14 */ subRL[19] ^= subRL[1]; /* round 16 */ subRL[21] ^= subRL[1]; /* round 18 */ subRL[23] ^= subRL[1]; if (max == 24) { /* kw3 */ subRL[24] ^= subRL[1]; /* absorb kw4 to other subkeys */ kw4 = subRL[25]; } else { subRL[1] ^= (subRL[1] & ~subRL[25]) << 32; /* modified for FLinv(kl6) */ dw = (subRL[1] & subRL[25]) >> 32; subRL[1] ^= rol32(dw, 1); /* round 20 */ subRL[27] ^= subRL[1]; /* round 22 */ subRL[29] ^= subRL[1]; /* round 24 */ subRL[31] ^= subRL[1]; /* kw3 */ subRL[32] ^= subRL[1]; /* absorb kw4 to other subkeys */ kw4 = subRL[33]; /* round 23 */ subRL[30] ^= kw4; /* round 21 */ subRL[28] ^= kw4; /* round 19 */ subRL[26] ^= kw4; kw4 ^= (kw4 & ~subRL[24]) << 32; /* modified for FL(kl5) */ dw = (kw4 & subRL[24]) >> 32; kw4 ^= rol32(dw, 1); } /* round 17 */ subRL[22] ^= kw4; /* round 15 */ subRL[20] ^= kw4; /* round 13 */ subRL[18] ^= kw4; kw4 ^= (kw4 & ~subRL[16]) << 32; /* modified for FL(kl3) */ dw = (kw4 & subRL[16]) >> 32; kw4 ^= rol32(dw, 1); /* round 11 */ subRL[14] ^= kw4; /* round 9 */ subRL[12] ^= kw4; /* round 7 */ subRL[10] ^= kw4; kw4 ^= (kw4 & ~subRL[8]) << 32; /* modified for FL(kl1) */ dw = (kw4 & subRL[8]) >> 32; kw4 ^= rol32(dw, 1); /* round 5 */ subRL[6] ^= kw4; /* round 3 */ subRL[4] ^= kw4; /* round 1 */ subRL[2] ^= kw4; /* kw1 */ subRL[0] ^= kw4; /* key XOR is end of F-function */ SET_SUBKEY_LR(0, subRL[0] ^ subRL[2]); /* kw1 */ SET_SUBKEY_LR(2, subRL[3]); /* round 1 */ SET_SUBKEY_LR(3, subRL[2] ^ subRL[4]); /* round 2 */ SET_SUBKEY_LR(4, subRL[3] ^ subRL[5]); /* round 3 */ SET_SUBKEY_LR(5, subRL[4] ^ subRL[6]); /* round 4 */ SET_SUBKEY_LR(6, subRL[5] ^ subRL[7]); /* round 5 */ tl = (subRL[10] >> 32) ^ (subRL[10] & ~subRL[8]); dw = tl & (subRL[8] >> 32); /* FL(kl1) */ tr = subRL[10] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(7, subRL[6] ^ tt); /* round 6 */ SET_SUBKEY_LR(8, subRL[8]); /* FL(kl1) */ SET_SUBKEY_LR(9, subRL[9]); /* FLinv(kl2) */ tl = (subRL[7] >> 32) ^ (subRL[7] & ~subRL[9]); dw = tl & (subRL[9] >> 32); /* FLinv(kl2) */ tr = subRL[7] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(10, subRL[11] ^ tt); /* round 7 */ SET_SUBKEY_LR(11, subRL[10] ^ subRL[12]); /* round 8 */ SET_SUBKEY_LR(12, subRL[11] ^ subRL[13]); /* round 9 */ SET_SUBKEY_LR(13, subRL[12] ^ subRL[14]); /* round 10 */ SET_SUBKEY_LR(14, subRL[13] ^ subRL[15]); /* round 11 */ tl = (subRL[18] >> 32) ^ (subRL[18] & ~subRL[16]); dw = tl & (subRL[16] >> 32); /* FL(kl3) */ tr = subRL[18] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(15, subRL[14] ^ tt); /* round 12 */ SET_SUBKEY_LR(16, subRL[16]); /* FL(kl3) */ SET_SUBKEY_LR(17, subRL[17]); /* FLinv(kl4) */ tl = (subRL[15] >> 32) ^ (subRL[15] & ~subRL[17]); dw = tl & (subRL[17] >> 32); /* FLinv(kl4) */ tr = subRL[15] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(18, subRL[19] ^ tt); /* round 13 */ SET_SUBKEY_LR(19, subRL[18] ^ subRL[20]); /* round 14 */ SET_SUBKEY_LR(20, subRL[19] ^ subRL[21]); /* round 15 */ SET_SUBKEY_LR(21, subRL[20] ^ subRL[22]); /* round 16 */ SET_SUBKEY_LR(22, subRL[21] ^ subRL[23]); /* round 17 */ if (max == 24) { SET_SUBKEY_LR(23, subRL[22]); /* round 18 */ SET_SUBKEY_LR(24, subRL[24] ^ subRL[23]); /* kw3 */ } else { tl = (subRL[26] >> 32) ^ (subRL[26] & ~subRL[24]); dw = tl & (subRL[24] >> 32); /* FL(kl5) */ tr = subRL[26] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(23, subRL[22] ^ tt); /* round 18 */ SET_SUBKEY_LR(24, subRL[24]); /* FL(kl5) */ SET_SUBKEY_LR(25, subRL[25]); /* FLinv(kl6) */ tl = (subRL[23] >> 32) ^ (subRL[23] & ~subRL[25]); dw = tl & (subRL[25] >> 32); /* FLinv(kl6) */ tr = subRL[23] ^ rol32(dw, 1); tt = (tr | ((u64)tl << 32)); SET_SUBKEY_LR(26, subRL[27] ^ tt); /* round 19 */ SET_SUBKEY_LR(27, subRL[26] ^ subRL[28]); /* round 20 */ SET_SUBKEY_LR(28, subRL[27] ^ subRL[29]); /* round 21 */ SET_SUBKEY_LR(29, subRL[28] ^ subRL[30]); /* round 22 */ SET_SUBKEY_LR(30, subRL[29] ^ subRL[31]); /* round 23 */ SET_SUBKEY_LR(31, subRL[30]); /* round 24 */ SET_SUBKEY_LR(32, subRL[32] ^ subRL[31]); /* kw3 */ } } static void camellia_setup128(const unsigned char *key, u64 *subkey) { u64 kl, kr, ww; u64 subRL[26]; /** * k == kl || kr (|| is concatenation) */ kl = get_unaligned_be64(key); kr = get_unaligned_be64(key + 8); /* generate KL dependent subkeys */ /* kw1 */ subRL[0] = kl; /* kw2 */ subRL[1] = kr; /* rotation left shift 15bit */ ROLDQ(kl, kr, 15); /* k3 */ subRL[4] = kl; /* k4 */ subRL[5] = kr; /* rotation left shift 15+30bit */ ROLDQ(kl, kr, 30); /* k7 */ subRL[10] = kl; /* k8 */ subRL[11] = kr; /* rotation left shift 15+30+15bit */ ROLDQ(kl, kr, 15); /* k10 */ subRL[13] = kr; /* rotation left shift 15+30+15+17 bit */ ROLDQ(kl, kr, 17); /* kl3 */ subRL[16] = kl; /* kl4 */ subRL[17] = kr; /* rotation left shift 15+30+15+17+17 bit */ ROLDQ(kl, kr, 17); /* k13 */ subRL[18] = kl; /* k14 */ subRL[19] = kr; /* rotation left shift 15+30+15+17+17+17 bit */ ROLDQ(kl, kr, 17); /* k17 */ subRL[22] = kl; /* k18 */ subRL[23] = kr; /* generate KA */ kl = subRL[0]; kr = subRL[1]; CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); kr ^= ww; CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); /* current status == (kll, klr, w0, w1) */ CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); kr ^= ww; CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); kl ^= ww; /* generate KA dependent subkeys */ /* k1, k2 */ subRL[2] = kl; subRL[3] = kr; ROLDQ(kl, kr, 15); /* k5,k6 */ subRL[6] = kl; subRL[7] = kr; ROLDQ(kl, kr, 15); /* kl1, kl2 */ subRL[8] = kl; subRL[9] = kr; ROLDQ(kl, kr, 15); /* k9 */ subRL[12] = kl; ROLDQ(kl, kr, 15); /* k11, k12 */ subRL[14] = kl; subRL[15] = kr; ROLDQ(kl, kr, 34); /* k15, k16 */ subRL[20] = kl; subRL[21] = kr; ROLDQ(kl, kr, 17); /* kw3, kw4 */ subRL[24] = kl; subRL[25] = kr; camellia_setup_tail(subkey, subRL, 24); } static void camellia_setup256(const unsigned char *key, u64 *subkey) { u64 kl, kr; /* left half of key */ u64 krl, krr; /* right half of key */ u64 ww; /* temporary variables */ u64 subRL[34]; /** * key = (kl || kr || krl || krr) (|| is concatenation) */ kl = get_unaligned_be64(key); kr = get_unaligned_be64(key + 8); krl = get_unaligned_be64(key + 16); krr = get_unaligned_be64(key + 24); /* generate KL dependent subkeys */ /* kw1 */ subRL[0] = kl; /* kw2 */ subRL[1] = kr; ROLDQ(kl, kr, 45); /* k9 */ subRL[12] = kl; /* k10 */ subRL[13] = kr; ROLDQ(kl, kr, 15); /* kl3 */ subRL[16] = kl; /* kl4 */ subRL[17] = kr; ROLDQ(kl, kr, 17); /* k17 */ subRL[22] = kl; /* k18 */ subRL[23] = kr; ROLDQ(kl, kr, 34); /* k23 */ subRL[30] = kl; /* k24 */ subRL[31] = kr; /* generate KR dependent subkeys */ ROLDQ(krl, krr, 15); /* k3 */ subRL[4] = krl; /* k4 */ subRL[5] = krr; ROLDQ(krl, krr, 15); /* kl1 */ subRL[8] = krl; /* kl2 */ subRL[9] = krr; ROLDQ(krl, krr, 30); /* k13 */ subRL[18] = krl; /* k14 */ subRL[19] = krr; ROLDQ(krl, krr, 34); /* k19 */ subRL[26] = krl; /* k20 */ subRL[27] = krr; ROLDQ(krl, krr, 34); /* generate KA */ kl = subRL[0] ^ krl; kr = subRL[1] ^ krr; CAMELLIA_F(kl, CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, ww); kr ^= ww; CAMELLIA_F(kr, CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, kl); kl ^= krl; CAMELLIA_F(kl, CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, kr); kr ^= ww ^ krr; CAMELLIA_F(kr, CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, ww); kl ^= ww; /* generate KB */ krl ^= kl; krr ^= kr; CAMELLIA_F(krl, CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, ww); krr ^= ww; CAMELLIA_F(krr, CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, ww); krl ^= ww; /* generate KA dependent subkeys */ ROLDQ(kl, kr, 15); /* k5 */ subRL[6] = kl; /* k6 */ subRL[7] = kr; ROLDQ(kl, kr, 30); /* k11 */ subRL[14] = kl; /* k12 */ subRL[15] = kr; /* rotation left shift 32bit */ ROLDQ(kl, kr, 32); /* kl5 */ subRL[24] = kl; /* kl6 */ subRL[25] = kr; /* rotation left shift 17 from k11,k12 -> k21,k22 */ ROLDQ(kl, kr, 17); /* k21 */ subRL[28] = kl; /* k22 */ subRL[29] = kr; /* generate KB dependent subkeys */ /* k1 */ subRL[2] = krl; /* k2 */ subRL[3] = krr; ROLDQ(krl, krr, 30); /* k7 */ subRL[10] = krl; /* k8 */ subRL[11] = krr; ROLDQ(krl, krr, 30); /* k15 */ subRL[20] = krl; /* k16 */ subRL[21] = krr; ROLDQ(krl, krr, 51); /* kw3 */ subRL[32] = krl; /* kw4 */ subRL[33] = krr; camellia_setup_tail(subkey, subRL, 32); } static void camellia_setup192(const unsigned char *key, u64 *subkey) { unsigned char kk[32]; u64 krl, krr; memcpy(kk, key, 24); memcpy((unsigned char *)&krl, key+16, 8); krr = ~krl; memcpy(kk+24, (unsigned char *)&krr, 8); camellia_setup256(kk, subkey); } int __camellia_setkey(struct camellia_ctx *cctx, const unsigned char *key, unsigned int key_len) { if (key_len != 16 && key_len != 24 && key_len != 32) return -EINVAL; cctx->key_length = key_len; switch (key_len) { case 16: camellia_setup128(key, cctx->key_table); break; case 24: camellia_setup192(key, cctx->key_table); break; case 32: camellia_setup256(key, cctx->key_table); break; } return 0; } EXPORT_SYMBOL_GPL(__camellia_setkey); static int camellia_setkey(struct crypto_tfm *tfm, const u8 *key, unsigned int key_len) { return __camellia_setkey(crypto_tfm_ctx(tfm), key, key_len); } static int camellia_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { return camellia_setkey(&tfm->base, key, key_len); } void camellia_decrypt_cbc_2way(const void *ctx, u8 *dst, const u8 *src) { u8 buf[CAMELLIA_BLOCK_SIZE]; const u8 *iv = src; if (dst == src) iv = memcpy(buf, iv, sizeof(buf)); camellia_dec_blk_2way(ctx, dst, src); crypto_xor(dst + CAMELLIA_BLOCK_SIZE, iv, CAMELLIA_BLOCK_SIZE); } EXPORT_SYMBOL_GPL(camellia_decrypt_cbc_2way); static int ecb_encrypt(struct skcipher_request *req) { ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); ECB_BLOCK(2, camellia_enc_blk_2way); ECB_BLOCK(1, camellia_enc_blk); ECB_WALK_END(); } static int ecb_decrypt(struct skcipher_request *req) { ECB_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); ECB_BLOCK(2, camellia_dec_blk_2way); ECB_BLOCK(1, camellia_dec_blk); ECB_WALK_END(); } static int cbc_encrypt(struct skcipher_request *req) { CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); CBC_ENC_BLOCK(camellia_enc_blk); CBC_WALK_END(); } static int cbc_decrypt(struct skcipher_request *req) { CBC_WALK_START(req, CAMELLIA_BLOCK_SIZE, -1); CBC_DEC_BLOCK(2, camellia_decrypt_cbc_2way); CBC_DEC_BLOCK(1, camellia_dec_blk); CBC_WALK_END(); } static struct crypto_alg camellia_cipher_alg = { .cra_name = "camellia", .cra_driver_name = "camellia-asm", .cra_priority = 200, .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = CAMELLIA_BLOCK_SIZE, .cra_ctxsize = sizeof(struct camellia_ctx), .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_u = { .cipher = { .cia_min_keysize = CAMELLIA_MIN_KEY_SIZE, .cia_max_keysize = CAMELLIA_MAX_KEY_SIZE, .cia_setkey = camellia_setkey, .cia_encrypt = camellia_encrypt, .cia_decrypt = camellia_decrypt } } }; static struct skcipher_alg camellia_skcipher_algs[] = { { .base.cra_name = "ecb(camellia)", .base.cra_driver_name = "ecb-camellia-asm", .base.cra_priority = 300, .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct camellia_ctx), .base.cra_module = THIS_MODULE, .min_keysize = CAMELLIA_MIN_KEY_SIZE, .max_keysize = CAMELLIA_MAX_KEY_SIZE, .setkey = camellia_setkey_skcipher, .encrypt = ecb_encrypt, .decrypt = ecb_decrypt, }, { .base.cra_name = "cbc(camellia)", .base.cra_driver_name = "cbc-camellia-asm", .base.cra_priority = 300, .base.cra_blocksize = CAMELLIA_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct camellia_ctx), .base.cra_module = THIS_MODULE, .min_keysize = CAMELLIA_MIN_KEY_SIZE, .max_keysize = CAMELLIA_MAX_KEY_SIZE, .ivsize = CAMELLIA_BLOCK_SIZE, .setkey = camellia_setkey_skcipher, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, } }; static bool is_blacklisted_cpu(void) { if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return false; if (boot_cpu_data.x86 == 0x0f) { /* * On Pentium 4, camellia-asm is slower than original assembler * implementation because excessive uses of 64bit rotate and * left-shifts (which are really slow on P4) needed to store and * handle 128bit block in two 64bit registers. */ return true; } return false; } static int force; module_param(force, int, 0); MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); static int __init camellia_init(void) { int err; if (!force && is_blacklisted_cpu()) { printk(KERN_INFO "camellia-x86_64: performance on this CPU " "would be suboptimal: disabling " "camellia-x86_64.\n"); return -ENODEV; } err = crypto_register_alg(&camellia_cipher_alg); if (err) return err; err = crypto_register_skciphers(camellia_skcipher_algs, ARRAY_SIZE(camellia_skcipher_algs)); if (err) crypto_unregister_alg(&camellia_cipher_alg); return err; } static void __exit camellia_fini(void) { crypto_unregister_alg(&camellia_cipher_alg); crypto_unregister_skciphers(camellia_skcipher_algs, ARRAY_SIZE(camellia_skcipher_algs)); } module_init(camellia_init); module_exit(camellia_fini); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Camellia Cipher Algorithm, asm optimized"); MODULE_ALIAS_CRYPTO("camellia"); MODULE_ALIAS_CRYPTO("camellia-asm"); |
1 1 7 5 5 1 1 4 4 6 1 5 5 44 44 44 44 1 10 681 10 7 10 8 3 9 9 8 1 62 26 44 10 33 7 3 42 9 4 37 1 29 173 16 24 6 1 1 5 6 11 11 11 78 80 61 12 8 20 8 12 9 1 2 8 24 24 24 176 178 35 35 9 9 25 25 8 8 19 61 80 81 224 224 22 30 30 18 18 32 32 18 18 3 13 3919 1727 1729 16 10 7 76 9570 9571 1 7 33 33 33 1703 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 | // SPDX-License-Identifier: GPL-2.0-only /* * Landlock LSM - Filesystem management and hooks * * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2018-2020 ANSSI * Copyright © 2021-2022 Microsoft Corporation * Copyright © 2022 Günther Noack <gnoack3000@gmail.com> * Copyright © 2023-2024 Google LLC */ #include <asm/ioctls.h> #include <kunit/test.h> #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/bits.h> #include <linux/compiler_types.h> #include <linux/dcache.h> #include <linux/err.h> #include <linux/falloc.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/limits.h> #include <linux/list.h> #include <linux/lsm_hooks.h> #include <linux/mount.h> #include <linux/namei.h> #include <linux/path.h> #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/stat.h> #include <linux/types.h> #include <linux/wait_bit.h> #include <linux/workqueue.h> #include <uapi/linux/fiemap.h> #include <uapi/linux/landlock.h> #include "common.h" #include "cred.h" #include "fs.h" #include "limits.h" #include "object.h" #include "ruleset.h" #include "setup.h" /* Underlying object management */ static void release_inode(struct landlock_object *const object) __releases(object->lock) { struct inode *const inode = object->underobj; struct super_block *sb; if (!inode) { spin_unlock(&object->lock); return; } /* * Protects against concurrent use by hook_sb_delete() of the reference * to the underlying inode. */ object->underobj = NULL; /* * Makes sure that if the filesystem is concurrently unmounted, * hook_sb_delete() will wait for us to finish iput(). */ sb = inode->i_sb; atomic_long_inc(&landlock_superblock(sb)->inode_refs); spin_unlock(&object->lock); /* * Because object->underobj was not NULL, hook_sb_delete() and * get_inode_object() guarantee that it is safe to reset * landlock_inode(inode)->object while it is not NULL. It is therefore * not necessary to lock inode->i_lock. */ rcu_assign_pointer(landlock_inode(inode)->object, NULL); /* * Now, new rules can safely be tied to @inode with get_inode_object(). */ iput(inode); if (atomic_long_dec_and_test(&landlock_superblock(sb)->inode_refs)) wake_up_var(&landlock_superblock(sb)->inode_refs); } static const struct landlock_object_underops landlock_fs_underops = { .release = release_inode }; /* IOCTL helpers */ /** * is_masked_device_ioctl - Determine whether an IOCTL command is always * permitted with Landlock for device files. These commands can not be * restricted on device files by enforcing a Landlock policy. * * @cmd: The IOCTL command that is supposed to be run. * * By default, any IOCTL on a device file requires the * LANDLOCK_ACCESS_FS_IOCTL_DEV right. However, we blanket-permit some * commands, if: * * 1. The command is implemented in fs/ioctl.c's do_vfs_ioctl(), * not in f_ops->unlocked_ioctl() or f_ops->compat_ioctl(). * * 2. The command is harmless when invoked on devices. * * We also permit commands that do not make sense for devices, but where the * do_vfs_ioctl() implementation returns a more conventional error code. * * Any new IOCTL commands that are implemented in fs/ioctl.c's do_vfs_ioctl() * should be considered for inclusion here. * * Returns: true if the IOCTL @cmd can not be restricted with Landlock for * device files. */ static __attribute_const__ bool is_masked_device_ioctl(const unsigned int cmd) { switch (cmd) { /* * FIOCLEX, FIONCLEX, FIONBIO and FIOASYNC manipulate the FD's * close-on-exec and the file's buffered-IO and async flags. These * operations are also available through fcntl(2), and are * unconditionally permitted in Landlock. */ case FIOCLEX: case FIONCLEX: case FIONBIO: case FIOASYNC: /* * FIOQSIZE queries the size of a regular file, directory, or link. * * We still permit it, because it always returns -ENOTTY for * other file types. */ case FIOQSIZE: /* * FIFREEZE and FITHAW freeze and thaw the file system which the * given file belongs to. Requires CAP_SYS_ADMIN. * * These commands operate on the file system's superblock rather * than on the file itself. The same operations can also be * done through any other file or directory on the same file * system, so it is safe to permit these. */ case FIFREEZE: case FITHAW: /* * FS_IOC_FIEMAP queries information about the allocation of * blocks within a file. * * This IOCTL command only makes sense for regular files and is * not implemented by devices. It is harmless to permit. */ case FS_IOC_FIEMAP: /* * FIGETBSZ queries the file system's block size for a file or * directory. * * This command operates on the file system's superblock rather * than on the file itself. The same operation can also be done * through any other file or directory on the same file system, * so it is safe to permit it. */ case FIGETBSZ: /* * FICLONE, FICLONERANGE and FIDEDUPERANGE make files share * their underlying storage ("reflink") between source and * destination FDs, on file systems which support that. * * These IOCTL commands only apply to regular files * and are harmless to permit for device files. */ case FICLONE: case FICLONERANGE: case FIDEDUPERANGE: /* * FS_IOC_GETFSUUID and FS_IOC_GETFSSYSFSPATH both operate on * the file system superblock, not on the specific file, so * these operations are available through any other file on the * same file system as well. */ case FS_IOC_GETFSUUID: case FS_IOC_GETFSSYSFSPATH: return true; /* * FIONREAD, FS_IOC_GETFLAGS, FS_IOC_SETFLAGS, FS_IOC_FSGETXATTR and * FS_IOC_FSSETXATTR are forwarded to device implementations. */ /* * file_ioctl() commands (FIBMAP, FS_IOC_RESVSP, FS_IOC_RESVSP64, * FS_IOC_UNRESVSP, FS_IOC_UNRESVSP64 and FS_IOC_ZERO_RANGE) are * forwarded to device implementations, so not permitted. */ /* Other commands are guarded by the access right. */ default: return false; } } /* * is_masked_device_ioctl_compat - same as the helper above, but checking the * "compat" IOCTL commands. * * The IOCTL commands with special handling in compat-mode should behave the * same as their non-compat counterparts. */ static __attribute_const__ bool is_masked_device_ioctl_compat(const unsigned int cmd) { switch (cmd) { /* FICLONE is permitted, same as in the non-compat variant. */ case FICLONE: return true; #if defined(CONFIG_X86_64) /* * FS_IOC_RESVSP_32, FS_IOC_RESVSP64_32, FS_IOC_UNRESVSP_32, * FS_IOC_UNRESVSP64_32, FS_IOC_ZERO_RANGE_32: not blanket-permitted, * for consistency with their non-compat variants. */ case FS_IOC_RESVSP_32: case FS_IOC_RESVSP64_32: case FS_IOC_UNRESVSP_32: case FS_IOC_UNRESVSP64_32: case FS_IOC_ZERO_RANGE_32: #endif /* * FS_IOC32_GETFLAGS, FS_IOC32_SETFLAGS are forwarded to their device * implementations. */ case FS_IOC32_GETFLAGS: case FS_IOC32_SETFLAGS: return false; default: return is_masked_device_ioctl(cmd); } } /* Ruleset management */ static struct landlock_object *get_inode_object(struct inode *const inode) { struct landlock_object *object, *new_object; struct landlock_inode_security *inode_sec = landlock_inode(inode); rcu_read_lock(); retry: object = rcu_dereference(inode_sec->object); if (object) { if (likely(refcount_inc_not_zero(&object->usage))) { rcu_read_unlock(); return object; } /* * We are racing with release_inode(), the object is going * away. Wait for release_inode(), then retry. */ spin_lock(&object->lock); spin_unlock(&object->lock); goto retry; } rcu_read_unlock(); /* * If there is no object tied to @inode, then create a new one (without * holding any locks). */ new_object = landlock_create_object(&landlock_fs_underops, inode); if (IS_ERR(new_object)) return new_object; /* * Protects against concurrent calls to get_inode_object() or * hook_sb_delete(). */ spin_lock(&inode->i_lock); if (unlikely(rcu_access_pointer(inode_sec->object))) { /* Someone else just created the object, bail out and retry. */ spin_unlock(&inode->i_lock); kfree(new_object); rcu_read_lock(); goto retry; } /* * @inode will be released by hook_sb_delete() on its superblock * shutdown, or by release_inode() when no more ruleset references the * related object. */ ihold(inode); rcu_assign_pointer(inode_sec->object, new_object); spin_unlock(&inode->i_lock); return new_object; } /* All access rights that can be tied to files. */ /* clang-format off */ #define ACCESS_FILE ( \ LANDLOCK_ACCESS_FS_EXECUTE | \ LANDLOCK_ACCESS_FS_WRITE_FILE | \ LANDLOCK_ACCESS_FS_READ_FILE | \ LANDLOCK_ACCESS_FS_TRUNCATE | \ LANDLOCK_ACCESS_FS_IOCTL_DEV) /* clang-format on */ /* * @path: Should have been checked by get_path_from_fd(). */ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, const struct path *const path, access_mask_t access_rights) { int err; struct landlock_id id = { .type = LANDLOCK_KEY_INODE, }; /* Files only get access rights that make sense. */ if (!d_is_dir(path->dentry) && (access_rights | ACCESS_FILE) != ACCESS_FILE) return -EINVAL; if (WARN_ON_ONCE(ruleset->num_layers != 1)) return -EINVAL; /* Transforms relative access rights to absolute ones. */ access_rights |= LANDLOCK_MASK_ACCESS_FS & ~landlock_get_fs_access_mask(ruleset, 0); id.key.object = get_inode_object(d_backing_inode(path->dentry)); if (IS_ERR(id.key.object)) return PTR_ERR(id.key.object); mutex_lock(&ruleset->lock); err = landlock_insert_rule(ruleset, id, access_rights); mutex_unlock(&ruleset->lock); /* * No need to check for an error because landlock_insert_rule() * increments the refcount for the new object if needed. */ landlock_put_object(id.key.object); return err; } /* Access-control management */ /* * The lifetime of the returned rule is tied to @domain. * * Returns NULL if no rule is found or if @dentry is negative. */ static const struct landlock_rule * find_rule(const struct landlock_ruleset *const domain, const struct dentry *const dentry) { const struct landlock_rule *rule; const struct inode *inode; struct landlock_id id = { .type = LANDLOCK_KEY_INODE, }; /* Ignores nonexistent leafs. */ if (d_is_negative(dentry)) return NULL; inode = d_backing_inode(dentry); rcu_read_lock(); id.key.object = rcu_dereference(landlock_inode(inode)->object); rule = landlock_find_rule(domain, id); rcu_read_unlock(); return rule; } /* * Allows access to pseudo filesystems that will never be mountable (e.g. * sockfs, pipefs), but can still be reachable through * /proc/<pid>/fd/<file-descriptor> */ static bool is_nouser_or_private(const struct dentry *dentry) { return (dentry->d_sb->s_flags & SB_NOUSER) || (d_is_positive(dentry) && unlikely(IS_PRIVATE(d_backing_inode(dentry)))); } static access_mask_t get_handled_fs_accesses(const struct landlock_ruleset *const domain) { /* Handles all initially denied by default access rights. */ return landlock_union_access_masks(domain).fs | LANDLOCK_ACCESS_FS_INITIALLY_DENIED; } static const struct access_masks any_fs = { .fs = ~0, }; static const struct landlock_ruleset *get_current_fs_domain(void) { return landlock_get_applicable_domain(landlock_get_current_domain(), any_fs); } /* * Check that a destination file hierarchy has more restrictions than a source * file hierarchy. This is only used for link and rename actions. * * @layer_masks_child2: Optional child masks. */ static bool no_more_access( const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS], const bool child1_is_directory, const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS], const bool child2_is_directory) { unsigned long access_bit; for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2); access_bit++) { /* Ignores accesses that only make sense for directories. */ const bool is_file_access = !!(BIT_ULL(access_bit) & ACCESS_FILE); if (child1_is_directory || is_file_access) { /* * Checks if the destination restrictions are a * superset of the source ones (i.e. inherited access * rights without child exceptions): * restrictions(parent2) >= restrictions(child1) */ if ((((*layer_masks_parent1)[access_bit] & (*layer_masks_child1)[access_bit]) | (*layer_masks_parent2)[access_bit]) != (*layer_masks_parent2)[access_bit]) return false; } if (!layer_masks_child2) continue; if (child2_is_directory || is_file_access) { /* * Checks inverted restrictions for RENAME_EXCHANGE: * restrictions(parent1) >= restrictions(child2) */ if ((((*layer_masks_parent2)[access_bit] & (*layer_masks_child2)[access_bit]) | (*layer_masks_parent1)[access_bit]) != (*layer_masks_parent1)[access_bit]) return false; } } return true; } #define NMA_TRUE(...) KUNIT_EXPECT_TRUE(test, no_more_access(__VA_ARGS__)) #define NMA_FALSE(...) KUNIT_EXPECT_FALSE(test, no_more_access(__VA_ARGS__)) #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static void test_no_more_access(struct kunit *const test) { const layer_mask_t rx0[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_FILE)] = BIT_ULL(0), }; const layer_mask_t mx0[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_MAKE_REG)] = BIT_ULL(0), }; const layer_mask_t x0[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), }; const layer_mask_t x1[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(1), }; const layer_mask_t x01[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0) | BIT_ULL(1), }; const layer_mask_t allows_all[LANDLOCK_NUM_ACCESS_FS] = {}; /* Checks without restriction. */ NMA_TRUE(&x0, &allows_all, false, &allows_all, NULL, false); NMA_TRUE(&allows_all, &x0, false, &allows_all, NULL, false); NMA_FALSE(&x0, &x0, false, &allows_all, NULL, false); /* * Checks that we can only refer a file if no more access could be * inherited. */ NMA_TRUE(&x0, &x0, false, &rx0, NULL, false); NMA_TRUE(&rx0, &rx0, false, &rx0, NULL, false); NMA_FALSE(&rx0, &rx0, false, &x0, NULL, false); NMA_FALSE(&rx0, &rx0, false, &x1, NULL, false); /* Checks allowed referring with different nested domains. */ NMA_TRUE(&x0, &x1, false, &x0, NULL, false); NMA_TRUE(&x1, &x0, false, &x0, NULL, false); NMA_TRUE(&x0, &x01, false, &x0, NULL, false); NMA_TRUE(&x0, &x01, false, &rx0, NULL, false); NMA_TRUE(&x01, &x0, false, &x0, NULL, false); NMA_TRUE(&x01, &x0, false, &rx0, NULL, false); NMA_FALSE(&x01, &x01, false, &x0, NULL, false); /* Checks that file access rights are also enforced for a directory. */ NMA_FALSE(&rx0, &rx0, true, &x0, NULL, false); /* Checks that directory access rights don't impact file referring... */ NMA_TRUE(&mx0, &mx0, false, &x0, NULL, false); /* ...but only directory referring. */ NMA_FALSE(&mx0, &mx0, true, &x0, NULL, false); /* Checks directory exchange. */ NMA_TRUE(&mx0, &mx0, true, &mx0, &mx0, true); NMA_TRUE(&mx0, &mx0, true, &mx0, &x0, true); NMA_FALSE(&mx0, &mx0, true, &x0, &mx0, true); NMA_FALSE(&mx0, &mx0, true, &x0, &x0, true); NMA_FALSE(&mx0, &mx0, true, &x1, &x1, true); /* Checks file exchange with directory access rights... */ NMA_TRUE(&mx0, &mx0, false, &mx0, &mx0, false); NMA_TRUE(&mx0, &mx0, false, &mx0, &x0, false); NMA_TRUE(&mx0, &mx0, false, &x0, &mx0, false); NMA_TRUE(&mx0, &mx0, false, &x0, &x0, false); /* ...and with file access rights. */ NMA_TRUE(&rx0, &rx0, false, &rx0, &rx0, false); NMA_TRUE(&rx0, &rx0, false, &rx0, &x0, false); NMA_FALSE(&rx0, &rx0, false, &x0, &rx0, false); NMA_FALSE(&rx0, &rx0, false, &x0, &x0, false); NMA_FALSE(&rx0, &rx0, false, &x1, &x1, false); /* * Allowing the following requests should not be a security risk * because domain 0 denies execute access, and domain 1 is always * nested with domain 0. However, adding an exception for this case * would mean to check all nested domains to make sure none can get * more privileges (e.g. processes only sandboxed by domain 0). * Moreover, this behavior (i.e. composition of N domains) could then * be inconsistent compared to domain 1's ruleset alone (e.g. it might * be denied to link/rename with domain 1's ruleset, whereas it would * be allowed if nested on top of domain 0). Another drawback would be * to create a cover channel that could enable sandboxed processes to * infer most of the filesystem restrictions from their domain. To * make it simple, efficient, safe, and more consistent, this case is * always denied. */ NMA_FALSE(&x1, &x1, false, &x0, NULL, false); NMA_FALSE(&x1, &x1, false, &rx0, NULL, false); NMA_FALSE(&x1, &x1, true, &x0, NULL, false); NMA_FALSE(&x1, &x1, true, &rx0, NULL, false); /* Checks the same case of exclusive domains with a file... */ NMA_TRUE(&x1, &x1, false, &x01, NULL, false); NMA_FALSE(&x1, &x1, false, &x01, &x0, false); NMA_FALSE(&x1, &x1, false, &x01, &x01, false); NMA_FALSE(&x1, &x1, false, &x0, &x0, false); /* ...and with a directory. */ NMA_FALSE(&x1, &x1, false, &x0, &x0, true); NMA_FALSE(&x1, &x1, true, &x0, &x0, false); NMA_FALSE(&x1, &x1, true, &x0, &x0, true); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ #undef NMA_TRUE #undef NMA_FALSE /* * Removes @layer_masks accesses that are not requested. * * Returns true if the request is allowed, false otherwise. */ static bool scope_to_request(const access_mask_t access_request, layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) { const unsigned long access_req = access_request; unsigned long access_bit; if (WARN_ON_ONCE(!layer_masks)) return true; for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks)) (*layer_masks)[access_bit] = 0; return !memchr_inv(layer_masks, 0, sizeof(*layer_masks)); } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static void test_scope_to_request_with_exec_none(struct kunit *const test) { /* Allows everything. */ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; /* Checks and scopes with execute. */ KUNIT_EXPECT_TRUE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &layer_masks)); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); } static void test_scope_to_request_with_exec_some(struct kunit *const test) { /* Denies execute and write. */ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), }; /* Checks and scopes with execute. */ KUNIT_EXPECT_FALSE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &layer_masks)); KUNIT_EXPECT_EQ(test, BIT_ULL(0), layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); } static void test_scope_to_request_without_access(struct kunit *const test) { /* Denies execute and write. */ layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), }; /* Checks and scopes without access request. */ KUNIT_EXPECT_TRUE(test, scope_to_request(0, &layer_masks)); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); KUNIT_EXPECT_EQ(test, 0, layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ /* * Returns true if there is at least one access right different than * LANDLOCK_ACCESS_FS_REFER. */ static bool is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], const access_mask_t access_request) { unsigned long access_bit; /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */ const unsigned long access_check = access_request & ~LANDLOCK_ACCESS_FS_REFER; if (!layer_masks) return false; for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) { if ((*layer_masks)[access_bit]) return true; } return false; } #define IE_TRUE(...) KUNIT_EXPECT_TRUE(test, is_eacces(__VA_ARGS__)) #define IE_FALSE(...) KUNIT_EXPECT_FALSE(test, is_eacces(__VA_ARGS__)) #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST static void test_is_eacces_with_none(struct kunit *const test) { const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; IE_FALSE(&layer_masks, 0); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } static void test_is_eacces_with_refer(struct kunit *const test) { const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_REFER)] = BIT_ULL(0), }; IE_FALSE(&layer_masks, 0); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } static void test_is_eacces_with_write(struct kunit *const test) { const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(0), }; IE_FALSE(&layer_masks, 0); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); IE_TRUE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ #undef IE_TRUE #undef IE_FALSE /** * is_access_to_paths_allowed - Check accesses for requests with a common path * * @domain: Domain to check against. * @path: File hierarchy to walk through. * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is * equal to @layer_masks_parent2 (if any). This is tied to the unique * requested path for most actions, or the source in case of a refer action * (i.e. rename or link), or the source and destination in case of * RENAME_EXCHANGE. * @layer_masks_parent1: Pointer to a matrix of layer masks per access * masks, identifying the layers that forbid a specific access. Bits from * this matrix can be unset according to the @path walk. An empty matrix * means that @domain allows all possible Landlock accesses (i.e. not only * those identified by @access_request_parent1). This matrix can * initially refer to domain layer masks and, when the accesses for the * destination and source are the same, to requested layer masks. * @dentry_child1: Dentry to the initial child of the parent1 path. This * pointer must be NULL for non-refer actions (i.e. not link nor rename). * @access_request_parent2: Similar to @access_request_parent1 but for a * request involving a source and a destination. This refers to the * destination, except in case of RENAME_EXCHANGE where it also refers to * the source. Must be set to 0 when using a simple path request. * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer * action. This must be NULL otherwise. * @dentry_child2: Dentry to the initial child of the parent2 path. This * pointer is only set for RENAME_EXCHANGE actions and must be NULL * otherwise. * * This helper first checks that the destination has a superset of restrictions * compared to the source (if any) for a common path. Because of * RENAME_EXCHANGE actions, source and destinations may be swapped. It then * checks that the collected accesses and the remaining ones are enough to * allow the request. * * Returns: * - true if the access request is granted; * - false otherwise. */ static bool is_access_to_paths_allowed( const struct landlock_ruleset *const domain, const struct path *const path, const access_mask_t access_request_parent1, layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], const struct dentry *const dentry_child1, const access_mask_t access_request_parent2, layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], const struct dentry *const dentry_child2) { bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check, child1_is_directory = true, child2_is_directory = true; struct path walker_path; access_mask_t access_masked_parent1, access_masked_parent2; layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS], _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS]; layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL, (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL; if (!access_request_parent1 && !access_request_parent2) return true; if (WARN_ON_ONCE(!domain || !path)) return true; if (is_nouser_or_private(path->dentry)) return true; if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1)) return false; if (unlikely(layer_masks_parent2)) { if (WARN_ON_ONCE(!dentry_child1)) return false; /* * For a double request, first check for potential privilege * escalation by looking at domain handled accesses (which are * a superset of the meaningful requested accesses). */ access_masked_parent1 = access_masked_parent2 = get_handled_fs_accesses(domain); is_dom_check = true; } else { if (WARN_ON_ONCE(dentry_child1 || dentry_child2)) return false; /* For a simple request, only check for requested accesses. */ access_masked_parent1 = access_request_parent1; access_masked_parent2 = access_request_parent2; is_dom_check = false; } if (unlikely(dentry_child1)) { landlock_unmask_layers( find_rule(domain, dentry_child1), landlock_init_layer_masks( domain, LANDLOCK_MASK_ACCESS_FS, &_layer_masks_child1, LANDLOCK_KEY_INODE), &_layer_masks_child1, ARRAY_SIZE(_layer_masks_child1)); layer_masks_child1 = &_layer_masks_child1; child1_is_directory = d_is_dir(dentry_child1); } if (unlikely(dentry_child2)) { landlock_unmask_layers( find_rule(domain, dentry_child2), landlock_init_layer_masks( domain, LANDLOCK_MASK_ACCESS_FS, &_layer_masks_child2, LANDLOCK_KEY_INODE), &_layer_masks_child2, ARRAY_SIZE(_layer_masks_child2)); layer_masks_child2 = &_layer_masks_child2; child2_is_directory = d_is_dir(dentry_child2); } walker_path = *path; path_get(&walker_path); /* * We need to walk through all the hierarchy to not miss any relevant * restriction. */ while (true) { struct dentry *parent_dentry; const struct landlock_rule *rule; /* * If at least all accesses allowed on the destination are * already allowed on the source, respectively if there is at * least as much as restrictions on the destination than on the * source, then we can safely refer files from the source to * the destination without risking a privilege escalation. * This also applies in the case of RENAME_EXCHANGE, which * implies checks on both direction. This is crucial for * standalone multilayered security policies. Furthermore, * this helps avoid policy writers to shoot themselves in the * foot. */ if (unlikely(is_dom_check && no_more_access( layer_masks_parent1, layer_masks_child1, child1_is_directory, layer_masks_parent2, layer_masks_child2, child2_is_directory))) { allowed_parent1 = scope_to_request( access_request_parent1, layer_masks_parent1); allowed_parent2 = scope_to_request( access_request_parent2, layer_masks_parent2); /* Stops when all accesses are granted. */ if (allowed_parent1 && allowed_parent2) break; /* * Now, downgrades the remaining checks from domain * handled accesses to requested accesses. */ is_dom_check = false; access_masked_parent1 = access_request_parent1; access_masked_parent2 = access_request_parent2; } rule = find_rule(domain, walker_path.dentry); allowed_parent1 = landlock_unmask_layers( rule, access_masked_parent1, layer_masks_parent1, ARRAY_SIZE(*layer_masks_parent1)); allowed_parent2 = landlock_unmask_layers( rule, access_masked_parent2, layer_masks_parent2, ARRAY_SIZE(*layer_masks_parent2)); /* Stops when a rule from each layer grants access. */ if (allowed_parent1 && allowed_parent2) break; jump_up: if (walker_path.dentry == walker_path.mnt->mnt_root) { if (follow_up(&walker_path)) { /* Ignores hidden mount points. */ goto jump_up; } else { /* * Stops at the real root. Denies access * because not all layers have granted access. */ break; } } if (unlikely(IS_ROOT(walker_path.dentry))) { /* * Stops at disconnected root directories. Only allows * access to internal filesystems (e.g. nsfs, which is * reachable through /proc/<pid>/ns/<namespace>). */ allowed_parent1 = allowed_parent2 = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL); break; } parent_dentry = dget_parent(walker_path.dentry); dput(walker_path.dentry); walker_path.dentry = parent_dentry; } path_put(&walker_path); return allowed_parent1 && allowed_parent2; } static int check_access_path(const struct landlock_ruleset *const domain, const struct path *const path, access_mask_t access_request) { layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; access_request = landlock_init_layer_masks( domain, access_request, &layer_masks, LANDLOCK_KEY_INODE); if (is_access_to_paths_allowed(domain, path, access_request, &layer_masks, NULL, 0, NULL, NULL)) return 0; return -EACCES; } static int current_check_access_path(const struct path *const path, const access_mask_t access_request) { const struct landlock_ruleset *const dom = get_current_fs_domain(); if (!dom) return 0; return check_access_path(dom, path, access_request); } static access_mask_t get_mode_access(const umode_t mode) { switch (mode & S_IFMT) { case S_IFLNK: return LANDLOCK_ACCESS_FS_MAKE_SYM; case 0: /* A zero mode translates to S_IFREG. */ case S_IFREG: return LANDLOCK_ACCESS_FS_MAKE_REG; case S_IFDIR: return LANDLOCK_ACCESS_FS_MAKE_DIR; case S_IFCHR: return LANDLOCK_ACCESS_FS_MAKE_CHAR; case S_IFBLK: return LANDLOCK_ACCESS_FS_MAKE_BLOCK; case S_IFIFO: return LANDLOCK_ACCESS_FS_MAKE_FIFO; case S_IFSOCK: return LANDLOCK_ACCESS_FS_MAKE_SOCK; default: WARN_ON_ONCE(1); return 0; } } static access_mask_t maybe_remove(const struct dentry *const dentry) { if (d_is_negative(dentry)) return 0; return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR : LANDLOCK_ACCESS_FS_REMOVE_FILE; } /** * collect_domain_accesses - Walk through a file path and collect accesses * * @domain: Domain to check against. * @mnt_root: Last directory to check. * @dir: Directory to start the walk from. * @layer_masks_dom: Where to store the collected accesses. * * This helper is useful to begin a path walk from the @dir directory to a * @mnt_root directory used as a mount point. This mount point is the common * ancestor between the source and the destination of a renamed and linked * file. While walking from @dir to @mnt_root, we record all the domain's * allowed accesses in @layer_masks_dom. * * This is similar to is_access_to_paths_allowed() but much simpler because it * only handles walking on the same mount point and only checks one set of * accesses. * * Returns: * - true if all the domain access rights are allowed for @dir; * - false if the walk reached @mnt_root. */ static bool collect_domain_accesses( const struct landlock_ruleset *const domain, const struct dentry *const mnt_root, struct dentry *dir, layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS]) { unsigned long access_dom; bool ret = false; if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) return true; if (is_nouser_or_private(dir)) return true; access_dom = landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, layer_masks_dom, LANDLOCK_KEY_INODE); dget(dir); while (true) { struct dentry *parent_dentry; /* Gets all layers allowing all domain accesses. */ if (landlock_unmask_layers(find_rule(domain, dir), access_dom, layer_masks_dom, ARRAY_SIZE(*layer_masks_dom))) { /* * Stops when all handled accesses are allowed by at * least one rule in each layer. */ ret = true; break; } /* We should not reach a root other than @mnt_root. */ if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir))) break; parent_dentry = dget_parent(dir); dput(dir); dir = parent_dentry; } dput(dir); return ret; } /** * current_check_refer_path - Check if a rename or link action is allowed * * @old_dentry: File or directory requested to be moved or linked. * @new_dir: Destination parent directory. * @new_dentry: Destination file or directory. * @removable: Sets to true if it is a rename operation. * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE. * * Because of its unprivileged constraints, Landlock relies on file hierarchies * (and not only inodes) to tie access rights to files. Being able to link or * rename a file hierarchy brings some challenges. Indeed, moving or linking a * file (i.e. creating a new reference to an inode) can have an impact on the * actions allowed for a set of files if it would change its parent directory * (i.e. reparenting). * * To avoid trivial access right bypasses, Landlock first checks if the file or * directory requested to be moved would gain new access rights inherited from * its new hierarchy. Before returning any error, Landlock then checks that * the parent source hierarchy and the destination hierarchy would allow the * link or rename action. If it is not the case, an error with EACCES is * returned to inform user space that there is no way to remove or create the * requested source file type. If it should be allowed but the new inherited * access rights would be greater than the source access rights, then the * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables * user space to abort the whole operation if there is no way to do it, or to * manually copy the source to the destination if this remains allowed, e.g. * because file creation is allowed on the destination directory but not direct * linking. * * To achieve this goal, the kernel needs to compare two file hierarchies: the * one identifying the source file or directory (including itself), and the * destination one. This can be seen as a multilayer partial ordering problem. * The kernel walks through these paths and collects in a matrix the access * rights that are denied per layer. These matrices are then compared to see * if the destination one has more (or the same) restrictions as the source * one. If this is the case, the requested action will not return EXDEV, which * doesn't mean the action is allowed. The parent hierarchy of the source * (i.e. parent directory), and the destination hierarchy must also be checked * to verify that they explicitly allow such action (i.e. referencing, * creation and potentially removal rights). The kernel implementation is then * required to rely on potentially four matrices of access rights: one for the * source file or directory (i.e. the child), a potentially other one for the * other source/destination (in case of RENAME_EXCHANGE), one for the source * parent hierarchy and a last one for the destination hierarchy. These * ephemeral matrices take some space on the stack, which limits the number of * layers to a deemed reasonable number: 16. * * Returns: * - 0 if access is allowed; * - -EXDEV if @old_dentry would inherit new access rights from @new_dir; * - -EACCES if file removal or creation is denied. */ static int current_check_refer_path(struct dentry *const old_dentry, const struct path *const new_dir, struct dentry *const new_dentry, const bool removable, const bool exchange) { const struct landlock_ruleset *const dom = get_current_fs_domain(); bool allow_parent1, allow_parent2; access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; struct dentry *old_parent; layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; if (!dom) return 0; if (WARN_ON_ONCE(dom->num_layers < 1)) return -EACCES; if (unlikely(d_is_negative(old_dentry))) return -ENOENT; if (exchange) { if (unlikely(d_is_negative(new_dentry))) return -ENOENT; access_request_parent1 = get_mode_access(d_backing_inode(new_dentry)->i_mode); } else { access_request_parent1 = 0; } access_request_parent2 = get_mode_access(d_backing_inode(old_dentry)->i_mode); if (removable) { access_request_parent1 |= maybe_remove(old_dentry); access_request_parent2 |= maybe_remove(new_dentry); } /* The mount points are the same for old and new paths, cf. EXDEV. */ if (old_dentry->d_parent == new_dir->dentry) { /* * The LANDLOCK_ACCESS_FS_REFER access right is not required * for same-directory referer (i.e. no reparenting). */ access_request_parent1 = landlock_init_layer_masks( dom, access_request_parent1 | access_request_parent2, &layer_masks_parent1, LANDLOCK_KEY_INODE); if (is_access_to_paths_allowed( dom, new_dir, access_request_parent1, &layer_masks_parent1, NULL, 0, NULL, NULL)) return 0; return -EACCES; } access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER; access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER; /* Saves the common mount point. */ mnt_dir.mnt = new_dir->mnt; mnt_dir.dentry = new_dir->mnt->mnt_root; /* * old_dentry may be the root of the common mount point and * !IS_ROOT(old_dentry) at the same time (e.g. with open_tree() and * OPEN_TREE_CLONE). We do not need to call dget(old_parent) because * we keep a reference to old_dentry. */ old_parent = (old_dentry == mnt_dir.dentry) ? old_dentry : old_dentry->d_parent; /* new_dir->dentry is equal to new_dentry->d_parent */ allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry, old_parent, &layer_masks_parent1); allow_parent2 = collect_domain_accesses( dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2); if (allow_parent1 && allow_parent2) return 0; /* * To be able to compare source and destination domain access rights, * take into account the @old_dentry access rights aggregated with its * parent access rights. This will be useful to compare with the * destination parent access rights. */ if (is_access_to_paths_allowed( dom, &mnt_dir, access_request_parent1, &layer_masks_parent1, old_dentry, access_request_parent2, &layer_masks_parent2, exchange ? new_dentry : NULL)) return 0; /* * This prioritizes EACCES over EXDEV for all actions, including * renames with RENAME_EXCHANGE. */ if (likely(is_eacces(&layer_masks_parent1, access_request_parent1) || is_eacces(&layer_masks_parent2, access_request_parent2))) return -EACCES; /* * Gracefully forbids reparenting if the destination directory * hierarchy is not a superset of restrictions of the source directory * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the * source or the destination. */ return -EXDEV; } /* Inode hooks */ static void hook_inode_free_security_rcu(void *inode_security) { struct landlock_inode_security *inode_sec; /* * All inodes must already have been untied from their object by * release_inode() or hook_sb_delete(). */ inode_sec = inode_security + landlock_blob_sizes.lbs_inode; WARN_ON_ONCE(inode_sec->object); } /* Super-block hooks */ /* * Release the inodes used in a security policy. * * Cf. fsnotify_unmount_inodes() and invalidate_inodes() */ static void hook_sb_delete(struct super_block *const sb) { struct inode *inode, *prev_inode = NULL; if (!landlock_initialized) return; spin_lock(&sb->s_inode_list_lock); list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { struct landlock_object *object; /* Only handles referenced inodes. */ if (!atomic_read(&inode->i_count)) continue; /* * Protects against concurrent modification of inode (e.g. * from get_inode_object()). */ spin_lock(&inode->i_lock); /* * Checks I_FREEING and I_WILL_FREE to protect against a race * condition when release_inode() just called iput(), which * could lead to a NULL dereference of inode->security or a * second call to iput() for the same Landlock object. Also * checks I_NEW because such inode cannot be tied to an object. */ if (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW)) { spin_unlock(&inode->i_lock); continue; } rcu_read_lock(); object = rcu_dereference(landlock_inode(inode)->object); if (!object) { rcu_read_unlock(); spin_unlock(&inode->i_lock); continue; } /* Keeps a reference to this inode until the next loop walk. */ __iget(inode); spin_unlock(&inode->i_lock); /* * If there is no concurrent release_inode() ongoing, then we * are in charge of calling iput() on this inode, otherwise we * will just wait for it to finish. */ spin_lock(&object->lock); if (object->underobj == inode) { object->underobj = NULL; spin_unlock(&object->lock); rcu_read_unlock(); /* * Because object->underobj was not NULL, * release_inode() and get_inode_object() guarantee * that it is safe to reset * landlock_inode(inode)->object while it is not NULL. * It is therefore not necessary to lock inode->i_lock. */ rcu_assign_pointer(landlock_inode(inode)->object, NULL); /* * At this point, we own the ihold() reference that was * originally set up by get_inode_object() and the * __iget() reference that we just set in this loop * walk. Therefore the following call to iput() will * not sleep nor drop the inode because there is now at * least two references to it. */ iput(inode); } else { spin_unlock(&object->lock); rcu_read_unlock(); } if (prev_inode) { /* * At this point, we still own the __iget() reference * that we just set in this loop walk. Therefore we * can drop the list lock and know that the inode won't * disappear from under us until the next loop walk. */ spin_unlock(&sb->s_inode_list_lock); /* * We can now actually put the inode reference from the * previous loop walk, which is not needed anymore. */ iput(prev_inode); cond_resched(); spin_lock(&sb->s_inode_list_lock); } prev_inode = inode; } spin_unlock(&sb->s_inode_list_lock); /* Puts the inode reference from the last loop walk, if any. */ if (prev_inode) iput(prev_inode); /* Waits for pending iput() in release_inode(). */ wait_var_event(&landlock_superblock(sb)->inode_refs, !atomic_long_read(&landlock_superblock(sb)->inode_refs)); } /* * Because a Landlock security policy is defined according to the filesystem * topology (i.e. the mount namespace), changing it may grant access to files * not previously allowed. * * To make it simple, deny any filesystem topology modification by landlocked * processes. Non-landlocked processes may still change the namespace of a * landlocked process, but this kind of threat must be handled by a system-wide * access-control security policy. * * This could be lifted in the future if Landlock can safely handle mount * namespace updates requested by a landlocked process. Indeed, we could * update the current domain (which is currently read-only) by taking into * account the accesses of the source and the destination of a new mount point. * However, it would also require to make all the child domains dynamically * inherit these new constraints. Anyway, for backward compatibility reasons, * a dedicated user space option would be required (e.g. as a ruleset flag). */ static int hook_sb_mount(const char *const dev_name, const struct path *const path, const char *const type, const unsigned long flags, void *const data) { if (!get_current_fs_domain()) return 0; return -EPERM; } static int hook_move_mount(const struct path *const from_path, const struct path *const to_path) { if (!get_current_fs_domain()) return 0; return -EPERM; } /* * Removing a mount point may reveal a previously hidden file hierarchy, which * may then grant access to files, which may have previously been forbidden. */ static int hook_sb_umount(struct vfsmount *const mnt, const int flags) { if (!get_current_fs_domain()) return 0; return -EPERM; } static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts) { if (!get_current_fs_domain()) return 0; return -EPERM; } /* * pivot_root(2), like mount(2), changes the current mount namespace. It must * then be forbidden for a landlocked process. * * However, chroot(2) may be allowed because it only changes the relative root * directory of the current process. Moreover, it can be used to restrict the * view of the filesystem. */ static int hook_sb_pivotroot(const struct path *const old_path, const struct path *const new_path) { if (!get_current_fs_domain()) return 0; return -EPERM; } /* Path hooks */ static int hook_path_link(struct dentry *const old_dentry, const struct path *const new_dir, struct dentry *const new_dentry) { return current_check_refer_path(old_dentry, new_dir, new_dentry, false, false); } static int hook_path_rename(const struct path *const old_dir, struct dentry *const old_dentry, const struct path *const new_dir, struct dentry *const new_dentry, const unsigned int flags) { /* old_dir refers to old_dentry->d_parent and new_dir->mnt */ return current_check_refer_path(old_dentry, new_dir, new_dentry, true, !!(flags & RENAME_EXCHANGE)); } static int hook_path_mkdir(const struct path *const dir, struct dentry *const dentry, const umode_t mode) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR); } static int hook_path_mknod(const struct path *const dir, struct dentry *const dentry, const umode_t mode, const unsigned int dev) { const struct landlock_ruleset *const dom = get_current_fs_domain(); if (!dom) return 0; return check_access_path(dom, dir, get_mode_access(mode)); } static int hook_path_symlink(const struct path *const dir, struct dentry *const dentry, const char *const old_name) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM); } static int hook_path_unlink(const struct path *const dir, struct dentry *const dentry) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE); } static int hook_path_rmdir(const struct path *const dir, struct dentry *const dentry) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR); } static int hook_path_truncate(const struct path *const path) { return current_check_access_path(path, LANDLOCK_ACCESS_FS_TRUNCATE); } /* File hooks */ /** * get_required_file_open_access - Get access needed to open a file * * @file: File being opened. * * Returns the access rights that are required for opening the given file, * depending on the file type and open mode. */ static access_mask_t get_required_file_open_access(const struct file *const file) { access_mask_t access = 0; if (file->f_mode & FMODE_READ) { /* A directory can only be opened in read mode. */ if (S_ISDIR(file_inode(file)->i_mode)) return LANDLOCK_ACCESS_FS_READ_DIR; access = LANDLOCK_ACCESS_FS_READ_FILE; } if (file->f_mode & FMODE_WRITE) access |= LANDLOCK_ACCESS_FS_WRITE_FILE; /* __FMODE_EXEC is indeed part of f_flags, not f_mode. */ if (file->f_flags & __FMODE_EXEC) access |= LANDLOCK_ACCESS_FS_EXECUTE; return access; } static int hook_file_alloc_security(struct file *const file) { /* * Grants all access rights, even if most of them are not checked later * on. It is more consistent. * * Notably, file descriptors for regular files can also be acquired * without going through the file_open hook, for example when using * memfd_create(2). */ landlock_file(file)->allowed_access = LANDLOCK_MASK_ACCESS_FS; return 0; } static bool is_device(const struct file *const file) { const struct inode *inode = file_inode(file); return S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode); } static int hook_file_open(struct file *const file) { layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; access_mask_t open_access_request, full_access_request, allowed_access, optional_access; const struct landlock_ruleset *const dom = landlock_get_applicable_domain( landlock_cred(file->f_cred)->domain, any_fs); if (!dom) return 0; /* * Because a file may be opened with O_PATH, get_required_file_open_access() * may return 0. This case will be handled with a future Landlock * evolution. */ open_access_request = get_required_file_open_access(file); /* * We look up more access than what we immediately need for open(), so * that we can later authorize operations on opened files. */ optional_access = LANDLOCK_ACCESS_FS_TRUNCATE; if (is_device(file)) optional_access |= LANDLOCK_ACCESS_FS_IOCTL_DEV; full_access_request = open_access_request | optional_access; if (is_access_to_paths_allowed( dom, &file->f_path, landlock_init_layer_masks(dom, full_access_request, &layer_masks, LANDLOCK_KEY_INODE), &layer_masks, NULL, 0, NULL, NULL)) { allowed_access = full_access_request; } else { unsigned long access_bit; const unsigned long access_req = full_access_request; /* * Calculate the actual allowed access rights from layer_masks. * Add each access right to allowed_access which has not been * vetoed by any layer. */ allowed_access = 0; for_each_set_bit(access_bit, &access_req, ARRAY_SIZE(layer_masks)) { if (!layer_masks[access_bit]) allowed_access |= BIT_ULL(access_bit); } } /* * For operations on already opened files (i.e. ftruncate()), it is the * access rights at the time of open() which decide whether the * operation is permitted. Therefore, we record the relevant subset of * file access rights in the opened struct file. */ landlock_file(file)->allowed_access = allowed_access; if ((open_access_request & allowed_access) == open_access_request) return 0; return -EACCES; } static int hook_file_truncate(struct file *const file) { /* * Allows truncation if the truncate right was available at the time of * opening the file, to get a consistent access check as for read, write * and execute operations. * * Note: For checks done based on the file's Landlock allowed access, we * enforce them independently of whether the current thread is in a * Landlock domain, so that open files passed between independent * processes retain their behaviour. */ if (landlock_file(file)->allowed_access & LANDLOCK_ACCESS_FS_TRUNCATE) return 0; return -EACCES; } static int hook_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { access_mask_t allowed_access = landlock_file(file)->allowed_access; /* * It is the access rights at the time of opening the file which * determine whether IOCTL can be used on the opened file later. * * The access right is attached to the opened file in hook_file_open(). */ if (allowed_access & LANDLOCK_ACCESS_FS_IOCTL_DEV) return 0; if (!is_device(file)) return 0; if (is_masked_device_ioctl(cmd)) return 0; return -EACCES; } static int hook_file_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) { access_mask_t allowed_access = landlock_file(file)->allowed_access; /* * It is the access rights at the time of opening the file which * determine whether IOCTL can be used on the opened file later. * * The access right is attached to the opened file in hook_file_open(). */ if (allowed_access & LANDLOCK_ACCESS_FS_IOCTL_DEV) return 0; if (!is_device(file)) return 0; if (is_masked_device_ioctl_compat(cmd)) return 0; return -EACCES; } static void hook_file_set_fowner(struct file *file) { struct landlock_ruleset *new_dom, *prev_dom; /* * Lock already held by __f_setown(), see commit 26f204380a3c ("fs: Fix * file_set_fowner LSM hook inconsistencies"). */ lockdep_assert_held(&file_f_owner(file)->lock); new_dom = landlock_get_current_domain(); landlock_get_ruleset(new_dom); prev_dom = landlock_file(file)->fown_domain; landlock_file(file)->fown_domain = new_dom; /* Called in an RCU read-side critical section. */ landlock_put_ruleset_deferred(prev_dom); } static void hook_file_free_security(struct file *file) { landlock_put_ruleset_deferred(landlock_file(file)->fown_domain); } static struct security_hook_list landlock_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_free_security_rcu, hook_inode_free_security_rcu), LSM_HOOK_INIT(sb_delete, hook_sb_delete), LSM_HOOK_INIT(sb_mount, hook_sb_mount), LSM_HOOK_INIT(move_mount, hook_move_mount), LSM_HOOK_INIT(sb_umount, hook_sb_umount), LSM_HOOK_INIT(sb_remount, hook_sb_remount), LSM_HOOK_INIT(sb_pivotroot, hook_sb_pivotroot), LSM_HOOK_INIT(path_link, hook_path_link), LSM_HOOK_INIT(path_rename, hook_path_rename), LSM_HOOK_INIT(path_mkdir, hook_path_mkdir), LSM_HOOK_INIT(path_mknod, hook_path_mknod), LSM_HOOK_INIT(path_symlink, hook_path_symlink), LSM_HOOK_INIT(path_unlink, hook_path_unlink), LSM_HOOK_INIT(path_rmdir, hook_path_rmdir), LSM_HOOK_INIT(path_truncate, hook_path_truncate), LSM_HOOK_INIT(file_alloc_security, hook_file_alloc_security), LSM_HOOK_INIT(file_open, hook_file_open), LSM_HOOK_INIT(file_truncate, hook_file_truncate), LSM_HOOK_INIT(file_ioctl, hook_file_ioctl), LSM_HOOK_INIT(file_ioctl_compat, hook_file_ioctl_compat), LSM_HOOK_INIT(file_set_fowner, hook_file_set_fowner), LSM_HOOK_INIT(file_free_security, hook_file_free_security), }; __init void landlock_add_fs_hooks(void) { security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), &landlock_lsmid); } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST /* clang-format off */ static struct kunit_case test_cases[] = { KUNIT_CASE(test_no_more_access), KUNIT_CASE(test_scope_to_request_with_exec_none), KUNIT_CASE(test_scope_to_request_with_exec_some), KUNIT_CASE(test_scope_to_request_without_access), KUNIT_CASE(test_is_eacces_with_none), KUNIT_CASE(test_is_eacces_with_refer), KUNIT_CASE(test_is_eacces_with_write), {} }; /* clang-format on */ static struct kunit_suite test_suite = { .name = "landlock_fs", .test_cases = test_cases, }; kunit_test_suite(test_suite); #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ |
1139 1140 1 949 111 96 3 2 4 4 4238 4243 4236 4250 4244 4233 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 | // SPDX-License-Identifier: GPL-2.0 #include <linux/irq_work.h> #include <linux/spinlock.h> #include <linux/task_work.h> #include <linux/resume_user_mode.h> static struct callback_head work_exited; /* all we need is ->next == NULL */ #ifdef CONFIG_IRQ_WORK static void task_work_set_notify_irq(struct irq_work *entry) { test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); } static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = IRQ_WORK_INIT_HARD(task_work_set_notify_irq); #endif /** * task_work_add - ask the @task to execute @work->func() * @task: the task which should run the callback * @work: the callback to run * @notify: how to notify the targeted task * * Queue @work for task_work_run() below and notify the @task if @notify * is @TWA_RESUME, @TWA_SIGNAL, @TWA_SIGNAL_NO_IPI or @TWA_NMI_CURRENT. * * @TWA_SIGNAL works like signals, in that the it will interrupt the targeted * task and run the task_work, regardless of whether the task is currently * running in the kernel or userspace. * @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a * reschedule IPI to force the targeted task to reschedule and run task_work. * This can be advantageous if there's no strict requirement that the * task_work be run as soon as possible, just whenever the task enters the * kernel anyway. * @TWA_RESUME work is run only when the task exits the kernel and returns to * user mode, or before entering guest mode. * @TWA_NMI_CURRENT works like @TWA_RESUME, except it can only be used for the * current @task and if the current context is NMI. * * Fails if the @task is exiting/exited and thus it can't process this @work. * Otherwise @work->func() will be called when the @task goes through one of * the aforementioned transitions, or exits. * * If the targeted task is exiting, then an error is returned and the work item * is not queued. It's up to the caller to arrange for an alternative mechanism * in that case. * * Note: there is no ordering guarantee on works queued here. The task_work * list is LIFO. * * RETURNS: * 0 if succeeds or -ESRCH. */ int task_work_add(struct task_struct *task, struct callback_head *work, enum task_work_notify_mode notify) { struct callback_head *head; int flags = notify & TWA_FLAGS; notify &= ~TWA_FLAGS; if (notify == TWA_NMI_CURRENT) { if (WARN_ON_ONCE(task != current)) return -EINVAL; if (!IS_ENABLED(CONFIG_IRQ_WORK)) return -EINVAL; } else { /* * Record the work call stack in order to print it in KASAN * reports. * * Note that stack allocation can fail if TWAF_NO_ALLOC flag * is set and new page is needed to expand the stack buffer. */ if (flags & TWAF_NO_ALLOC) kasan_record_aux_stack_noalloc(work); else kasan_record_aux_stack(work); } head = READ_ONCE(task->task_works); do { if (unlikely(head == &work_exited)) return -ESRCH; work->next = head; } while (!try_cmpxchg(&task->task_works, &head, work)); switch (notify) { case TWA_NONE: break; case TWA_RESUME: set_notify_resume(task); break; case TWA_SIGNAL: set_notify_signal(task); break; case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; #ifdef CONFIG_IRQ_WORK case TWA_NMI_CURRENT: irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); break; #endif default: WARN_ON_ONCE(1); break; } return 0; } /** * task_work_cancel_match - cancel a pending work added by task_work_add() * @task: the task which should execute the work * @match: match function to call * @data: data to be passed in to match function * * RETURNS: * The found work or NULL if not found. */ struct callback_head * task_work_cancel_match(struct task_struct *task, bool (*match)(struct callback_head *, void *data), void *data) { struct callback_head **pprev = &task->task_works; struct callback_head *work; unsigned long flags; if (likely(!task_work_pending(task))) return NULL; /* * If cmpxchg() fails we continue without updating pprev. * Either we raced with task_work_add() which added the * new entry before this work, we will find it again. Or * we raced with task_work_run(), *pprev == NULL/exited. */ raw_spin_lock_irqsave(&task->pi_lock, flags); work = READ_ONCE(*pprev); while (work) { if (!match(work, data)) { pprev = &work->next; work = READ_ONCE(*pprev); } else if (try_cmpxchg(pprev, &work, work->next)) break; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); return work; } static bool task_work_func_match(struct callback_head *cb, void *data) { return cb->func == data; } /** * task_work_cancel_func - cancel a pending work matching a function added by task_work_add() * @task: the task which should execute the func's work * @func: identifies the func to match with a work to remove * * Find the last queued pending work with ->func == @func and remove * it from queue. * * RETURNS: * The found work or NULL if not found. */ struct callback_head * task_work_cancel_func(struct task_struct *task, task_work_func_t func) { return task_work_cancel_match(task, task_work_func_match, func); } static bool task_work_match(struct callback_head *cb, void *data) { return cb == data; } /** * task_work_cancel - cancel a pending work added by task_work_add() * @task: the task which should execute the work * @cb: the callback to remove if queued * * Remove a callback from a task's queue if queued. * * RETURNS: * True if the callback was queued and got cancelled, false otherwise. */ bool task_work_cancel(struct task_struct *task, struct callback_head *cb) { struct callback_head *ret; ret = task_work_cancel_match(task, task_work_match, cb); return ret == cb; } /** * task_work_run - execute the works added by task_work_add() * * Flush the pending works. Should be used by the core kernel code. * Called before the task returns to the user-mode or stops, or when * it exits. In the latter case task_work_add() can no longer add the * new work after task_work_run() returns. */ void task_work_run(void) { struct task_struct *task = current; struct callback_head *work, *head, *next; for (;;) { /* * work->func() can do task_work_add(), do not set * work_exited unless the list is empty. */ work = READ_ONCE(task->task_works); do { head = NULL; if (!work) { if (task->flags & PF_EXITING) head = &work_exited; else break; } } while (!try_cmpxchg(&task->task_works, &work, head)); if (!work) break; /* * Synchronize with task_work_cancel_match(). It can not remove * the first entry == work, cmpxchg(task_works) must fail. * But it can remove another entry from the ->next list. */ raw_spin_lock_irq(&task->pi_lock); raw_spin_unlock_irq(&task->pi_lock); do { next = work->next; work->func(work); work = next; cond_resched(); } while (work); } } |
6 2 2 2 2 1 1 7 7 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 | // SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * * CLC (connection layer control) handshake over initial TCP socket to * prepare for RDMA traffic * * Copyright IBM Corp. 2016, 2018 * * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> */ #include <linux/in.h> #include <linux/inetdevice.h> #include <linux/if_ether.h> #include <linux/sched/signal.h> #include <linux/utsname.h> #include <linux/ctype.h> #include <net/addrconf.h> #include <net/sock.h> #include <net/tcp.h> #include "smc.h" #include "smc_core.h" #include "smc_clc.h" #include "smc_ib.h" #include "smc_ism.h" #include "smc_netlink.h" #define SMCR_CLC_ACCEPT_CONFIRM_LEN 68 #define SMCD_CLC_ACCEPT_CONFIRM_LEN 48 #define SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 78 #define SMCR_CLC_ACCEPT_CONFIRM_LEN_V2 108 #define SMC_CLC_RECV_BUF_LEN 100 /* eye catcher "SMCR" EBCDIC for CLC messages */ static const char SMC_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xd9'}; /* eye catcher "SMCD" EBCDIC for CLC messages */ static const char SMCD_EYECATCHER[4] = {'\xe2', '\xd4', '\xc3', '\xc4'}; static u8 smc_hostname[SMC_MAX_HOSTNAME_LEN]; struct smc_clc_eid_table { rwlock_t lock; struct list_head list; u8 ueid_cnt; u8 seid_enabled; }; static struct smc_clc_eid_table smc_clc_eid_table; struct smc_clc_eid_entry { struct list_head list; u8 eid[SMC_MAX_EID_LEN]; }; /* The size of a user EID is 32 characters. * Valid characters should be (single-byte character set) A-Z, 0-9, '.' and '-'. * Blanks should only be used to pad to the expected size. * First character must be alphanumeric. */ static bool smc_clc_ueid_valid(char *ueid) { char *end = ueid + SMC_MAX_EID_LEN; while (--end >= ueid && isspace(*end)) ; if (end < ueid) return false; if (!isalnum(*ueid) || islower(*ueid)) return false; while (ueid <= end) { if ((!isalnum(*ueid) || islower(*ueid)) && *ueid != '.' && *ueid != '-') return false; ueid++; } return true; } static int smc_clc_ueid_add(char *ueid) { struct smc_clc_eid_entry *new_ueid, *tmp_ueid; int rc; if (!smc_clc_ueid_valid(ueid)) return -EINVAL; /* add a new ueid entry to the ueid table if there isn't one */ new_ueid = kzalloc(sizeof(*new_ueid), GFP_KERNEL); if (!new_ueid) return -ENOMEM; memcpy(new_ueid->eid, ueid, SMC_MAX_EID_LEN); write_lock(&smc_clc_eid_table.lock); if (smc_clc_eid_table.ueid_cnt >= SMC_MAX_UEID) { rc = -ERANGE; goto err_out; } list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) { if (!memcmp(tmp_ueid->eid, ueid, SMC_MAX_EID_LEN)) { rc = -EEXIST; goto err_out; } } list_add_tail(&new_ueid->list, &smc_clc_eid_table.list); smc_clc_eid_table.ueid_cnt++; write_unlock(&smc_clc_eid_table.lock); return 0; err_out: write_unlock(&smc_clc_eid_table.lock); kfree(new_ueid); return rc; } int smc_clc_ueid_count(void) { int count; read_lock(&smc_clc_eid_table.lock); count = smc_clc_eid_table.ueid_cnt; read_unlock(&smc_clc_eid_table.lock); return count; } int smc_nl_add_ueid(struct sk_buff *skb, struct genl_info *info) { struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; char *ueid; if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1) return -EINVAL; ueid = (char *)nla_data(nla_ueid); return smc_clc_ueid_add(ueid); } /* remove one or all ueid entries from the table */ static int smc_clc_ueid_remove(char *ueid) { struct smc_clc_eid_entry *lst_ueid, *tmp_ueid; int rc = -ENOENT; /* remove table entry */ write_lock(&smc_clc_eid_table.lock); list_for_each_entry_safe(lst_ueid, tmp_ueid, &smc_clc_eid_table.list, list) { if (!ueid || !memcmp(lst_ueid->eid, ueid, SMC_MAX_EID_LEN)) { list_del(&lst_ueid->list); smc_clc_eid_table.ueid_cnt--; kfree(lst_ueid); rc = 0; } } #if IS_ENABLED(CONFIG_S390) if (!rc && !smc_clc_eid_table.ueid_cnt) { smc_clc_eid_table.seid_enabled = 1; rc = -EAGAIN; /* indicate success and enabling of seid */ } #endif write_unlock(&smc_clc_eid_table.lock); return rc; } int smc_nl_remove_ueid(struct sk_buff *skb, struct genl_info *info) { struct nlattr *nla_ueid = info->attrs[SMC_NLA_EID_TABLE_ENTRY]; char *ueid; if (!nla_ueid || nla_len(nla_ueid) != SMC_MAX_EID_LEN + 1) return -EINVAL; ueid = (char *)nla_data(nla_ueid); return smc_clc_ueid_remove(ueid); } int smc_nl_flush_ueid(struct sk_buff *skb, struct genl_info *info) { smc_clc_ueid_remove(NULL); return 0; } static int smc_nl_ueid_dumpinfo(struct sk_buff *skb, u32 portid, u32 seq, u32 flags, char *ueid) { char ueid_str[SMC_MAX_EID_LEN + 1]; void *hdr; hdr = genlmsg_put(skb, portid, seq, &smc_gen_nl_family, flags, SMC_NETLINK_DUMP_UEID); if (!hdr) return -ENOMEM; memcpy(ueid_str, ueid, SMC_MAX_EID_LEN); ueid_str[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_EID_TABLE_ENTRY, ueid_str)) { genlmsg_cancel(skb, hdr); return -EMSGSIZE; } genlmsg_end(skb, hdr); return 0; } static int _smc_nl_ueid_dump(struct sk_buff *skb, u32 portid, u32 seq, int start_idx) { struct smc_clc_eid_entry *lst_ueid; int idx = 0; read_lock(&smc_clc_eid_table.lock); list_for_each_entry(lst_ueid, &smc_clc_eid_table.list, list) { if (idx++ < start_idx) continue; if (smc_nl_ueid_dumpinfo(skb, portid, seq, NLM_F_MULTI, lst_ueid->eid)) { --idx; break; } } read_unlock(&smc_clc_eid_table.lock); return idx; } int smc_nl_dump_ueid(struct sk_buff *skb, struct netlink_callback *cb) { struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); int idx; idx = _smc_nl_ueid_dump(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb_ctx->pos[0]); cb_ctx->pos[0] = idx; return skb->len; } int smc_nl_dump_seid(struct sk_buff *skb, struct netlink_callback *cb) { struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(cb); char seid_str[SMC_MAX_EID_LEN + 1]; u8 seid_enabled; void *hdr; u8 *seid; if (cb_ctx->pos[0]) return skb->len; hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &smc_gen_nl_family, NLM_F_MULTI, SMC_NETLINK_DUMP_SEID); if (!hdr) return -ENOMEM; if (!smc_ism_is_v2_capable()) goto end; smc_ism_get_system_eid(&seid); memcpy(seid_str, seid, SMC_MAX_EID_LEN); seid_str[SMC_MAX_EID_LEN] = 0; if (nla_put_string(skb, SMC_NLA_SEID_ENTRY, seid_str)) goto err; read_lock(&smc_clc_eid_table.lock); seid_enabled = smc_clc_eid_table.seid_enabled; read_unlock(&smc_clc_eid_table.lock); if (nla_put_u8(skb, SMC_NLA_SEID_ENABLED, seid_enabled)) goto err; end: genlmsg_end(skb, hdr); cb_ctx->pos[0]++; return skb->len; err: genlmsg_cancel(skb, hdr); return -EMSGSIZE; } int smc_nl_enable_seid(struct sk_buff *skb, struct genl_info *info) { #if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); smc_clc_eid_table.seid_enabled = 1; write_unlock(&smc_clc_eid_table.lock); return 0; #else return -EOPNOTSUPP; #endif } int smc_nl_disable_seid(struct sk_buff *skb, struct genl_info *info) { int rc = 0; #if IS_ENABLED(CONFIG_S390) write_lock(&smc_clc_eid_table.lock); if (!smc_clc_eid_table.ueid_cnt) rc = -ENOENT; else smc_clc_eid_table.seid_enabled = 0; write_unlock(&smc_clc_eid_table.lock); #else rc = -EOPNOTSUPP; #endif return rc; } static bool _smc_clc_match_ueid(u8 *peer_ueid) { struct smc_clc_eid_entry *tmp_ueid; list_for_each_entry(tmp_ueid, &smc_clc_eid_table.list, list) { if (!memcmp(tmp_ueid->eid, peer_ueid, SMC_MAX_EID_LEN)) return true; } return false; } bool smc_clc_match_eid(u8 *negotiated_eid, struct smc_clc_v2_extension *smc_v2_ext, u8 *peer_eid, u8 *local_eid) { bool match = false; int i; negotiated_eid[0] = 0; read_lock(&smc_clc_eid_table.lock); if (peer_eid && local_eid && smc_clc_eid_table.seid_enabled && smc_v2_ext->hdr.flag.seid && !memcmp(peer_eid, local_eid, SMC_MAX_EID_LEN)) { memcpy(negotiated_eid, peer_eid, SMC_MAX_EID_LEN); match = true; goto out; } for (i = 0; i < smc_v2_ext->hdr.eid_cnt; i++) { if (_smc_clc_match_ueid(smc_v2_ext->user_eids[i])) { memcpy(negotiated_eid, smc_v2_ext->user_eids[i], SMC_MAX_EID_LEN); match = true; goto out; } } out: read_unlock(&smc_clc_eid_table.lock); return match; } /* check arriving CLC proposal */ static bool smc_clc_msg_prop_valid(struct smc_clc_msg_proposal *pclc) { struct smc_clc_msg_proposal_prefix *pclc_prfx; struct smc_clc_smcd_v2_extension *smcd_v2_ext; struct smc_clc_msg_hdr *hdr = &pclc->hdr; struct smc_clc_v2_extension *v2_ext; v2_ext = smc_get_clc_v2_ext(pclc); pclc_prfx = smc_clc_proposal_get_prefix(pclc); if (hdr->version == SMC_V1) { if (hdr->typev1 == SMC_TYPE_N) return false; if (ntohs(hdr->length) != sizeof(*pclc) + ntohs(pclc->iparea_offset) + sizeof(*pclc_prfx) + pclc_prfx->ipv6_prefixes_cnt * sizeof(struct smc_clc_ipv6_prefix) + sizeof(struct smc_clc_msg_trail)) return false; } else { if (ntohs(hdr->length) != sizeof(*pclc) + sizeof(struct smc_clc_msg_smcd) + (hdr->typev1 != SMC_TYPE_N ? sizeof(*pclc_prfx) + pclc_prfx->ipv6_prefixes_cnt * sizeof(struct smc_clc_ipv6_prefix) : 0) + (hdr->typev2 != SMC_TYPE_N ? sizeof(*v2_ext) + v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN : 0) + (smcd_indicated(hdr->typev2) ? sizeof(*smcd_v2_ext) + v2_ext->hdr.ism_gid_cnt * sizeof(struct smc_clc_smcd_gid_chid) : 0) + sizeof(struct smc_clc_msg_trail)) return false; } return true; } /* check arriving CLC accept or confirm */ static bool smc_clc_msg_acc_conf_valid(struct smc_clc_msg_accept_confirm *clc) { struct smc_clc_msg_hdr *hdr = &clc->hdr; if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) return false; if (hdr->version == SMC_V1) { if ((hdr->typev1 == SMC_TYPE_R && ntohs(hdr->length) != SMCR_CLC_ACCEPT_CONFIRM_LEN) || (hdr->typev1 == SMC_TYPE_D && ntohs(hdr->length) != SMCD_CLC_ACCEPT_CONFIRM_LEN)) return false; } else { if (hdr->typev1 == SMC_TYPE_D && ntohs(hdr->length) < SMCD_CLC_ACCEPT_CONFIRM_LEN_V2) return false; if (hdr->typev1 == SMC_TYPE_R && ntohs(hdr->length) < SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) return false; } return true; } /* check arriving CLC decline */ static bool smc_clc_msg_decl_valid(struct smc_clc_msg_decline *dclc) { struct smc_clc_msg_hdr *hdr = &dclc->hdr; if (hdr->typev1 != SMC_TYPE_R && hdr->typev1 != SMC_TYPE_D) return false; if (hdr->version == SMC_V1) { if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline)) return false; } else { if (ntohs(hdr->length) != sizeof(struct smc_clc_msg_decline_v2)) return false; } return true; } static int smc_clc_fill_fce_v2x(struct smc_clc_first_contact_ext_v2x *fce_v2x, struct smc_init_info *ini) { int ret = sizeof(*fce_v2x); memset(fce_v2x, 0, sizeof(*fce_v2x)); fce_v2x->fce_v2_base.os_type = SMC_CLC_OS_LINUX; fce_v2x->fce_v2_base.release = ini->release_nr; memcpy(fce_v2x->fce_v2_base.hostname, smc_hostname, sizeof(smc_hostname)); if (ini->is_smcd && ini->release_nr < SMC_RELEASE_1) { ret = sizeof(struct smc_clc_first_contact_ext); goto out; } if (ini->release_nr >= SMC_RELEASE_1) { if (!ini->is_smcd) { fce_v2x->max_conns = ini->max_conns; fce_v2x->max_links = ini->max_links; } fce_v2x->feature_mask = htons(ini->feature_mask); } out: return ret; } /* check if received message has a correct header length and contains valid * heading and trailing eyecatchers */ static bool smc_clc_msg_hdr_valid(struct smc_clc_msg_hdr *clcm, bool check_trl) { struct smc_clc_msg_accept_confirm *clc; struct smc_clc_msg_proposal *pclc; struct smc_clc_msg_decline *dclc; struct smc_clc_msg_trail *trl; if (memcmp(clcm->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && memcmp(clcm->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) return false; switch (clcm->type) { case SMC_CLC_PROPOSAL: pclc = (struct smc_clc_msg_proposal *)clcm; if (!smc_clc_msg_prop_valid(pclc)) return false; trl = (struct smc_clc_msg_trail *) ((u8 *)pclc + ntohs(pclc->hdr.length) - sizeof(*trl)); break; case SMC_CLC_ACCEPT: case SMC_CLC_CONFIRM: clc = (struct smc_clc_msg_accept_confirm *)clcm; if (!smc_clc_msg_acc_conf_valid(clc)) return false; trl = (struct smc_clc_msg_trail *) ((u8 *)clc + ntohs(clc->hdr.length) - sizeof(*trl)); break; case SMC_CLC_DECLINE: dclc = (struct smc_clc_msg_decline *)clcm; if (!smc_clc_msg_decl_valid(dclc)) return false; check_trl = false; break; default: return false; } if (check_trl && memcmp(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)) && memcmp(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER))) return false; return true; } /* find ipv4 addr on device and get the prefix len, fill CLC proposal msg */ static int smc_clc_prfx_set4_rcu(struct dst_entry *dst, __be32 ipv4, struct smc_clc_msg_proposal_prefix *prop) { struct in_device *in_dev = __in_dev_get_rcu(dst->dev); const struct in_ifaddr *ifa; if (!in_dev) return -ENODEV; in_dev_for_each_ifa_rcu(ifa, in_dev) { if (!inet_ifa_match(ipv4, ifa)) continue; prop->prefix_len = inet_mask_len(ifa->ifa_mask); prop->outgoing_subnet = ifa->ifa_address & ifa->ifa_mask; /* prop->ipv6_prefixes_cnt = 0; already done by memset before */ return 0; } return -ENOENT; } /* fill CLC proposal msg with ipv6 prefixes from device */ static int smc_clc_prfx_set6_rcu(struct dst_entry *dst, struct smc_clc_msg_proposal_prefix *prop, struct smc_clc_ipv6_prefix *ipv6_prfx) { #if IS_ENABLED(CONFIG_IPV6) struct inet6_dev *in6_dev = __in6_dev_get(dst->dev); struct inet6_ifaddr *ifa; int cnt = 0; if (!in6_dev) return -ENODEV; /* use a maximum of 8 IPv6 prefixes from device */ list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) continue; ipv6_addr_prefix(&ipv6_prfx[cnt].prefix, &ifa->addr, ifa->prefix_len); ipv6_prfx[cnt].prefix_len = ifa->prefix_len; cnt++; if (cnt == SMC_CLC_MAX_V6_PREFIX) break; } prop->ipv6_prefixes_cnt = cnt; if (cnt) return 0; #endif return -ENOENT; } /* retrieve and set prefixes in CLC proposal msg */ static int smc_clc_prfx_set(struct socket *clcsock, struct smc_clc_msg_proposal_prefix *prop, struct smc_clc_ipv6_prefix *ipv6_prfx) { struct dst_entry *dst = sk_dst_get(clcsock->sk); struct sockaddr_storage addrs; struct sockaddr_in6 *addr6; struct sockaddr_in *addr; int rc = -ENOENT; if (!dst) { rc = -ENOTCONN; goto out; } if (!dst->dev) { rc = -ENODEV; goto out_rel; } /* get address to which the internal TCP socket is bound */ if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0) goto out_rel; /* analyze IP specific data of net_device belonging to TCP socket */ addr6 = (struct sockaddr_in6 *)&addrs; rcu_read_lock(); if (addrs.ss_family == PF_INET) { /* IPv4 */ addr = (struct sockaddr_in *)&addrs; rc = smc_clc_prfx_set4_rcu(dst, addr->sin_addr.s_addr, prop); } else if (ipv6_addr_v4mapped(&addr6->sin6_addr)) { /* mapped IPv4 address - peer is IPv4 only */ rc = smc_clc_prfx_set4_rcu(dst, addr6->sin6_addr.s6_addr32[3], prop); } else { /* IPv6 */ rc = smc_clc_prfx_set6_rcu(dst, prop, ipv6_prfx); } rcu_read_unlock(); out_rel: dst_release(dst); out: return rc; } /* match ipv4 addrs of dev against addr in CLC proposal */ static int smc_clc_prfx_match4_rcu(struct net_device *dev, struct smc_clc_msg_proposal_prefix *prop) { struct in_device *in_dev = __in_dev_get_rcu(dev); const struct in_ifaddr *ifa; if (!in_dev) return -ENODEV; in_dev_for_each_ifa_rcu(ifa, in_dev) { if (prop->prefix_len == inet_mask_len(ifa->ifa_mask) && inet_ifa_match(prop->outgoing_subnet, ifa)) return 0; } return -ENOENT; } /* match ipv6 addrs of dev against addrs in CLC proposal */ static int smc_clc_prfx_match6_rcu(struct net_device *dev, struct smc_clc_msg_proposal_prefix *prop) { #if IS_ENABLED(CONFIG_IPV6) struct inet6_dev *in6_dev = __in6_dev_get(dev); struct smc_clc_ipv6_prefix *ipv6_prfx; struct inet6_ifaddr *ifa; int i, max; if (!in6_dev) return -ENODEV; /* ipv6 prefix list starts behind smc_clc_msg_proposal_prefix */ ipv6_prfx = (struct smc_clc_ipv6_prefix *)((u8 *)prop + sizeof(*prop)); max = min_t(u8, prop->ipv6_prefixes_cnt, SMC_CLC_MAX_V6_PREFIX); list_for_each_entry(ifa, &in6_dev->addr_list, if_list) { if (ipv6_addr_type(&ifa->addr) & IPV6_ADDR_LINKLOCAL) continue; for (i = 0; i < max; i++) { if (ifa->prefix_len == ipv6_prfx[i].prefix_len && ipv6_prefix_equal(&ifa->addr, &ipv6_prfx[i].prefix, ifa->prefix_len)) return 0; } } #endif return -ENOENT; } /* check if proposed prefixes match one of our device prefixes */ int smc_clc_prfx_match(struct socket *clcsock, struct smc_clc_msg_proposal_prefix *prop) { struct dst_entry *dst = sk_dst_get(clcsock->sk); int rc; if (!dst) { rc = -ENOTCONN; goto out; } if (!dst->dev) { rc = -ENODEV; goto out_rel; } rcu_read_lock(); if (!prop->ipv6_prefixes_cnt) rc = smc_clc_prfx_match4_rcu(dst->dev, prop); else rc = smc_clc_prfx_match6_rcu(dst->dev, prop); rcu_read_unlock(); out_rel: dst_release(dst); out: return rc; } /* Wait for data on the tcp-socket, analyze received data * Returns: * 0 if success and it was not a decline that we received. * SMC_CLC_DECL_REPLY if decline received for fallback w/o another decl send. * clcsock error, -EINTR, -ECONNRESET, -EPROTO otherwise. */ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type, unsigned long timeout) { long rcvtimeo = smc->clcsock->sk->sk_rcvtimeo; struct sock *clc_sk = smc->clcsock->sk; struct smc_clc_msg_hdr *clcm = buf; struct msghdr msg = {NULL, 0}; int reason_code = 0; struct kvec vec = {buf, buflen}; int len, datlen, recvlen; bool check_trl = true; int krflags; /* peek the first few bytes to determine length of data to receive * so we don't consume any subsequent CLC message or payload data * in the TCP byte stream */ /* * Caller must make sure that buflen is no less than * sizeof(struct smc_clc_msg_hdr) */ krflags = MSG_PEEK | MSG_WAITALL; clc_sk->sk_rcvtimeo = timeout; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, sizeof(struct smc_clc_msg_hdr)); len = sock_recvmsg(smc->clcsock, &msg, krflags); if (signal_pending(current)) { reason_code = -EINTR; clc_sk->sk_err = EINTR; smc->sk.sk_err = EINTR; goto out; } if (clc_sk->sk_err) { reason_code = -clc_sk->sk_err; if (clc_sk->sk_err == EAGAIN && expected_type == SMC_CLC_DECLINE) clc_sk->sk_err = 0; /* reset for fallback usage */ else smc->sk.sk_err = clc_sk->sk_err; goto out; } if (!len) { /* peer has performed orderly shutdown */ smc->sk.sk_err = ECONNRESET; reason_code = -ECONNRESET; goto out; } if (len < 0) { if (len != -EAGAIN || expected_type != SMC_CLC_DECLINE) smc->sk.sk_err = -len; reason_code = len; goto out; } datlen = ntohs(clcm->length); if ((len < sizeof(struct smc_clc_msg_hdr)) || (clcm->version < SMC_V1) || ((clcm->type != SMC_CLC_DECLINE) && (clcm->type != expected_type))) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; } /* receive the complete CLC message */ memset(&msg, 0, sizeof(struct msghdr)); if (datlen > buflen) { check_trl = false; recvlen = buflen; } else { recvlen = datlen; } iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, recvlen); krflags = MSG_WAITALL; len = sock_recvmsg(smc->clcsock, &msg, krflags); if (len < recvlen || !smc_clc_msg_hdr_valid(clcm, check_trl)) { smc->sk.sk_err = EPROTO; reason_code = -EPROTO; goto out; } datlen -= len; while (datlen) { u8 tmp[SMC_CLC_RECV_BUF_LEN]; vec.iov_base = &tmp; vec.iov_len = SMC_CLC_RECV_BUF_LEN; /* receive remaining proposal message */ recvlen = datlen > SMC_CLC_RECV_BUF_LEN ? SMC_CLC_RECV_BUF_LEN : datlen; iov_iter_kvec(&msg.msg_iter, ITER_DEST, &vec, 1, recvlen); len = sock_recvmsg(smc->clcsock, &msg, krflags); datlen -= len; } if (clcm->type == SMC_CLC_DECLINE) { struct smc_clc_msg_decline *dclc; dclc = (struct smc_clc_msg_decline *)clcm; reason_code = SMC_CLC_DECL_PEERDECL; smc->peer_diagnosis = ntohl(dclc->peer_diagnosis); if (((struct smc_clc_msg_decline *)buf)->hdr.typev2 & SMC_FIRST_CONTACT_MASK) { smc->conn.lgr->sync_err = 1; smc_lgr_terminate_sched(smc->conn.lgr); } } out: clc_sk->sk_rcvtimeo = rcvtimeo; return reason_code; } /* send CLC DECLINE message across internal TCP socket */ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, u8 version) { struct smc_clc_msg_decline *dclc_v1; struct smc_clc_msg_decline_v2 dclc; struct msghdr msg; int len, send_len; struct kvec vec; dclc_v1 = (struct smc_clc_msg_decline *)&dclc; memset(&dclc, 0, sizeof(dclc)); memcpy(dclc.hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); dclc.hdr.type = SMC_CLC_DECLINE; dclc.hdr.version = version; dclc.os_type = version == SMC_V1 ? 0 : SMC_CLC_OS_LINUX; dclc.hdr.typev2 = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? SMC_FIRST_CONTACT_MASK : 0; if ((!smc_conn_lgr_valid(&smc->conn) || !smc->conn.lgr->is_smcd) && smc_ib_is_valid_local_systemid()) memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); dclc.peer_diagnosis = htonl(peer_diag_info); if (version == SMC_V1) { memcpy(dclc_v1->trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); send_len = sizeof(*dclc_v1); } else { memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); send_len = sizeof(dclc); } dclc.hdr.length = htons(send_len); memset(&msg, 0, sizeof(msg)); vec.iov_base = &dclc; vec.iov_len = send_len; len = kernel_sendmsg(smc->clcsock, &msg, &vec, 1, send_len); if (len < 0 || len < send_len) len = -EPROTO; return len > 0 ? 0 : len; } /* send CLC PROPOSAL message across internal TCP socket */ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) { struct smc_clc_smcd_v2_extension *smcd_v2_ext; struct smc_clc_msg_proposal_prefix *pclc_prfx; struct smc_clc_msg_proposal *pclc_base; struct smc_clc_smcd_gid_chid *gidchids; struct smc_clc_msg_proposal_area *pclc; struct smc_clc_ipv6_prefix *ipv6_prfx; struct net *net = sock_net(&smc->sk); struct smc_clc_v2_extension *v2_ext; struct smc_clc_msg_smcd *pclc_smcd; struct smc_clc_msg_trail *trl; struct smcd_dev *smcd; int len, i, plen, rc; int reason_code = 0; struct kvec vec[8]; struct msghdr msg; pclc = kzalloc(sizeof(*pclc), GFP_KERNEL); if (!pclc) return -ENOMEM; pclc_base = &pclc->pclc_base; pclc_smcd = &pclc->pclc_smcd; pclc_prfx = &pclc->pclc_prfx; ipv6_prfx = pclc->pclc_prfx_ipv6; v2_ext = container_of(&pclc->pclc_v2_ext, struct smc_clc_v2_extension, fixed); smcd_v2_ext = container_of(&pclc->pclc_smcd_v2_ext, struct smc_clc_smcd_v2_extension, fixed); gidchids = pclc->pclc_gidchids; trl = &pclc->pclc_trl; pclc_base->hdr.version = SMC_V2; pclc_base->hdr.typev1 = ini->smc_type_v1; pclc_base->hdr.typev2 = ini->smc_type_v2; plen = sizeof(*pclc_base) + sizeof(*pclc_smcd) + sizeof(*trl); /* retrieve ip prefixes for CLC proposal msg */ if (ini->smc_type_v1 != SMC_TYPE_N) { rc = smc_clc_prfx_set(smc->clcsock, pclc_prfx, ipv6_prfx); if (rc) { if (ini->smc_type_v2 == SMC_TYPE_N) { kfree(pclc); return SMC_CLC_DECL_CNFERR; } pclc_base->hdr.typev1 = SMC_TYPE_N; } else { pclc_base->iparea_offset = htons(sizeof(*pclc_smcd)); plen += sizeof(*pclc_prfx) + pclc_prfx->ipv6_prefixes_cnt * sizeof(ipv6_prfx[0]); } } /* build SMC Proposal CLC message */ memcpy(pclc_base->hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); pclc_base->hdr.type = SMC_CLC_PROPOSAL; if (smcr_indicated(ini->smc_type_v1)) { /* add SMC-R specifics */ memcpy(pclc_base->lcl.id_for_peer, local_systemid, sizeof(local_systemid)); memcpy(pclc_base->lcl.gid, ini->ib_gid, SMC_GID_SIZE); memcpy(pclc_base->lcl.mac, &ini->ib_dev->mac[ini->ib_port - 1], ETH_ALEN); } if (smcd_indicated(ini->smc_type_v1)) { struct smcd_gid smcd_gid; /* add SMC-D specifics */ if (ini->ism_dev[0]) { smcd = ini->ism_dev[0]; smcd->ops->get_local_gid(smcd, &smcd_gid); pclc_smcd->ism.gid = htonll(smcd_gid.gid); pclc_smcd->ism.chid = htons(smc_ism_get_chid(ini->ism_dev[0])); } } if (ini->smc_type_v2 == SMC_TYPE_N) { pclc_smcd->v2_ext_offset = 0; } else { struct smc_clc_eid_entry *ueident; u16 v2_ext_offset; v2_ext->hdr.flag.release = SMC_RELEASE; v2_ext_offset = sizeof(*pclc_smcd) - offsetofend(struct smc_clc_msg_smcd, v2_ext_offset); if (ini->smc_type_v1 != SMC_TYPE_N) v2_ext_offset += sizeof(*pclc_prfx) + pclc_prfx->ipv6_prefixes_cnt * sizeof(ipv6_prfx[0]); pclc_smcd->v2_ext_offset = htons(v2_ext_offset); plen += sizeof(*v2_ext); v2_ext->feature_mask = htons(SMC_FEATURE_MASK); read_lock(&smc_clc_eid_table.lock); v2_ext->hdr.eid_cnt = smc_clc_eid_table.ueid_cnt; plen += smc_clc_eid_table.ueid_cnt * SMC_MAX_EID_LEN; i = 0; list_for_each_entry(ueident, &smc_clc_eid_table.list, list) { memcpy(v2_ext->user_eids[i++], ueident->eid, sizeof(ueident->eid)); } read_unlock(&smc_clc_eid_table.lock); } if (smcd_indicated(ini->smc_type_v2)) { struct smcd_gid smcd_gid; u8 *eid = NULL; int entry = 0; v2_ext->hdr.flag.seid = smc_clc_eid_table.seid_enabled; v2_ext->hdr.smcd_v2_ext_offset = htons(sizeof(*v2_ext) - offsetofend(struct smc_clnt_opts_area_hdr, smcd_v2_ext_offset) + v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); smc_ism_get_system_eid(&eid); if (eid && v2_ext->hdr.flag.seid) memcpy(smcd_v2_ext->system_eid, eid, SMC_MAX_EID_LEN); plen += sizeof(*smcd_v2_ext); if (ini->ism_offered_cnt) { for (i = 1; i <= ini->ism_offered_cnt; i++) { smcd = ini->ism_dev[i]; smcd->ops->get_local_gid(smcd, &smcd_gid); gidchids[entry].chid = htons(smc_ism_get_chid(ini->ism_dev[i])); gidchids[entry].gid = htonll(smcd_gid.gid); if (smc_ism_is_emulated(smcd)) { /* an Emulated-ISM device takes two * entries. CHID of the second entry * repeats that of the first entry. */ gidchids[entry + 1].chid = gidchids[entry].chid; gidchids[entry + 1].gid = htonll(smcd_gid.gid_ext); entry++; } entry++; } plen += entry * sizeof(struct smc_clc_smcd_gid_chid); } v2_ext->hdr.ism_gid_cnt = entry; } if (smcr_indicated(ini->smc_type_v2)) { memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); v2_ext->max_conns = net->smc.sysctl_max_conns_per_lgr; v2_ext->max_links = net->smc.sysctl_max_links_per_lgr; } pclc_base->hdr.length = htons(plen); memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); /* send SMC Proposal CLC message */ memset(&msg, 0, sizeof(msg)); i = 0; vec[i].iov_base = pclc_base; vec[i++].iov_len = sizeof(*pclc_base); vec[i].iov_base = pclc_smcd; vec[i++].iov_len = sizeof(*pclc_smcd); if (ini->smc_type_v1 != SMC_TYPE_N) { vec[i].iov_base = pclc_prfx; vec[i++].iov_len = sizeof(*pclc_prfx); if (pclc_prfx->ipv6_prefixes_cnt > 0) { vec[i].iov_base = ipv6_prfx; vec[i++].iov_len = pclc_prfx->ipv6_prefixes_cnt * sizeof(ipv6_prfx[0]); } } if (ini->smc_type_v2 != SMC_TYPE_N) { vec[i].iov_base = v2_ext; vec[i++].iov_len = sizeof(*v2_ext) + (v2_ext->hdr.eid_cnt * SMC_MAX_EID_LEN); if (smcd_indicated(ini->smc_type_v2)) { vec[i].iov_base = smcd_v2_ext; vec[i++].iov_len = sizeof(*smcd_v2_ext); if (ini->ism_offered_cnt) { vec[i].iov_base = gidchids; vec[i++].iov_len = v2_ext->hdr.ism_gid_cnt * sizeof(struct smc_clc_smcd_gid_chid); } } } vec[i].iov_base = trl; vec[i++].iov_len = sizeof(*trl); /* due to the few bytes needed for clc-handshake this cannot block */ len = kernel_sendmsg(smc->clcsock, &msg, vec, i, plen); if (len < 0) { smc->sk.sk_err = smc->clcsock->sk->sk_err; reason_code = -smc->sk.sk_err; } else if (len < ntohs(pclc_base->hdr.length)) { reason_code = -ENETUNREACH; smc->sk.sk_err = -reason_code; } kfree(pclc); return reason_code; } static void smcd_clc_prep_confirm_accept(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc, int first_contact, u8 version, u8 *eid, struct smc_init_info *ini, int *fce_len, struct smc_clc_first_contact_ext_v2x *fce_v2x, struct smc_clc_msg_trail *trl) { struct smcd_dev *smcd = conn->lgr->smcd; struct smcd_gid smcd_gid; u16 chid; int len; /* SMC-D specific settings */ memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)); smcd->ops->get_local_gid(smcd, &smcd_gid); clc->hdr.typev1 = SMC_TYPE_D; clc->d0.gid = htonll(smcd_gid.gid); clc->d0.token = htonll(conn->rmb_desc->token); clc->d0.dmbe_size = conn->rmbe_size_comp; clc->d0.dmbe_idx = 0; memcpy(&clc->d0.linkid, conn->lgr->id, SMC_LGR_ID_SIZE); if (version == SMC_V1) { clc->hdr.length = htons(SMCD_CLC_ACCEPT_CONFIRM_LEN); } else { chid = smc_ism_get_chid(smcd); clc->d1.chid = htons(chid); if (eid && eid[0]) memcpy(clc->d1.eid, eid, SMC_MAX_EID_LEN); if (__smc_ism_is_emulated(chid)) clc->d1.gid_ext = htonll(smcd_gid.gid_ext); len = SMCD_CLC_ACCEPT_CONFIRM_LEN_V2; if (first_contact) { *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini); len += *fce_len; } clc->hdr.length = htons(len); } memcpy(trl->eyecatcher, SMCD_EYECATCHER, sizeof(SMCD_EYECATCHER)); } static void smcr_clc_prep_confirm_accept(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc, int first_contact, u8 version, u8 *eid, struct smc_init_info *ini, int *fce_len, struct smc_clc_first_contact_ext_v2x *fce_v2x, struct smc_clc_fce_gid_ext *gle, struct smc_clc_msg_trail *trl) { struct smc_link *link = conn->lnk; int len; /* SMC-R specific settings */ memcpy(clc->hdr.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); clc->hdr.typev1 = SMC_TYPE_R; memcpy(clc->r0.lcl.id_for_peer, local_systemid, sizeof(local_systemid)); memcpy(&clc->r0.lcl.gid, link->gid, SMC_GID_SIZE); memcpy(&clc->r0.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(clc->r0.qpn, link->roce_qp->qp_num); clc->r0.rmb_rkey = htonl(conn->rmb_desc->mr[link->link_idx]->rkey); clc->r0.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */ clc->r0.rmbe_alert_token = htonl(conn->alert_token_local); switch (clc->hdr.type) { case SMC_CLC_ACCEPT: clc->r0.qp_mtu = link->path_mtu; break; case SMC_CLC_CONFIRM: clc->r0.qp_mtu = min(link->path_mtu, link->peer_mtu); break; } clc->r0.rmbe_size = conn->rmbe_size_comp; clc->r0.rmb_dma_addr = conn->rmb_desc->is_vm ? cpu_to_be64((uintptr_t)conn->rmb_desc->cpu_addr) : cpu_to_be64((u64)sg_dma_address (conn->rmb_desc->sgt[link->link_idx].sgl)); hton24(clc->r0.psn, link->psn_initial); if (version == SMC_V1) { clc->hdr.length = htons(SMCR_CLC_ACCEPT_CONFIRM_LEN); } else { if (eid && eid[0]) memcpy(clc->r1.eid, eid, SMC_MAX_EID_LEN); len = SMCR_CLC_ACCEPT_CONFIRM_LEN_V2; if (first_contact) { *fce_len = smc_clc_fill_fce_v2x(fce_v2x, ini); len += *fce_len; fce_v2x->fce_v2_base.v2_direct = !link->lgr->uses_gateway; if (clc->hdr.type == SMC_CLC_CONFIRM) { memset(gle, 0, sizeof(*gle)); gle->gid_cnt = ini->smcrv2.gidlist.len; len += sizeof(*gle); len += gle->gid_cnt * sizeof(gle->gid[0]); } } clc->hdr.length = htons(len); } memcpy(trl->eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); } /* build and send CLC CONFIRM / ACCEPT message */ static int smc_clc_send_confirm_accept(struct smc_sock *smc, struct smc_clc_msg_accept_confirm *clc, int first_contact, u8 version, u8 *eid, struct smc_init_info *ini) { struct smc_clc_first_contact_ext_v2x fce_v2x; struct smc_connection *conn = &smc->conn; struct smc_clc_fce_gid_ext gle; struct smc_clc_msg_trail trl; int i, fce_len; struct kvec vec[5]; struct msghdr msg; /* send SMC Confirm CLC msg */ clc->hdr.version = version; /* SMC version */ if (first_contact) clc->hdr.typev2 |= SMC_FIRST_CONTACT_MASK; if (conn->lgr->is_smcd) smcd_clc_prep_confirm_accept(conn, clc, first_contact, version, eid, ini, &fce_len, &fce_v2x, &trl); else smcr_clc_prep_confirm_accept(conn, clc, first_contact, version, eid, ini, &fce_len, &fce_v2x, &gle, &trl); memset(&msg, 0, sizeof(msg)); i = 0; vec[i].iov_base = clc; if (version > SMC_V1) vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? SMCD_CLC_ACCEPT_CONFIRM_LEN_V2 : SMCR_CLC_ACCEPT_CONFIRM_LEN_V2) - sizeof(trl); else vec[i++].iov_len = (clc->hdr.typev1 == SMC_TYPE_D ? SMCD_CLC_ACCEPT_CONFIRM_LEN : SMCR_CLC_ACCEPT_CONFIRM_LEN) - sizeof(trl); if (version > SMC_V1 && first_contact) { vec[i].iov_base = &fce_v2x; vec[i++].iov_len = fce_len; if (!conn->lgr->is_smcd) { if (clc->hdr.type == SMC_CLC_CONFIRM) { vec[i].iov_base = &gle; vec[i++].iov_len = sizeof(gle); vec[i].iov_base = &ini->smcrv2.gidlist.list; vec[i++].iov_len = gle.gid_cnt * sizeof(gle.gid[0]); } } } vec[i].iov_base = &trl; vec[i++].iov_len = sizeof(trl); return kernel_sendmsg(smc->clcsock, &msg, vec, 1, ntohs(clc->hdr.length)); } /* send CLC CONFIRM message across internal TCP socket */ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, u8 version, u8 *eid, struct smc_init_info *ini) { struct smc_clc_msg_accept_confirm cclc; int reason_code = 0; int len; /* send SMC Confirm CLC msg */ memset(&cclc, 0, sizeof(cclc)); cclc.hdr.type = SMC_CLC_CONFIRM; len = smc_clc_send_confirm_accept(smc, &cclc, clnt_first_contact, version, eid, ini); if (len < ntohs(cclc.hdr.length)) { if (len >= 0) { reason_code = -ENETUNREACH; smc->sk.sk_err = -reason_code; } else { smc->sk.sk_err = smc->clcsock->sk->sk_err; reason_code = -smc->sk.sk_err; } } return reason_code; } /* send CLC ACCEPT message across internal TCP socket */ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, u8 version, u8 *negotiated_eid, struct smc_init_info *ini) { struct smc_clc_msg_accept_confirm aclc; int len; memset(&aclc, 0, sizeof(aclc)); aclc.hdr.type = SMC_CLC_ACCEPT; len = smc_clc_send_confirm_accept(new_smc, &aclc, srv_first_contact, version, negotiated_eid, ini); if (len < ntohs(aclc.hdr.length)) len = len >= 0 ? -EPROTO : -new_smc->clcsock->sk->sk_err; return len > 0 ? 0 : len; } int smc_clc_srv_v2x_features_validate(struct smc_sock *smc, struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { struct smc_clc_v2_extension *pclc_v2_ext; struct net *net = sock_net(&smc->sk); ini->max_conns = SMC_CONN_PER_LGR_MAX; ini->max_links = SMC_LINKS_ADD_LNK_MAX; ini->feature_mask = SMC_FEATURE_MASK; if ((!(ini->smcd_version & SMC_V2) && !(ini->smcr_version & SMC_V2)) || ini->release_nr < SMC_RELEASE_1) return 0; pclc_v2_ext = smc_get_clc_v2_ext(pclc); if (!pclc_v2_ext) return SMC_CLC_DECL_NOV2EXT; if (ini->smcr_version & SMC_V2) { ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, net->smc.sysctl_max_conns_per_lgr); if (ini->max_conns < SMC_CONN_PER_LGR_MIN) return SMC_CLC_DECL_MAXCONNERR; ini->max_links = min_t(u8, pclc_v2_ext->max_links, net->smc.sysctl_max_links_per_lgr); if (ini->max_links < SMC_LINKS_ADD_LNK_MIN) return SMC_CLC_DECL_MAXLINKERR; } return 0; } int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, struct smc_init_info *ini) { struct smc_clc_first_contact_ext_v2x *fce_v2x = (struct smc_clc_first_contact_ext_v2x *)fce; if (ini->release_nr < SMC_RELEASE_1) return 0; if (!ini->is_smcd) { if (fce_v2x->max_conns < SMC_CONN_PER_LGR_MIN) return SMC_CLC_DECL_MAXCONNERR; ini->max_conns = fce_v2x->max_conns; if (fce_v2x->max_links > SMC_LINKS_ADD_LNK_MAX || fce_v2x->max_links < SMC_LINKS_ADD_LNK_MIN) return SMC_CLC_DECL_MAXLINKERR; ini->max_links = fce_v2x->max_links; } /* common supplemental features of server and client */ ini->feature_mask = ntohs(fce_v2x->feature_mask) & SMC_FEATURE_MASK; return 0; } int smc_clc_v2x_features_confirm_check(struct smc_clc_msg_accept_confirm *cclc, struct smc_init_info *ini) { struct smc_clc_first_contact_ext *fce = smc_get_clc_first_contact_ext(cclc, ini->is_smcd); struct smc_clc_first_contact_ext_v2x *fce_v2x = (struct smc_clc_first_contact_ext_v2x *)fce; if (cclc->hdr.version == SMC_V1 || !(cclc->hdr.typev2 & SMC_FIRST_CONTACT_MASK)) return 0; if (ini->release_nr != fce->release) return SMC_CLC_DECL_RELEASEERR; if (fce->release < SMC_RELEASE_1) return 0; if (!ini->is_smcd) { if (fce_v2x->max_conns != ini->max_conns) return SMC_CLC_DECL_MAXCONNERR; if (fce_v2x->max_links != ini->max_links) return SMC_CLC_DECL_MAXLINKERR; } /* common supplemental features returned by client */ ini->feature_mask = ntohs(fce_v2x->feature_mask); return 0; } void smc_clc_get_hostname(u8 **host) { *host = &smc_hostname[0]; } void __init smc_clc_init(void) { struct new_utsname *u; memset(smc_hostname, _S, sizeof(smc_hostname)); /* ASCII blanks */ u = utsname(); memcpy(smc_hostname, u->nodename, min_t(size_t, strlen(u->nodename), sizeof(smc_hostname))); INIT_LIST_HEAD(&smc_clc_eid_table.list); rwlock_init(&smc_clc_eid_table.lock); smc_clc_eid_table.ueid_cnt = 0; #if IS_ENABLED(CONFIG_S390) smc_clc_eid_table.seid_enabled = 1; #else smc_clc_eid_table.seid_enabled = 0; #endif } void smc_clc_exit(void) { smc_clc_ueid_remove(NULL); } |
273 62 62 9 90 28 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 | /* SPDX-License-Identifier: GPL-2.0-or-later */ /* Internal procfs definitions * * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) */ #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/refcount.h> #include <linux/spinlock.h> #include <linux/atomic.h> #include <linux/binfmts.h> #include <linux/sched/coredump.h> #include <linux/sched/task.h> #include <linux/mm.h> struct ctl_table_header; struct mempolicy; /* * This is not completely implemented yet. The idea is to * create an in-memory tree (like the actual /proc filesystem * tree) of these proc_dir_entries, so that we can dynamically * add new files to /proc. * * parent/subdir are used for the directory structure (every /proc file has a * parent, but "subdir" is empty for all non-directory entries). * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { /* * number of callers into module in progress; * negative -> it's going away RSN */ atomic_t in_use; refcount_t refcnt; struct list_head pde_openers; /* who did ->open, but not ->release */ /* protects ->pde_openers and all struct pde_opener instances */ spinlock_t pde_unload_lock; struct completion *pde_unload_completion; const struct inode_operations *proc_iops; union { const struct proc_ops *proc_ops; const struct file_operations *proc_dir_ops; }; const struct dentry_operations *proc_dops; union { const struct seq_operations *seq_ops; int (*single_show)(struct seq_file *, void *); }; proc_write_t write; void *data; unsigned int state_size; unsigned int low_ino; nlink_t nlink; kuid_t uid; kgid_t gid; loff_t size; struct proc_dir_entry *parent; struct rb_root subdir; struct rb_node subdir_node; char *name; umode_t mode; u8 flags; u8 namelen; char inline_name[]; } __randomize_layout; #define SIZEOF_PDE ( \ sizeof(struct proc_dir_entry) < 128 ? 128 : \ sizeof(struct proc_dir_entry) < 192 ? 192 : \ sizeof(struct proc_dir_entry) < 256 ? 256 : \ sizeof(struct proc_dir_entry) < 512 ? 512 : \ 0) #define SIZEOF_PDE_INLINE_NAME (SIZEOF_PDE - sizeof(struct proc_dir_entry)) static inline bool pde_is_permanent(const struct proc_dir_entry *pde) { return pde->flags & PROC_ENTRY_PERMANENT; } static inline void pde_make_permanent(struct proc_dir_entry *pde) { pde->flags |= PROC_ENTRY_PERMANENT; } extern struct kmem_cache *proc_dir_entry_cache; void pde_free(struct proc_dir_entry *pde); union proc_op { int (*proc_get_link)(struct dentry *, struct path *); int (*proc_show)(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *task); int lsmid; }; struct proc_inode { struct pid *pid; unsigned int fd; union proc_op op; struct proc_dir_entry *pde; struct ctl_table_header *sysctl; const struct ctl_table *sysctl_entry; struct hlist_node sibling_inodes; const struct proc_ns_operations *ns_ops; struct inode vfs_inode; } __randomize_layout; /* * General functions */ static inline struct proc_inode *PROC_I(const struct inode *inode) { return container_of(inode, struct proc_inode, vfs_inode); } static inline struct proc_dir_entry *PDE(const struct inode *inode) { return PROC_I(inode)->pde; } static inline struct pid *proc_pid(const struct inode *inode) { return PROC_I(inode)->pid; } static inline struct task_struct *get_proc_task(const struct inode *inode) { return get_pid_task(proc_pid(inode), PIDTYPE_PID); } void task_dump_owner(struct task_struct *task, umode_t mode, kuid_t *ruid, kgid_t *rgid); unsigned name_to_int(const struct qstr *qstr); /* * Offset of the first process in the /proc root directory.. */ #define FIRST_PROCESS_ENTRY 256 /* Worst case buffer size needed for holding an integer. */ #define PROC_NUMBUF 13 /** * folio_precise_page_mapcount() - Number of mappings of this folio page. * @folio: The folio. * @page: The page. * * The number of present user page table entries that reference this page * as tracked via the RMAP: either referenced directly (PTE) or as part of * a larger area that covers this page (e.g., PMD). * * Use this function only for the calculation of existing statistics * (USS, PSS, mapcount_max) and for debugging purposes (/proc/kpagecount). * * Do not add new users. * * Returns: The number of mappings of this folio page. 0 for * folios that are not mapped to user space or are not tracked via the RMAP * (e.g., shared zeropage). */ static inline int folio_precise_page_mapcount(struct folio *folio, struct page *page) { int mapcount = atomic_read(&page->_mapcount) + 1; if (page_mapcount_is_type(mapcount)) mapcount = 0; if (folio_test_large(folio)) mapcount += folio_entire_mapcount(folio); return mapcount; } /* * array.c */ extern const struct file_operations proc_tid_children_operations; extern void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape); extern int proc_tid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_tgid_stat(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_pid_status(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, struct pid *, struct task_struct *); /* * base.c */ extern const struct dentry_operations pid_dentry_operations; extern int pid_getattr(struct mnt_idmap *, const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct mnt_idmap *, struct dentry *, struct iattr *); extern void proc_pid_evict_inode(struct proc_inode *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern void pid_update_inode(struct task_struct *, struct inode *); extern int pid_delete_dentry(const struct dentry *); extern int proc_pid_readdir(struct file *, struct dir_context *); struct dentry *proc_pid_lookup(struct dentry *, unsigned int); extern loff_t mem_lseek(struct file *, loff_t, int); /* Lookups */ typedef struct dentry *instantiate_t(struct dentry *, struct task_struct *, const void *); bool proc_fill_cache(struct file *, struct dir_context *, const char *, unsigned int, instantiate_t, struct task_struct *, const void *); /* * generic.c */ struct proc_dir_entry *proc_create_reg(const char *name, umode_t mode, struct proc_dir_entry **parent, void *data); struct proc_dir_entry *proc_register(struct proc_dir_entry *dir, struct proc_dir_entry *dp); extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *); static inline void pde_get(struct proc_dir_entry *pde) { refcount_inc(&pde->refcnt); } extern void pde_put(struct proc_dir_entry *); static inline bool is_empty_pde(const struct proc_dir_entry *pde) { return S_ISDIR(pde->mode) && !pde->proc_iops; } extern ssize_t proc_simple_write(struct file *, const char __user *, size_t, loff_t *); /* * inode.c */ struct pde_opener { struct list_head lh; struct file *file; bool closing; struct completion *c; } __randomize_layout; extern const struct inode_operations proc_link_inode_operations; extern const struct inode_operations proc_pid_link_inode_operations; extern const struct super_operations proc_sops; void proc_init_kmemcache(void); void proc_invalidate_siblings_dcache(struct hlist_head *inodes, spinlock_t *lock); void set_proc_pid_nlink(void); extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *); extern void proc_entry_rundown(struct proc_dir_entry *); /* * proc_namespaces.c */ extern const struct inode_operations proc_ns_dir_inode_operations; extern const struct file_operations proc_ns_dir_operations; /* * proc_net.c */ extern const struct file_operations proc_net_operations; extern const struct inode_operations proc_net_inode_operations; #ifdef CONFIG_NET extern int proc_net_init(void); #else static inline int proc_net_init(void) { return 0; } #endif /* * proc_self.c */ extern int proc_setup_self(struct super_block *); /* * proc_thread_self.c */ extern int proc_setup_thread_self(struct super_block *); extern void proc_thread_self_init(void); /* * proc_sysctl.c */ #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); extern void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head); #else static inline void proc_sys_init(void) { } static inline void proc_sys_evict_inode(struct inode *inode, struct ctl_table_header *head) { } #endif /* * proc_tty.c */ #ifdef CONFIG_TTY extern void proc_tty_init(void); #else static inline void proc_tty_init(void) {} #endif /* * root.c */ extern struct proc_dir_entry proc_root; extern void proc_self_init(void); /* * task_[no]mmu.c */ struct mem_size_stats; struct proc_maps_private { struct inode *inode; struct task_struct *task; struct mm_struct *mm; struct vma_iterator iter; #ifdef CONFIG_NUMA struct mempolicy *task_mempolicy; #endif } __randomize_layout; struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode); extern const struct file_operations proc_pid_maps_operations; extern const struct file_operations proc_pid_numa_maps_operations; extern const struct file_operations proc_pid_smaps_operations; extern const struct file_operations proc_pid_smaps_rollup_operations; extern const struct file_operations proc_clear_refs_operations; extern const struct file_operations proc_pagemap_operations; extern unsigned long task_vsize(struct mm_struct *); extern unsigned long task_statm(struct mm_struct *, unsigned long *, unsigned long *, unsigned long *, unsigned long *); extern void task_mem(struct seq_file *, struct mm_struct *); extern const struct dentry_operations proc_net_dentry_ops; static inline void pde_force_lookup(struct proc_dir_entry *pde) { /* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */ pde->proc_dops = &proc_net_dentry_ops; } /* * Add a new procfs dentry that can't serve as a mountpoint. That should * encompass anything that is ephemeral and can just disappear while the * process is still around. */ static inline struct dentry *proc_splice_unmountable(struct inode *inode, struct dentry *dentry, const struct dentry_operations *d_ops) { d_set_d_op(dentry, d_ops); dont_mount(dentry); return d_splice_alias(inode, dentry); } |
20 10 13 66 1 1 63 61 66 14 1 11 2 11 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 | // SPDX-License-Identifier: GPL-2.0-or-later /** -*- linux-c -*- *********************************************************** * Linux PPP over X/Ethernet (PPPoX/PPPoE) Sockets * * PPPoX --- Generic PPP encapsulation socket family * PPPoE --- PPP over Ethernet (RFC 2516) * * Version: 0.5.2 * * Author: Michal Ostrowski <mostrows@speakeasy.net> * * 051000 : Initialization cleanup * * License: */ #include <linux/string.h> #include <linux/module.h> #include <linux/kernel.h> #include <linux/compat.h> #include <linux/errno.h> #include <linux/netdevice.h> #include <linux/net.h> #include <linux/init.h> #include <linux/if_pppox.h> #include <linux/ppp_defs.h> #include <linux/ppp-ioctl.h> #include <linux/ppp_channel.h> #include <linux/kmod.h> #include <net/sock.h> #include <linux/uaccess.h> static const struct pppox_proto *pppox_protos[PX_MAX_PROTO + 1]; int register_pppox_proto(int proto_num, const struct pppox_proto *pp) { if (proto_num < 0 || proto_num > PX_MAX_PROTO) return -EINVAL; if (pppox_protos[proto_num]) return -EALREADY; pppox_protos[proto_num] = pp; return 0; } void unregister_pppox_proto(int proto_num) { if (proto_num >= 0 && proto_num <= PX_MAX_PROTO) pppox_protos[proto_num] = NULL; } void pppox_unbind_sock(struct sock *sk) { /* Clear connection to ppp device, if attached. */ if (sk->sk_state & (PPPOX_BOUND | PPPOX_CONNECTED)) { ppp_unregister_channel(&pppox_sk(sk)->chan); sk->sk_state = PPPOX_DEAD; } } EXPORT_SYMBOL(register_pppox_proto); EXPORT_SYMBOL(unregister_pppox_proto); EXPORT_SYMBOL(pppox_unbind_sock); int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct pppox_sock *po = pppox_sk(sk); int rc; lock_sock(sk); switch (cmd) { case PPPIOCGCHAN: { int index; rc = -ENOTCONN; if (!(sk->sk_state & PPPOX_CONNECTED)) break; rc = -EINVAL; index = ppp_channel_index(&po->chan); if (put_user(index , (int __user *) arg)) break; rc = 0; sk->sk_state |= PPPOX_BOUND; break; } default: rc = pppox_protos[sk->sk_protocol]->ioctl ? pppox_protos[sk->sk_protocol]->ioctl(sock, cmd, arg) : -ENOTTY; } release_sock(sk); return rc; } EXPORT_SYMBOL(pppox_ioctl); #ifdef CONFIG_COMPAT int pppox_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { if (cmd == PPPOEIOCSFWD32) cmd = PPPOEIOCSFWD; return pppox_ioctl(sock, cmd, (unsigned long)compat_ptr(arg)); } EXPORT_SYMBOL(pppox_compat_ioctl); #endif static int pppox_create(struct net *net, struct socket *sock, int protocol, int kern) { int rc = -EPROTOTYPE; if (protocol < 0 || protocol > PX_MAX_PROTO) goto out; rc = -EPROTONOSUPPORT; if (!pppox_protos[protocol]) request_module("net-pf-%d-proto-%d", PF_PPPOX, protocol); if (!pppox_protos[protocol] || !try_module_get(pppox_protos[protocol]->owner)) goto out; rc = pppox_protos[protocol]->create(net, sock, kern); module_put(pppox_protos[protocol]->owner); out: return rc; } static const struct net_proto_family pppox_proto_family = { .family = PF_PPPOX, .create = pppox_create, .owner = THIS_MODULE, }; static int __init pppox_init(void) { return sock_register(&pppox_proto_family); } static void __exit pppox_exit(void) { sock_unregister(PF_PPPOX); } module_init(pppox_init); module_exit(pppox_exit); MODULE_AUTHOR("Michal Ostrowski <mostrows@speakeasy.net>"); MODULE_DESCRIPTION("PPP over Ethernet driver (generic socket layer)"); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_PPPOX); |
22 53 54 55 32 53 52 83 57 2 32 22 22 22 42 38 1 3 20 20 57 59 14 118 258 44 43 20 16 16 10 171 110 61 141 140 226 225 3800 3801 1369 1366 1265 164 2 24 24 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 | // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2005-2010 IBM Corporation * * Author: * Mimi Zohar <zohar@us.ibm.com> * Kylene Hall <kjhall@us.ibm.com> * * File: evm_main.c * implements evm_inode_setxattr, evm_inode_post_setxattr, * evm_inode_removexattr, evm_verifyxattr, and evm_inode_set_acl. */ #define pr_fmt(fmt) "EVM: "fmt #include <linux/init.h> #include <linux/audit.h> #include <linux/xattr.h> #include <linux/integrity.h> #include <linux/evm.h> #include <linux/magic.h> #include <linux/posix_acl_xattr.h> #include <linux/lsm_hooks.h> #include <crypto/hash.h> #include <crypto/hash_info.h> #include <crypto/utils.h> #include "evm.h" int evm_initialized; static const char * const integrity_status_msg[] = { "pass", "pass_immutable", "fail", "fail_immutable", "no_label", "no_xattrs", "unknown" }; int evm_hmac_attrs; static struct xattr_list evm_config_default_xattrnames[] = { { .name = XATTR_NAME_SELINUX, .enabled = IS_ENABLED(CONFIG_SECURITY_SELINUX) }, { .name = XATTR_NAME_SMACK, .enabled = IS_ENABLED(CONFIG_SECURITY_SMACK) }, { .name = XATTR_NAME_SMACKEXEC, .enabled = IS_ENABLED(CONFIG_EVM_EXTRA_SMACK_XATTRS) }, { .name = XATTR_NAME_SMACKTRANSMUTE, .enabled = IS_ENABLED(CONFIG_EVM_EXTRA_SMACK_XATTRS) }, { .name = XATTR_NAME_SMACKMMAP, .enabled = IS_ENABLED(CONFIG_EVM_EXTRA_SMACK_XATTRS) }, { .name = XATTR_NAME_APPARMOR, .enabled = IS_ENABLED(CONFIG_SECURITY_APPARMOR) }, { .name = XATTR_NAME_IMA, .enabled = IS_ENABLED(CONFIG_IMA_APPRAISE) }, { .name = XATTR_NAME_CAPS, .enabled = true }, }; LIST_HEAD(evm_config_xattrnames); static int evm_fixmode __ro_after_init; static int __init evm_set_fixmode(char *str) { if (strncmp(str, "fix", 3) == 0) evm_fixmode = 1; else pr_err("invalid \"%s\" mode", str); return 1; } __setup("evm=", evm_set_fixmode); static void __init evm_init_config(void) { int i, xattrs; xattrs = ARRAY_SIZE(evm_config_default_xattrnames); pr_info("Initialising EVM extended attributes:\n"); for (i = 0; i < xattrs; i++) { pr_info("%s%s\n", evm_config_default_xattrnames[i].name, !evm_config_default_xattrnames[i].enabled ? " (disabled)" : ""); list_add_tail(&evm_config_default_xattrnames[i].list, &evm_config_xattrnames); } #ifdef CONFIG_EVM_ATTR_FSUUID evm_hmac_attrs |= EVM_ATTR_FSUUID; #endif pr_info("HMAC attrs: 0x%x\n", evm_hmac_attrs); } static bool evm_key_loaded(void) { return (bool)(evm_initialized & EVM_KEY_MASK); } /* * This function determines whether or not it is safe to ignore verification * errors, based on the ability of EVM to calculate HMACs. If the HMAC key * is not loaded, and it cannot be loaded in the future due to the * EVM_SETUP_COMPLETE initialization flag, allowing an operation despite the * attrs/xattrs being found invalid will not make them valid. */ static bool evm_hmac_disabled(void) { if (evm_initialized & EVM_INIT_HMAC) return false; if (!(evm_initialized & EVM_SETUP_COMPLETE)) return false; return true; } static int evm_find_protected_xattrs(struct dentry *dentry) { struct inode *inode = d_backing_inode(dentry); struct xattr_list *xattr; int error; int count = 0; if (!(inode->i_opflags & IOP_XATTR)) return -EOPNOTSUPP; list_for_each_entry_lockless(xattr, &evm_config_xattrnames, list) { error = __vfs_getxattr(dentry, inode, xattr->name, NULL, 0); if (error < 0) { if (error == -ENODATA) continue; return error; } count++; } return count; } static int is_unsupported_hmac_fs(struct dentry *dentry) { struct inode *inode = d_backing_inode(dentry); if (inode->i_sb->s_iflags & SB_I_EVM_HMAC_UNSUPPORTED) { pr_info_once("%s not supported\n", inode->i_sb->s_type->name); return 1; } return 0; } /* * evm_verify_hmac - calculate and compare the HMAC with the EVM xattr * * Compute the HMAC on the dentry's protected set of extended attributes * and compare it against the stored security.evm xattr. * * For performance: * - use the previoulsy retrieved xattr value and length to calculate the * HMAC.) * - cache the verification result in the iint, when available. * * Returns integrity status */ static enum integrity_status evm_verify_hmac(struct dentry *dentry, const char *xattr_name, char *xattr_value, size_t xattr_value_len) { struct evm_ima_xattr_data *xattr_data = NULL; struct signature_v2_hdr *hdr; enum integrity_status evm_status = INTEGRITY_PASS; struct evm_digest digest; struct inode *inode = d_backing_inode(dentry); struct evm_iint_cache *iint = evm_iint_inode(inode); int rc, xattr_len, evm_immutable = 0; if (iint && (iint->evm_status == INTEGRITY_PASS || iint->evm_status == INTEGRITY_PASS_IMMUTABLE)) return iint->evm_status; /* * On unsupported filesystems without EVM_INIT_X509 enabled, skip * signature verification. */ if (!(evm_initialized & EVM_INIT_X509) && is_unsupported_hmac_fs(dentry)) return INTEGRITY_UNKNOWN; /* if status is not PASS, try to check again - against -ENOMEM */ /* first need to know the sig type */ rc = vfs_getxattr_alloc(&nop_mnt_idmap, dentry, XATTR_NAME_EVM, (char **)&xattr_data, 0, GFP_NOFS); if (rc <= 0) { evm_status = INTEGRITY_FAIL; if (rc == -ENODATA) { rc = evm_find_protected_xattrs(dentry); if (rc > 0) evm_status = INTEGRITY_NOLABEL; else if (rc == 0) evm_status = INTEGRITY_NOXATTRS; /* new file */ } else if (rc == -EOPNOTSUPP) { evm_status = INTEGRITY_UNKNOWN; } goto out; } xattr_len = rc; /* check value type */ switch (xattr_data->type) { case EVM_XATTR_HMAC: if (xattr_len != sizeof(struct evm_xattr)) { evm_status = INTEGRITY_FAIL; goto out; } digest.hdr.algo = HASH_ALGO_SHA1; rc = evm_calc_hmac(dentry, xattr_name, xattr_value, xattr_value_len, &digest, iint); if (rc) break; rc = crypto_memneq(xattr_data->data, digest.digest, SHA1_DIGEST_SIZE); if (rc) rc = -EINVAL; break; case EVM_XATTR_PORTABLE_DIGSIG: evm_immutable = 1; fallthrough; case EVM_IMA_XATTR_DIGSIG: /* accept xattr with non-empty signature field */ if (xattr_len <= sizeof(struct signature_v2_hdr)) { evm_status = INTEGRITY_FAIL; goto out; } hdr = (struct signature_v2_hdr *)xattr_data; digest.hdr.algo = hdr->hash_algo; rc = evm_calc_hash(dentry, xattr_name, xattr_value, xattr_value_len, xattr_data->type, &digest, iint); if (rc) break; rc = integrity_digsig_verify(INTEGRITY_KEYRING_EVM, (const char *)xattr_data, xattr_len, digest.digest, digest.hdr.length); if (!rc) { if (xattr_data->type == EVM_XATTR_PORTABLE_DIGSIG) { if (iint) iint->flags |= EVM_IMMUTABLE_DIGSIG; evm_status = INTEGRITY_PASS_IMMUTABLE; } else if (!IS_RDONLY(inode) && !(inode->i_sb->s_readonly_remount) && !IS_IMMUTABLE(inode) && !is_unsupported_hmac_fs(dentry)) { evm_update_evmxattr(dentry, xattr_name, xattr_value, xattr_value_len); } } break; default: rc = -EINVAL; break; } if (rc) { if (rc == -ENODATA) evm_status = INTEGRITY_NOXATTRS; else if (evm_immutable) evm_status = INTEGRITY_FAIL_IMMUTABLE; else evm_status = INTEGRITY_FAIL; } pr_debug("digest: (%d) [%*phN]\n", digest.hdr.length, digest.hdr.length, digest.digest); out: if (iint) iint->evm_status = evm_status; kfree(xattr_data); return evm_status; } static int evm_protected_xattr_common(const char *req_xattr_name, bool all_xattrs) { int namelen; int found = 0; struct xattr_list *xattr; namelen = strlen(req_xattr_name); list_for_each_entry_lockless(xattr, &evm_config_xattrnames, list) { if (!all_xattrs && !xattr->enabled) continue; if ((strlen(xattr->name) == namelen) && (strncmp(req_xattr_name, xattr->name, namelen) == 0)) { found = 1; break; } if (strncmp(req_xattr_name, xattr->name + XATTR_SECURITY_PREFIX_LEN, strlen(req_xattr_name)) == 0) { found = 1; break; } } return found; } int evm_protected_xattr(const char *req_xattr_name) { return evm_protected_xattr_common(req_xattr_name, false); } int evm_protected_xattr_if_enabled(const char *req_xattr_name) { return evm_protected_xattr_common(req_xattr_name, true); } /** * evm_read_protected_xattrs - read EVM protected xattr names, lengths, values * @dentry: dentry of the read xattrs * @buffer: buffer xattr names, lengths or values are copied to * @buffer_size: size of buffer * @type: n: names, l: lengths, v: values * @canonical_fmt: data format (true: little endian, false: native format) * * Read protected xattr names (separated by |), lengths (u32) or values for a * given dentry and return the total size of copied data. If buffer is NULL, * just return the total size. * * Returns the total size on success, a negative value on error. */ int evm_read_protected_xattrs(struct dentry *dentry, u8 *buffer, int buffer_size, char type, bool canonical_fmt) { struct xattr_list *xattr; int rc, size, total_size = 0; list_for_each_entry_lockless(xattr, &evm_config_xattrnames, list) { rc = __vfs_getxattr(dentry, d_backing_inode(dentry), xattr->name, NULL, 0); if (rc < 0 && rc == -ENODATA) continue; else if (rc < 0) return rc; switch (type) { case 'n': size = strlen(xattr->name) + 1; if (buffer) { if (total_size) *(buffer + total_size - 1) = '|'; memcpy(buffer + total_size, xattr->name, size); } break; case 'l': size = sizeof(u32); if (buffer) { if (canonical_fmt) rc = (__force int)cpu_to_le32(rc); *(u32 *)(buffer + total_size) = rc; } break; case 'v': size = rc; if (buffer) { rc = __vfs_getxattr(dentry, d_backing_inode(dentry), xattr->name, buffer + total_size, buffer_size - total_size); if (rc < 0) return rc; } break; default: return -EINVAL; } total_size += size; } return total_size; } /** * evm_verifyxattr - verify the integrity of the requested xattr * @dentry: object of the verify xattr * @xattr_name: requested xattr * @xattr_value: requested xattr value * @xattr_value_len: requested xattr value length * * Calculate the HMAC for the given dentry and verify it against the stored * security.evm xattr. For performance, use the xattr value and length * previously retrieved to calculate the HMAC. * * Returns the xattr integrity status. * * This function requires the caller to lock the inode's i_mutex before it * is executed. */ enum integrity_status evm_verifyxattr(struct dentry *dentry, const char *xattr_name, void *xattr_value, size_t xattr_value_len) { if (!evm_key_loaded() || !evm_protected_xattr(xattr_name)) return INTEGRITY_UNKNOWN; return evm_verify_hmac(dentry, xattr_name, xattr_value, xattr_value_len); } EXPORT_SYMBOL_GPL(evm_verifyxattr); /* * evm_verify_current_integrity - verify the dentry's metadata integrity * @dentry: pointer to the affected dentry * * Verify and return the dentry's metadata integrity. The exceptions are * before EVM is initialized or in 'fix' mode. */ static enum integrity_status evm_verify_current_integrity(struct dentry *dentry) { struct inode *inode = d_backing_inode(dentry); if (!evm_key_loaded() || !S_ISREG(inode->i_mode) || evm_fixmode) return INTEGRITY_PASS; return evm_verify_hmac(dentry, NULL, NULL, 0); } /* * evm_xattr_change - check if passed xattr value differs from current value * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @xattr_name: requested xattr * @xattr_value: requested xattr value * @xattr_value_len: requested xattr value length * * Check if passed xattr value differs from current value. * * Returns 1 if passed xattr value differs from current value, 0 otherwise. */ static int evm_xattr_change(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len) { char *xattr_data = NULL; int rc = 0; rc = vfs_getxattr_alloc(&nop_mnt_idmap, dentry, xattr_name, &xattr_data, 0, GFP_NOFS); if (rc < 0) { rc = 1; goto out; } if (rc == xattr_value_len) rc = !!memcmp(xattr_value, xattr_data, rc); else rc = 1; out: kfree(xattr_data); return rc; } /* * evm_protect_xattr - protect the EVM extended attribute * * Prevent security.evm from being modified or removed without the * necessary permissions or when the existing value is invalid. * * The posix xattr acls are 'system' prefixed, which normally would not * affect security.evm. An interesting side affect of writing posix xattr * acls is their modifying of the i_mode, which is included in security.evm. * For posix xattr acls only, permit security.evm, even if it currently * doesn't exist, to be updated unless the EVM signature is immutable. */ static int evm_protect_xattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len) { enum integrity_status evm_status; if (strcmp(xattr_name, XATTR_NAME_EVM) == 0) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; if (is_unsupported_hmac_fs(dentry)) return -EPERM; } else if (!evm_protected_xattr(xattr_name)) { if (!posix_xattr_acl(xattr_name)) return 0; if (is_unsupported_hmac_fs(dentry)) return 0; evm_status = evm_verify_current_integrity(dentry); if ((evm_status == INTEGRITY_PASS) || (evm_status == INTEGRITY_NOXATTRS)) return 0; goto out; } else if (is_unsupported_hmac_fs(dentry)) return 0; evm_status = evm_verify_current_integrity(dentry); if (evm_status == INTEGRITY_NOXATTRS) { struct evm_iint_cache *iint; /* Exception if the HMAC is not going to be calculated. */ if (evm_hmac_disabled()) return 0; iint = evm_iint_inode(d_backing_inode(dentry)); if (iint && (iint->flags & EVM_NEW_FILE)) return 0; /* exception for pseudo filesystems */ if (dentry->d_sb->s_magic == TMPFS_MAGIC || dentry->d_sb->s_magic == SYSFS_MAGIC) return 0; integrity_audit_msg(AUDIT_INTEGRITY_METADATA, dentry->d_inode, dentry->d_name.name, "update_metadata", integrity_status_msg[evm_status], -EPERM, 0); } out: /* Exception if the HMAC is not going to be calculated. */ if (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL || evm_status == INTEGRITY_UNKNOWN)) return 0; /* * Writing other xattrs is safe for portable signatures, as portable * signatures are immutable and can never be updated. */ if (evm_status == INTEGRITY_FAIL_IMMUTABLE) return 0; if (evm_status == INTEGRITY_PASS_IMMUTABLE && !evm_xattr_change(idmap, dentry, xattr_name, xattr_value, xattr_value_len)) return 0; if (evm_status != INTEGRITY_PASS && evm_status != INTEGRITY_PASS_IMMUTABLE) integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry), dentry->d_name.name, "appraise_metadata", integrity_status_msg[evm_status], -EPERM, 0); return evm_status == INTEGRITY_PASS ? 0 : -EPERM; } /** * evm_inode_setxattr - protect the EVM extended attribute * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @xattr_name: pointer to the affected extended attribute name * @xattr_value: pointer to the new extended attribute value * @xattr_value_len: pointer to the new extended attribute value length * @flags: flags to pass into filesystem operations * * Before allowing the 'security.evm' protected xattr to be updated, * verify the existing value is valid. As only the kernel should have * access to the EVM encrypted key needed to calculate the HMAC, prevent * userspace from writing HMAC value. Writing 'security.evm' requires * requires CAP_SYS_ADMIN privileges. */ static int evm_inode_setxattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len, int flags) { const struct evm_ima_xattr_data *xattr_data = xattr_value; /* Policy permits modification of the protected xattrs even though * there's no HMAC key loaded */ if (evm_initialized & EVM_ALLOW_METADATA_WRITES) return 0; if (strcmp(xattr_name, XATTR_NAME_EVM) == 0) { if (!xattr_value_len) return -EINVAL; if (xattr_data->type != EVM_IMA_XATTR_DIGSIG && xattr_data->type != EVM_XATTR_PORTABLE_DIGSIG) return -EPERM; } return evm_protect_xattr(idmap, dentry, xattr_name, xattr_value, xattr_value_len); } /** * evm_inode_removexattr - protect the EVM extended attribute * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @xattr_name: pointer to the affected extended attribute name * * Removing 'security.evm' requires CAP_SYS_ADMIN privileges and that * the current value is valid. */ static int evm_inode_removexattr(struct mnt_idmap *idmap, struct dentry *dentry, const char *xattr_name) { /* Policy permits modification of the protected xattrs even though * there's no HMAC key loaded */ if (evm_initialized & EVM_ALLOW_METADATA_WRITES) return 0; return evm_protect_xattr(idmap, dentry, xattr_name, NULL, 0); } #ifdef CONFIG_FS_POSIX_ACL static int evm_inode_set_acl_change(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, struct posix_acl *kacl) { int rc; umode_t mode; struct inode *inode = d_backing_inode(dentry); if (!kacl) return 1; rc = posix_acl_update_mode(idmap, inode, &mode, &kacl); if (rc || (inode->i_mode != mode)) return 1; return 0; } #else static inline int evm_inode_set_acl_change(struct mnt_idmap *idmap, struct dentry *dentry, const char *name, struct posix_acl *kacl) { return 0; } #endif /** * evm_inode_set_acl - protect the EVM extended attribute from posix acls * @idmap: idmap of the idmapped mount * @dentry: pointer to the affected dentry * @acl_name: name of the posix acl * @kacl: pointer to the posix acls * * Prevent modifying posix acls causing the EVM HMAC to be re-calculated * and 'security.evm' xattr updated, unless the existing 'security.evm' is * valid. * * Return: zero on success, -EPERM on failure. */ static int evm_inode_set_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { enum integrity_status evm_status; /* Policy permits modification of the protected xattrs even though * there's no HMAC key loaded */ if (evm_initialized & EVM_ALLOW_METADATA_WRITES) return 0; evm_status = evm_verify_current_integrity(dentry); if ((evm_status == INTEGRITY_PASS) || (evm_status == INTEGRITY_NOXATTRS)) return 0; /* Exception if the HMAC is not going to be calculated. */ if (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL || evm_status == INTEGRITY_UNKNOWN)) return 0; /* * Writing other xattrs is safe for portable signatures, as portable * signatures are immutable and can never be updated. */ if (evm_status == INTEGRITY_FAIL_IMMUTABLE) return 0; if (evm_status == INTEGRITY_PASS_IMMUTABLE && !evm_inode_set_acl_change(idmap, dentry, acl_name, kacl)) return 0; if (evm_status != INTEGRITY_PASS_IMMUTABLE) integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry), dentry->d_name.name, "appraise_metadata", integrity_status_msg[evm_status], -EPERM, 0); return -EPERM; } /** * evm_inode_remove_acl - Protect the EVM extended attribute from posix acls * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @acl_name: name of the posix acl * * Prevent removing posix acls causing the EVM HMAC to be re-calculated * and 'security.evm' xattr updated, unless the existing 'security.evm' is * valid. * * Return: zero on success, -EPERM on failure. */ static int evm_inode_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { return evm_inode_set_acl(idmap, dentry, acl_name, NULL); } static void evm_reset_status(struct inode *inode) { struct evm_iint_cache *iint; iint = evm_iint_inode(inode); if (iint) iint->evm_status = INTEGRITY_UNKNOWN; } /** * evm_metadata_changed: Detect changes to the metadata * @inode: a file's inode * @metadata_inode: metadata inode * * On a stacked filesystem detect whether the metadata has changed. If this is * the case reset the evm_status associated with the inode that represents the * file. */ bool evm_metadata_changed(struct inode *inode, struct inode *metadata_inode) { struct evm_iint_cache *iint = evm_iint_inode(inode); bool ret = false; if (iint) { ret = (!IS_I_VERSION(metadata_inode) || integrity_inode_attrs_changed(&iint->metadata_inode, metadata_inode)); if (ret) iint->evm_status = INTEGRITY_UNKNOWN; } return ret; } /** * evm_revalidate_status - report whether EVM status re-validation is necessary * @xattr_name: pointer to the affected extended attribute name * * Report whether callers of evm_verifyxattr() should re-validate the * EVM status. * * Return true if re-validation is necessary, false otherwise. */ bool evm_revalidate_status(const char *xattr_name) { if (!evm_key_loaded()) return false; /* evm_inode_post_setattr() passes NULL */ if (!xattr_name) return true; if (!evm_protected_xattr(xattr_name) && !posix_xattr_acl(xattr_name) && strcmp(xattr_name, XATTR_NAME_EVM)) return false; return true; } /** * evm_inode_post_setxattr - update 'security.evm' to reflect the changes * @dentry: pointer to the affected dentry * @xattr_name: pointer to the affected extended attribute name * @xattr_value: pointer to the new extended attribute value * @xattr_value_len: pointer to the new extended attribute value length * @flags: flags to pass into filesystem operations * * Update the HMAC stored in 'security.evm' to reflect the change. * * No need to take the i_mutex lock here, as this function is called from * __vfs_setxattr_noperm(). The caller of which has taken the inode's * i_mutex lock. */ static void evm_inode_post_setxattr(struct dentry *dentry, const char *xattr_name, const void *xattr_value, size_t xattr_value_len, int flags) { if (!evm_revalidate_status(xattr_name)) return; evm_reset_status(dentry->d_inode); if (!strcmp(xattr_name, XATTR_NAME_EVM)) return; if (!(evm_initialized & EVM_INIT_HMAC)) return; if (is_unsupported_hmac_fs(dentry)) return; evm_update_evmxattr(dentry, xattr_name, xattr_value, xattr_value_len); } /** * evm_inode_post_set_acl - Update the EVM extended attribute from posix acls * @dentry: pointer to the affected dentry * @acl_name: name of the posix acl * @kacl: pointer to the posix acls * * Update the 'security.evm' xattr with the EVM HMAC re-calculated after setting * posix acls. */ static void evm_inode_post_set_acl(struct dentry *dentry, const char *acl_name, struct posix_acl *kacl) { return evm_inode_post_setxattr(dentry, acl_name, NULL, 0, 0); } /** * evm_inode_post_removexattr - update 'security.evm' after removing the xattr * @dentry: pointer to the affected dentry * @xattr_name: pointer to the affected extended attribute name * * Update the HMAC stored in 'security.evm' to reflect removal of the xattr. * * No need to take the i_mutex lock here, as this function is called from * vfs_removexattr() which takes the i_mutex. */ static void evm_inode_post_removexattr(struct dentry *dentry, const char *xattr_name) { if (!evm_revalidate_status(xattr_name)) return; evm_reset_status(dentry->d_inode); if (!strcmp(xattr_name, XATTR_NAME_EVM)) return; if (!(evm_initialized & EVM_INIT_HMAC)) return; evm_update_evmxattr(dentry, xattr_name, NULL, 0); } /** * evm_inode_post_remove_acl - Update the EVM extended attribute from posix acls * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @acl_name: name of the posix acl * * Update the 'security.evm' xattr with the EVM HMAC re-calculated after * removing posix acls. */ static inline void evm_inode_post_remove_acl(struct mnt_idmap *idmap, struct dentry *dentry, const char *acl_name) { evm_inode_post_removexattr(dentry, acl_name); } static int evm_attr_change(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { struct inode *inode = d_backing_inode(dentry); unsigned int ia_valid = attr->ia_valid; if (!i_uid_needs_update(idmap, attr, inode) && !i_gid_needs_update(idmap, attr, inode) && (!(ia_valid & ATTR_MODE) || attr->ia_mode == inode->i_mode)) return 0; return 1; } /** * evm_inode_setattr - prevent updating an invalid EVM extended attribute * @idmap: idmap of the mount * @dentry: pointer to the affected dentry * @attr: iattr structure containing the new file attributes * * Permit update of file attributes when files have a valid EVM signature, * except in the case of them having an immutable portable signature. */ static int evm_inode_setattr(struct mnt_idmap *idmap, struct dentry *dentry, struct iattr *attr) { unsigned int ia_valid = attr->ia_valid; enum integrity_status evm_status; /* Policy permits modification of the protected attrs even though * there's no HMAC key loaded */ if (evm_initialized & EVM_ALLOW_METADATA_WRITES) return 0; if (is_unsupported_hmac_fs(dentry)) return 0; if (!(ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))) return 0; evm_status = evm_verify_current_integrity(dentry); /* * Writing attrs is safe for portable signatures, as portable signatures * are immutable and can never be updated. */ if ((evm_status == INTEGRITY_PASS) || (evm_status == INTEGRITY_NOXATTRS) || (evm_status == INTEGRITY_FAIL_IMMUTABLE) || (evm_hmac_disabled() && (evm_status == INTEGRITY_NOLABEL || evm_status == INTEGRITY_UNKNOWN))) return 0; if (evm_status == INTEGRITY_PASS_IMMUTABLE && !evm_attr_change(idmap, dentry, attr)) return 0; integrity_audit_msg(AUDIT_INTEGRITY_METADATA, d_backing_inode(dentry), dentry->d_name.name, "appraise_metadata", integrity_status_msg[evm_status], -EPERM, 0); return -EPERM; } /** * evm_inode_post_setattr - update 'security.evm' after modifying metadata * @idmap: idmap of the idmapped mount * @dentry: pointer to the affected dentry * @ia_valid: for the UID and GID status * * For now, update the HMAC stored in 'security.evm' to reflect UID/GID * changes. * * This function is called from notify_change(), which expects the caller * to lock the inode's i_mutex. */ static void evm_inode_post_setattr(struct mnt_idmap *idmap, struct dentry *dentry, int ia_valid) { if (!evm_revalidate_status(NULL)) return; evm_reset_status(dentry->d_inode); if (!(evm_initialized & EVM_INIT_HMAC)) return; if (is_unsupported_hmac_fs(dentry)) return; if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)) evm_update_evmxattr(dentry, NULL, NULL, 0); } static int evm_inode_copy_up_xattr(struct dentry *src, const char *name) { struct evm_ima_xattr_data *xattr_data = NULL; int rc; if (strcmp(name, XATTR_NAME_EVM) != 0) return -EOPNOTSUPP; /* first need to know the sig type */ rc = vfs_getxattr_alloc(&nop_mnt_idmap, src, XATTR_NAME_EVM, (char **)&xattr_data, 0, GFP_NOFS); if (rc <= 0) return -EPERM; if (rc < offsetof(struct evm_ima_xattr_data, type) + sizeof(xattr_data->type)) return -EPERM; switch (xattr_data->type) { case EVM_XATTR_PORTABLE_DIGSIG: rc = 0; /* allow copy-up */ break; case EVM_XATTR_HMAC: case EVM_IMA_XATTR_DIGSIG: default: rc = -ECANCELED; /* discard */ } kfree(xattr_data); return rc; } /* * evm_inode_init_security - initializes security.evm HMAC value */ int evm_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, struct xattr *xattrs, int *xattr_count) { struct evm_xattr *xattr_data; struct xattr *xattr, *evm_xattr; bool evm_protected_xattrs = false; int rc; if (!(evm_initialized & EVM_INIT_HMAC) || !xattrs) return 0; /* * security_inode_init_security() makes sure that the xattrs array is * contiguous, there is enough space for security.evm, and that there is * a terminator at the end of the array. */ for (xattr = xattrs; xattr->name; xattr++) { if (evm_protected_xattr(xattr->name)) evm_protected_xattrs = true; } /* EVM xattr not needed. */ if (!evm_protected_xattrs) return 0; evm_xattr = lsm_get_xattr_slot(xattrs, xattr_count); /* * Array terminator (xattr name = NULL) must be the first non-filled * xattr slot. */ WARN_ONCE(evm_xattr != xattr, "%s: xattrs terminator is not the first non-filled slot\n", __func__); xattr_data = kzalloc(sizeof(*xattr_data), GFP_NOFS); if (!xattr_data) return -ENOMEM; xattr_data->data.type = EVM_XATTR_HMAC; rc = evm_init_hmac(inode, xattrs, xattr_data->digest); if (rc < 0) goto out; evm_xattr->value = xattr_data; evm_xattr->value_len = sizeof(*xattr_data); evm_xattr->name = XATTR_EVM_SUFFIX; return 0; out: kfree(xattr_data); return rc; } EXPORT_SYMBOL_GPL(evm_inode_init_security); static int evm_inode_alloc_security(struct inode *inode) { struct evm_iint_cache *iint = evm_iint_inode(inode); /* Called by security_inode_alloc(), it cannot be NULL. */ iint->flags = 0UL; iint->evm_status = INTEGRITY_UNKNOWN; return 0; } static void evm_file_release(struct file *file) { struct inode *inode = file_inode(file); struct evm_iint_cache *iint = evm_iint_inode(inode); fmode_t mode = file->f_mode; if (!S_ISREG(inode->i_mode) || !(mode & FMODE_WRITE)) return; if (iint && iint->flags & EVM_NEW_FILE && atomic_read(&inode->i_writecount) == 1) iint->flags &= ~EVM_NEW_FILE; } static void evm_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry) { struct inode *inode = d_backing_inode(dentry); struct evm_iint_cache *iint = evm_iint_inode(inode); if (!S_ISREG(inode->i_mode)) return; if (iint) iint->flags |= EVM_NEW_FILE; } #ifdef CONFIG_EVM_LOAD_X509 void __init evm_load_x509(void) { int rc; rc = integrity_load_x509(INTEGRITY_KEYRING_EVM, CONFIG_EVM_X509_PATH); if (!rc) evm_initialized |= EVM_INIT_X509; } #endif static int __init init_evm(void) { int error; struct list_head *pos, *q; evm_init_config(); error = integrity_init_keyring(INTEGRITY_KEYRING_EVM); if (error) goto error; error = evm_init_secfs(); if (error < 0) { pr_info("Error registering secfs\n"); goto error; } error: if (error != 0) { if (!list_empty(&evm_config_xattrnames)) { list_for_each_safe(pos, q, &evm_config_xattrnames) list_del(pos); } } return error; } static struct security_hook_list evm_hooks[] __ro_after_init = { LSM_HOOK_INIT(inode_setattr, evm_inode_setattr), LSM_HOOK_INIT(inode_post_setattr, evm_inode_post_setattr), LSM_HOOK_INIT(inode_copy_up_xattr, evm_inode_copy_up_xattr), LSM_HOOK_INIT(inode_setxattr, evm_inode_setxattr), LSM_HOOK_INIT(inode_post_setxattr, evm_inode_post_setxattr), LSM_HOOK_INIT(inode_set_acl, evm_inode_set_acl), LSM_HOOK_INIT(inode_post_set_acl, evm_inode_post_set_acl), LSM_HOOK_INIT(inode_remove_acl, evm_inode_remove_acl), LSM_HOOK_INIT(inode_post_remove_acl, evm_inode_post_remove_acl), LSM_HOOK_INIT(inode_removexattr, evm_inode_removexattr), LSM_HOOK_INIT(inode_post_removexattr, evm_inode_post_removexattr), LSM_HOOK_INIT(inode_init_security, evm_inode_init_security), LSM_HOOK_INIT(inode_alloc_security, evm_inode_alloc_security), LSM_HOOK_INIT(file_release, evm_file_release), LSM_HOOK_INIT(path_post_mknod, evm_post_path_mknod), }; static const struct lsm_id evm_lsmid = { .name = "evm", .id = LSM_ID_EVM, }; static int __init init_evm_lsm(void) { security_add_hooks(evm_hooks, ARRAY_SIZE(evm_hooks), &evm_lsmid); return 0; } struct lsm_blob_sizes evm_blob_sizes __ro_after_init = { .lbs_inode = sizeof(struct evm_iint_cache), .lbs_xattr_count = 1, }; DEFINE_LSM(evm) = { .name = "evm", .init = init_evm_lsm, .order = LSM_ORDER_LAST, .blobs = &evm_blob_sizes, }; late_initcall(init_evm); |
64 1 1 1947 92 102 101 26 25 26 145 330 2 764 672 211 213 113 1588 69 401 410 18 1 1 183 183 4 77 4 92 25 6 4 76 89 89 89 89 89 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 | /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_PAGEMAP_H #define _LINUX_PAGEMAP_H /* * Copyright 1995 Linus Torvalds */ #include <linux/mm.h> #include <linux/fs.h> #include <linux/list.h> #include <linux/highmem.h> #include <linux/compiler.h> #include <linux/uaccess.h> #include <linux/gfp.h> #include <linux/bitops.h> #include <linux/hardirq.h> /* for in_interrupt() */ #include <linux/hugetlb_inline.h> struct folio_batch; unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); static inline void invalidate_remote_inode(struct inode *inode) { if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) invalidate_mapping_pages(inode->i_mapping, 0, -1); } int invalidate_inode_pages2(struct address_space *mapping); int invalidate_inode_pages2_range(struct address_space *mapping, pgoff_t start, pgoff_t end); int kiocb_invalidate_pages(struct kiocb *iocb, size_t count); void kiocb_invalidate_post_direct_write(struct kiocb *iocb, size_t count); int filemap_invalidate_pages(struct address_space *mapping, loff_t pos, loff_t end, bool nowait); int write_inode_now(struct inode *, int sync); int filemap_fdatawrite(struct address_space *); int filemap_flush(struct address_space *); int filemap_fdatawait_keep_errors(struct address_space *mapping); int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); int filemap_fdatawait_range_keep_errors(struct address_space *mapping, loff_t start_byte, loff_t end_byte); int filemap_invalidate_inode(struct inode *inode, bool flush, loff_t start, loff_t end); static inline int filemap_fdatawait(struct address_space *mapping) { return filemap_fdatawait_range(mapping, 0, LLONG_MAX); } bool filemap_range_has_page(struct address_space *, loff_t lstart, loff_t lend); int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); int filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end); int filemap_check_errors(struct address_space *mapping); void __filemap_set_wb_err(struct address_space *mapping, int err); int filemap_fdatawrite_wbc(struct address_space *mapping, struct writeback_control *wbc); int kiocb_write_and_wait(struct kiocb *iocb, size_t count); static inline int filemap_write_and_wait(struct address_space *mapping) { return filemap_write_and_wait_range(mapping, 0, LLONG_MAX); } /** * filemap_set_wb_err - set a writeback error on an address_space * @mapping: mapping in which to set writeback error * @err: error to be set in mapping * * When writeback fails in some way, we must record that error so that * userspace can be informed when fsync and the like are called. We endeavor * to report errors on any file that was open at the time of the error. Some * internal callers also need to know when writeback errors have occurred. * * When a writeback error occurs, most filesystems will want to call * filemap_set_wb_err to record the error in the mapping so that it will be * automatically reported whenever fsync is called on the file. */ static inline void filemap_set_wb_err(struct address_space *mapping, int err) { /* Fastpath for common case of no error */ if (unlikely(err)) __filemap_set_wb_err(mapping, err); } /** * filemap_check_wb_err - has an error occurred since the mark was sampled? * @mapping: mapping to check for writeback errors * @since: previously-sampled errseq_t * * Grab the errseq_t value from the mapping, and see if it has changed "since" * the given value was sampled. * * If it has then report the latest error set, otherwise return 0. */ static inline int filemap_check_wb_err(struct address_space *mapping, errseq_t since) { return errseq_check(&mapping->wb_err, since); } /** * filemap_sample_wb_err - sample the current errseq_t to test for later errors * @mapping: mapping to be sampled * * Writeback errors are always reported relative to a particular sample point * in the past. This function provides those sample points. */ static inline errseq_t filemap_sample_wb_err(struct address_space *mapping) { return errseq_sample(&mapping->wb_err); } /** * file_sample_sb_err - sample the current errseq_t to test for later errors * @file: file pointer to be sampled * * Grab the most current superblock-level errseq_t value for the given * struct file. */ static inline errseq_t file_sample_sb_err(struct file *file) { return errseq_sample(&file->f_path.dentry->d_sb->s_wb_err); } /* * Flush file data before changing attributes. Caller must hold any locks * required to prevent further writes to this file until we're done setting * flags. */ static inline int inode_drain_writes(struct inode *inode) { inode_dio_wait(inode); return filemap_write_and_wait(inode->i_mapping); } static inline bool mapping_empty(struct address_space *mapping) { return xa_empty(&mapping->i_pages); } /* * mapping_shrinkable - test if page cache state allows inode reclaim * @mapping: the page cache mapping * * This checks the mapping's cache state for the pupose of inode * reclaim and LRU management. * * The caller is expected to hold the i_lock, but is not required to * hold the i_pages lock, which usually protects cache state. That's * because the i_lock and the list_lru lock that protect the inode and * its LRU state don't nest inside the irq-safe i_pages lock. * * Cache deletions are performed under the i_lock, which ensures that * when an inode goes empty, it will reliably get queued on the LRU. * * Cache additions do not acquire the i_lock and may race with this * check, in which case we'll report the inode as shrinkable when it * has cache pages. This is okay: the shrinker also checks the * refcount and the referenced bit, which will be elevated or set in * the process of adding new cache pages to an inode. */ static inline bool mapping_shrinkable(struct address_space *mapping) { void *head; /* * On highmem systems, there could be lowmem pressure from the * inodes before there is highmem pressure from the page * cache. Make inodes shrinkable regardless of cache state. */ if (IS_ENABLED(CONFIG_HIGHMEM)) return true; /* Cache completely empty? Shrink away. */ head = rcu_access_pointer(mapping->i_pages.xa_head); if (!head) return true; /* * The xarray stores single offset-0 entries directly in the * head pointer, which allows non-resident page cache entries * to escape the shadow shrinker's list of xarray nodes. The * inode shrinker needs to pick them up under memory pressure. */ if (!xa_is_node(head) && xa_is_value(head)) return true; return false; } /* * Bits in mapping->flags. */ enum mapping_flags { AS_EIO = 0, /* IO error on async write */ AS_ENOSPC = 1, /* ENOSPC on async write */ AS_MM_ALL_LOCKS = 2, /* under mm_take_all_locks() */ AS_UNEVICTABLE = 3, /* e.g., ramdisk, SHM_LOCK */ AS_EXITING = 4, /* final truncate in progress */ /* writeback related tags are not used */ AS_NO_WRITEBACK_TAGS = 5, AS_RELEASE_ALWAYS = 6, /* Call ->release_folio(), even if no private data */ AS_STABLE_WRITES = 7, /* must wait for writeback before modifying folio contents */ AS_INACCESSIBLE = 8, /* Do not attempt direct R/W access to the mapping */ /* Bits 16-25 are used for FOLIO_ORDER */ AS_FOLIO_ORDER_BITS = 5, AS_FOLIO_ORDER_MIN = 16, AS_FOLIO_ORDER_MAX = AS_FOLIO_ORDER_MIN + AS_FOLIO_ORDER_BITS, }; #define AS_FOLIO_ORDER_BITS_MASK ((1u << AS_FOLIO_ORDER_BITS) - 1) #define AS_FOLIO_ORDER_MIN_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MIN) #define AS_FOLIO_ORDER_MAX_MASK (AS_FOLIO_ORDER_BITS_MASK << AS_FOLIO_ORDER_MAX) #define AS_FOLIO_ORDER_MASK (AS_FOLIO_ORDER_MIN_MASK | AS_FOLIO_ORDER_MAX_MASK) /** * mapping_set_error - record a writeback error in the address_space * @mapping: the mapping in which an error should be set * @error: the error to set in the mapping * * When writeback fails in some way, we must record that error so that * userspace can be informed when fsync and the like are called. We endeavor * to report errors on any file that was open at the time of the error. Some * internal callers also need to know when writeback errors have occurred. * * When a writeback error occurs, most filesystems will want to call * mapping_set_error to record the error in the mapping so that it can be * reported when the application calls fsync(2). */ static inline void mapping_set_error(struct address_space *mapping, int error) { if (likely(!error)) return; /* Record in wb_err for checkers using errseq_t based tracking */ __filemap_set_wb_err(mapping, error); /* Record it in superblock */ if (mapping->host) errseq_set(&mapping->host->i_sb->s_wb_err, error); /* Record it in flags for now, for legacy callers */ if (error == -ENOSPC) set_bit(AS_ENOSPC, &mapping->flags); else set_bit(AS_EIO, &mapping->flags); } static inline void mapping_set_unevictable(struct address_space *mapping) { set_bit(AS_UNEVICTABLE, &mapping->flags); } static inline void mapping_clear_unevictable(struct address_space *mapping) { clear_bit(AS_UNEVICTABLE, &mapping->flags); } static inline bool mapping_unevictable(struct address_space *mapping) { return mapping && test_bit(AS_UNEVICTABLE, &mapping->flags); } static inline void mapping_set_exiting(struct address_space *mapping) { set_bit(AS_EXITING, &mapping->flags); } static inline int mapping_exiting(struct address_space *mapping) { return test_bit(AS_EXITING, &mapping->flags); } static inline void mapping_set_no_writeback_tags(struct address_space *mapping) { set_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } static inline int mapping_use_writeback_tags(struct address_space *mapping) { return !test_bit(AS_NO_WRITEBACK_TAGS, &mapping->flags); } static inline bool mapping_release_always(const struct address_space *mapping) { return test_bit(AS_RELEASE_ALWAYS, &mapping->flags); } static inline void mapping_set_release_always(struct address_space *mapping) { set_bit(AS_RELEASE_ALWAYS, &mapping->flags); } static inline void mapping_clear_release_always(struct address_space *mapping) { clear_bit(AS_RELEASE_ALWAYS, &mapping->flags); } static inline bool mapping_stable_writes(const struct address_space *mapping) { return test_bit(AS_STABLE_WRITES, &mapping->flags); } static inline void mapping_set_stable_writes(struct address_space *mapping) { set_bit(AS_STABLE_WRITES, &mapping->flags); } static inline void mapping_clear_stable_writes(struct address_space *mapping) { clear_bit(AS_STABLE_WRITES, &mapping->flags); } static inline void mapping_set_inaccessible(struct address_space *mapping) { /* * It's expected inaccessible mappings are also unevictable. Compaction * migrate scanner (isolate_migratepages_block()) relies on this to * reduce page locking. */ set_bit(AS_UNEVICTABLE, &mapping->flags); set_bit(AS_INACCESSIBLE, &mapping->flags); } static inline bool mapping_inaccessible(struct address_space *mapping) { return test_bit(AS_INACCESSIBLE, &mapping->flags); } static inline gfp_t mapping_gfp_mask(struct address_space * mapping) { return mapping->gfp_mask; } /* Restricts the given gfp_mask to what the mapping allows. */ static inline gfp_t mapping_gfp_constraint(struct address_space *mapping, gfp_t gfp_mask) { return mapping_gfp_mask(mapping) & gfp_mask; } /* * This is non-atomic. Only to be used before the mapping is activated. * Probably needs a barrier... */ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) { m->gfp_mask = mask; } /* * There are some parts of the kernel which assume that PMD entries * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then, * limit the maximum allocation order to PMD size. I'm not aware of any * assumptions about maximum order if THP are disabled, but 8 seems like * a good order (that's 1MB if you're using 4kB pages) */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE #define PREFERRED_MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER #else #define PREFERRED_MAX_PAGECACHE_ORDER 8 #endif /* * xas_split_alloc() does not support arbitrary orders. This implies no * 512MB THP on ARM64 with 64KB base page size. */ #define MAX_XAS_ORDER (XA_CHUNK_SHIFT * 2 - 1) #define MAX_PAGECACHE_ORDER min(MAX_XAS_ORDER, PREFERRED_MAX_PAGECACHE_ORDER) /* * mapping_max_folio_size_supported() - Check the max folio size supported * * The filesystem should call this function at mount time if there is a * requirement on the folio mapping size in the page cache. */ static inline size_t mapping_max_folio_size_supported(void) { if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return 1U << (PAGE_SHIFT + MAX_PAGECACHE_ORDER); return PAGE_SIZE; } /* * mapping_set_folio_order_range() - Set the orders supported by a file. * @mapping: The address space of the file. * @min: Minimum folio order (between 0-MAX_PAGECACHE_ORDER inclusive). * @max: Maximum folio order (between @min-MAX_PAGECACHE_ORDER inclusive). * * The filesystem should call this function in its inode constructor to * indicate which base size (min) and maximum size (max) of folio the VFS * can use to cache the contents of the file. This should only be used * if the filesystem needs special handling of folio sizes (ie there is * something the core cannot know). * Do not tune it based on, eg, i_size. * * Context: This should not be called while the inode is active as it * is non-atomic. */ static inline void mapping_set_folio_order_range(struct address_space *mapping, unsigned int min, unsigned int max) { if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return; if (min > MAX_PAGECACHE_ORDER) min = MAX_PAGECACHE_ORDER; if (max > MAX_PAGECACHE_ORDER) max = MAX_PAGECACHE_ORDER; if (max < min) max = min; mapping->flags = (mapping->flags & ~AS_FOLIO_ORDER_MASK) | (min << AS_FOLIO_ORDER_MIN) | (max << AS_FOLIO_ORDER_MAX); } static inline void mapping_set_folio_min_order(struct address_space *mapping, unsigned int min) { mapping_set_folio_order_range(mapping, min, MAX_PAGECACHE_ORDER); } /** * mapping_set_large_folios() - Indicate the file supports large folios. * @mapping: The address space of the file. * * The filesystem should call this function in its inode constructor to * indicate that the VFS can use large folios to cache the contents of * the file. * * Context: This should not be called while the inode is active as it * is non-atomic. */ static inline void mapping_set_large_folios(struct address_space *mapping) { mapping_set_folio_order_range(mapping, 0, MAX_PAGECACHE_ORDER); } static inline unsigned int mapping_max_folio_order(const struct address_space *mapping) { if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return 0; return (mapping->flags & AS_FOLIO_ORDER_MAX_MASK) >> AS_FOLIO_ORDER_MAX; } static inline unsigned int mapping_min_folio_order(const struct address_space *mapping) { if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) return 0; return (mapping->flags & AS_FOLIO_ORDER_MIN_MASK) >> AS_FOLIO_ORDER_MIN; } static inline unsigned long mapping_min_folio_nrpages(struct address_space *mapping) { return 1UL << mapping_min_folio_order(mapping); } /** * mapping_align_index() - Align index for this mapping. * @mapping: The address_space. * @index: The page index. * * The index of a folio must be naturally aligned. If you are adding a * new folio to the page cache and need to know what index to give it, * call this function. */ static inline pgoff_t mapping_align_index(struct address_space *mapping, pgoff_t index) { return round_down(index, mapping_min_folio_nrpages(mapping)); } /* * Large folio support currently depends on THP. These dependencies are * being worked on but are not yet fixed. */ static inline bool mapping_large_folio_support(struct address_space *mapping) { /* AS_FOLIO_ORDER is only reasonable for pagecache folios */ VM_WARN_ONCE((unsigned long)mapping & PAGE_MAPPING_ANON, "Anonymous mapping always supports large folio"); return mapping_max_folio_order(mapping) > 0; } /* Return the maximum folio size for this pagecache mapping, in bytes. */ static inline size_t mapping_max_folio_size(const struct address_space *mapping) { return PAGE_SIZE << mapping_max_folio_order(mapping); } static inline int filemap_nr_thps(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS return atomic_read(&mapping->nr_thps); #else return 0; #endif } static inline void filemap_nr_thps_inc(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS if (!mapping_large_folio_support(mapping)) atomic_inc(&mapping->nr_thps); #else WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); #endif } static inline void filemap_nr_thps_dec(struct address_space *mapping) { #ifdef CONFIG_READ_ONLY_THP_FOR_FS if (!mapping_large_folio_support(mapping)) atomic_dec(&mapping->nr_thps); #else WARN_ON_ONCE(mapping_large_folio_support(mapping) == 0); #endif } struct address_space *folio_mapping(struct folio *); struct address_space *swapcache_mapping(struct folio *); /** * folio_file_mapping - Find the mapping this folio belongs to. * @folio: The folio. * * For folios which are in the page cache, return the mapping that this * page belongs to. Folios in the swap cache return the mapping of the * swap file or swap device where the data is stored. This is different * from the mapping returned by folio_mapping(). The only reason to * use it is if, like NFS, you return 0 from ->activate_swapfile. * * Do not call this for folios which aren't in the page cache or swap cache. */ static inline struct address_space *folio_file_mapping(struct folio *folio) { if (unlikely(folio_test_swapcache(folio))) return swapcache_mapping(folio); return folio->mapping; } /** * folio_flush_mapping - Find the file mapping this folio belongs to. * @folio: The folio. * * For folios which are in the page cache, return the mapping that this * page belongs to. Anonymous folios return NULL, even if they're in * the swap cache. Other kinds of folio also return NULL. * * This is ONLY used by architecture cache flushing code. If you aren't * writing cache flushing code, you want either folio_mapping() or * folio_file_mapping(). */ static inline struct address_space *folio_flush_mapping(struct folio *folio) { if (unlikely(folio_test_swapcache(folio))) return NULL; return folio_mapping(folio); } static inline struct address_space *page_file_mapping(struct page *page) { return folio_file_mapping(page_folio(page)); } /** * folio_inode - Get the host inode for this folio. * @folio: The folio. * * For folios which are in the page cache, return the inode that this folio * belongs to. * * Do not call this for folios which aren't in the page cache. */ static inline struct inode *folio_inode(struct folio *folio) { return folio->mapping->host; } /** * folio_attach_private - Attach private data to a folio. * @folio: Folio to attach data to. * @data: Data to attach to folio. * * Attaching private data to a folio increments the page's reference count. * The data must be detached before the folio will be freed. */ static inline void folio_attach_private(struct folio *folio, void *data) { folio_get(folio); folio->private = data; folio_set_private(folio); } /** * folio_change_private - Change private data on a folio. * @folio: Folio to change the data on. * @data: Data to set on the folio. * * Change the private data attached to a folio and return the old * data. The page must previously have had data attached and the data * must be detached before the folio will be freed. * * Return: Data that was previously attached to the folio. */ static inline void *folio_change_private(struct folio *folio, void *data) { void *old = folio_get_private(folio); folio->private = data; return old; } /** * folio_detach_private - Detach private data from a folio. * @folio: Folio to detach data from. * * Removes the data that was previously attached to the folio and decrements * the refcount on the page. * * Return: Data that was attached to the folio. */ static inline void *folio_detach_private(struct folio *folio) { void *data = folio_get_private(folio); if (!folio_test_private(folio)) return NULL; folio_clear_private(folio); folio->private = NULL; folio_put(folio); return data; } static inline void attach_page_private(struct page *page, void *data) { folio_attach_private(page_folio(page), data); } static inline void *detach_page_private(struct page *page) { return folio_detach_private(page_folio(page)); } #ifdef CONFIG_NUMA struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order); #else static inline struct folio *filemap_alloc_folio_noprof(gfp_t gfp, unsigned int order) { return folio_alloc_noprof(gfp, order); } #endif #define filemap_alloc_folio(...) \ alloc_hooks(filemap_alloc_folio_noprof(__VA_ARGS__)) static inline struct page *__page_cache_alloc(gfp_t gfp) { return &filemap_alloc_folio(gfp, 0)->page; } static inline gfp_t readahead_gfp_mask(struct address_space *x) { return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN; } typedef int filler_t(struct file *, struct folio *); pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan); pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan); /** * typedef fgf_t - Flags for getting folios from the page cache. * * Most users of the page cache will not need to use these flags; * there are convenience functions such as filemap_get_folio() and * filemap_lock_folio(). For users which need more control over exactly * what is done with the folios, these flags to __filemap_get_folio() * are available. * * * %FGP_ACCESSED - The folio will be marked accessed. * * %FGP_LOCK - The folio is returned locked. * * %FGP_CREAT - If no folio is present then a new folio is allocated, * added to the page cache and the VM's LRU list. The folio is * returned locked. * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the * folio is already in cache. If the folio was allocated, unlock it * before returning so the caller can do the same dance. * * %FGP_WRITE - The folio will be written to by the caller. * * %FGP_NOFS - __GFP_FS will get cleared in gfp. * * %FGP_NOWAIT - Don't block on the folio lock. * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin() * implementation. */ typedef unsigned int __bitwise fgf_t; #define FGP_ACCESSED ((__force fgf_t)0x00000001) #define FGP_LOCK ((__force fgf_t)0x00000002) #define FGP_CREAT ((__force fgf_t)0x00000004) #define FGP_WRITE ((__force fgf_t)0x00000008) #define FGP_NOFS ((__force fgf_t)0x00000010) #define FGP_NOWAIT ((__force fgf_t)0x00000020) #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) #define FGP_STABLE ((__force fgf_t)0x00000080) #define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) /** * fgf_set_order - Encode a length in the fgf_t flags. * @size: The suggested size of the folio to create. * * The caller of __filemap_get_folio() can use this to suggest a preferred * size for the folio that is created. If there is already a folio at * the index, it will be returned, no matter what its size. If a folio * is freshly created, it may be of a different size than requested * due to alignment constraints, memory pressure, or the presence of * other folios at nearby indices. */ static inline fgf_t fgf_set_order(size_t size) { unsigned int shift = ilog2(size); if (shift <= PAGE_SHIFT) return 0; return (__force fgf_t)((shift - PAGE_SHIFT) << 26); } void *filemap_get_entry(struct address_space *mapping, pgoff_t index); struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp); struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp); /** * filemap_get_folio - Find and get a folio. * @mapping: The address_space to search. * @index: The page index. * * Looks up the page cache entry at @mapping & @index. If a folio is * present, it is returned with an increased refcount. * * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for * this index. Will not return a shadow, swap or DAX entry. */ static inline struct folio *filemap_get_folio(struct address_space *mapping, pgoff_t index) { return __filemap_get_folio(mapping, index, 0, 0); } /** * filemap_lock_folio - Find and lock a folio. * @mapping: The address_space to search. * @index: The page index. * * Looks up the page cache entry at @mapping & @index. If a folio is * present, it is returned locked with an increased refcount. * * Context: May sleep. * Return: A folio or ERR_PTR(-ENOENT) if there is no folio in the cache for * this index. Will not return a shadow, swap or DAX entry. */ static inline struct folio *filemap_lock_folio(struct address_space *mapping, pgoff_t index) { return __filemap_get_folio(mapping, index, FGP_LOCK, 0); } /** * filemap_grab_folio - grab a folio from the page cache * @mapping: The address space to search * @index: The page index * * Looks up the page cache entry at @mapping & @index. If no folio is found, * a new folio is created. The folio is locked, marked as accessed, and * returned. * * Return: A found or created folio. ERR_PTR(-ENOMEM) if no folio is found * and failed to create a folio. */ static inline struct folio *filemap_grab_folio(struct address_space *mapping, pgoff_t index) { return __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, mapping_gfp_mask(mapping)); } /** * find_get_page - find and get a page reference * @mapping: the address_space to search * @offset: the page index * * Looks up the page cache slot at @mapping & @offset. If there is a * page cache page, it is returned with an increased refcount. * * Otherwise, %NULL is returned. */ static inline struct page *find_get_page(struct address_space *mapping, pgoff_t offset) { return pagecache_get_page(mapping, offset, 0, 0); } static inline struct page *find_get_page_flags(struct address_space *mapping, pgoff_t offset, fgf_t fgp_flags) { return pagecache_get_page(mapping, offset, fgp_flags, 0); } /** * find_lock_page - locate, pin and lock a pagecache page * @mapping: the address_space to search * @index: the page index * * Looks up the page cache entry at @mapping & @index. If there is a * page cache page, it is returned locked and with an increased * refcount. * * Context: May sleep. * Return: A struct page or %NULL if there is no page in the cache for this * index. */ static inline struct page *find_lock_page(struct address_space *mapping, pgoff_t index) { return pagecache_get_page(mapping, index, FGP_LOCK, 0); } /** * find_or_create_page - locate or add a pagecache page * @mapping: the page's address_space * @index: the page's index into the mapping * @gfp_mask: page allocation mode * * Looks up the page cache slot at @mapping & @offset. If there is a * page cache page, it is returned locked and with an increased * refcount. * * If the page is not present, a new page is allocated using @gfp_mask * and added to the page cache and the VM's LRU list. The page is * returned locked and with an increased refcount. * * On memory exhaustion, %NULL is returned. * * find_or_create_page() may sleep, even if @gfp_flags specifies an * atomic allocation! */ static inline struct page *find_or_create_page(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask) { return pagecache_get_page(mapping, index, FGP_LOCK|FGP_ACCESSED|FGP_CREAT, gfp_mask); } /** * grab_cache_page_nowait - returns locked page at given index in given cache * @mapping: target address_space * @index: the page index * * Same as grab_cache_page(), but do not wait if the page is unavailable. * This is intended for speculative data generators, where the data can * be regenerated if the page couldn't be grabbed. This routine should * be safe to call while holding the lock for another page. * * Clear __GFP_FS when allocating the page to avoid recursion into the fs * and deadlock against the caller's locked page. */ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, pgoff_t index) { return pagecache_get_page(mapping, index, FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT, mapping_gfp_mask(mapping)); } extern pgoff_t __folio_swap_cache_index(struct folio *folio); /** * folio_index - File index of a folio. * @folio: The folio. * * For a folio which is either in the page cache or the swap cache, * return its index within the address_space it belongs to. If you know * the page is definitely in the page cache, you can look at the folio's * index directly. * * Return: The index (offset in units of pages) of a folio in its file. */ static inline pgoff_t folio_index(struct folio *folio) { if (unlikely(folio_test_swapcache(folio))) return __folio_swap_cache_index(folio); return folio->index; } /** * folio_next_index - Get the index of the next folio. * @folio: The current folio. * * Return: The index of the folio which follows this folio in the file. */ static inline pgoff_t folio_next_index(struct folio *folio) { return folio->index + folio_nr_pages(folio); } /** * folio_file_page - The page for a particular index. * @folio: The folio which contains this index. * @index: The index we want to look up. * * Sometimes after looking up a folio in the page cache, we need to * obtain the specific page for an index (eg a page fault). * * Return: The page containing the file data for this index. */ static inline struct page *folio_file_page(struct folio *folio, pgoff_t index) { return folio_page(folio, index & (folio_nr_pages(folio) - 1)); } /** * folio_contains - Does this folio contain this index? * @folio: The folio. * @index: The page index within the file. * * Context: The caller should have the page locked in order to prevent * (eg) shmem from moving the page between the page cache and swap cache * and changing its index in the middle of the operation. * Return: true or false. */ static inline bool folio_contains(struct folio *folio, pgoff_t index) { return index - folio_index(folio) < folio_nr_pages(folio); } /* * Given the page we found in the page cache, return the page corresponding * to this index in the file */ static inline struct page *find_subpage(struct page *head, pgoff_t index) { /* HugeTLBfs wants the head page regardless */ if (PageHuge(head)) return head; return head + (index & (thp_nr_pages(head) - 1)); } unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned filemap_get_folios_contig(struct address_space *mapping, pgoff_t *start, pgoff_t end, struct folio_batch *fbatch); unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start, pgoff_t end, xa_mark_t tag, struct folio_batch *fbatch); struct page *grab_cache_page_write_begin(struct address_space *mapping, pgoff_t index); /* * Returns locked page at given index in given cache, creating it if needed. */ static inline struct page *grab_cache_page(struct address_space *mapping, pgoff_t index) { return find_or_create_page(mapping, index, mapping_gfp_mask(mapping)); } struct folio *read_cache_folio(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); struct folio *mapping_read_folio_gfp(struct address_space *, pgoff_t index, gfp_t flags); struct page *read_cache_page(struct address_space *, pgoff_t index, filler_t *filler, struct file *file); extern struct page * read_cache_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); static inline struct page *read_mapping_page(struct address_space *mapping, pgoff_t index, struct file *file) { return read_cache_page(mapping, index, NULL, file); } static inline struct folio *read_mapping_folio(struct address_space *mapping, pgoff_t index, struct file *file) { return read_cache_folio(mapping, index, NULL, file); } /** * page_pgoff - Calculate the logical page offset of this page. * @folio: The folio containing this page. * @page: The page which we need the offset of. * * For file pages, this is the offset from the beginning of the file * in units of PAGE_SIZE. For anonymous pages, this is the offset from * the beginning of the anon_vma in units of PAGE_SIZE. This will * return nonsense for KSM pages. * * Context: Caller must have a reference on the folio or otherwise * prevent it from being split or freed. * * Return: The offset in units of PAGE_SIZE. */ static inline pgoff_t page_pgoff(const struct folio *folio, const struct page *page) { return folio->index + folio_page_idx(folio, page); } /* * Return byte-offset into filesystem object for page. */ static inline loff_t page_offset(struct page *page) { return ((loff_t)page->index) << PAGE_SHIFT; } /** * folio_pos - Returns the byte position of this folio in its file. * @folio: The folio. */ static inline loff_t folio_pos(struct folio *folio) { return page_offset(&folio->page); } /* * Get the offset in PAGE_SIZE (even for hugetlb folios). */ static inline pgoff_t folio_pgoff(struct folio *folio) { return folio->index; } static inline pgoff_t linear_page_index(struct vm_area_struct *vma, unsigned long address) { pgoff_t pgoff; pgoff = (address - vma->vm_start) >> PAGE_SHIFT; pgoff += vma->vm_pgoff; return pgoff; } struct wait_page_key { struct folio *folio; int bit_nr; int page_match; }; struct wait_page_queue { struct folio *folio; int bit_nr; wait_queue_entry_t wait; }; static inline bool wake_page_match(struct wait_page_queue *wait_page, struct wait_page_key *key) { if (wait_page->folio != key->folio) return false; key->page_match = 1; if (wait_page->bit_nr != key->bit_nr) return false; return true; } void __folio_lock(struct folio *folio); int __folio_lock_killable(struct folio *folio); vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf); void unlock_page(struct page *page); void folio_unlock(struct folio *folio); /** * folio_trylock() - Attempt to lock a folio. * @folio: The folio to attempt to lock. * * Sometimes it is undesirable to wait for a folio to be unlocked (eg * when the locks are being taken in the wrong order, or if making * progress through a batch of folios is more important than processing * them in order). Usually folio_lock() is the correct function to call. * * Context: Any context. * Return: Whether the lock was successfully acquired. */ static inline bool folio_trylock(struct folio *folio) { return likely(!test_and_set_bit_lock(PG_locked, folio_flags(folio, 0))); } /* * Return true if the page was successfully locked */ static inline bool trylock_page(struct page *page) { return folio_trylock(page_folio(page)); } /** * folio_lock() - Lock this folio. * @folio: The folio to lock. * * The folio lock protects against many things, probably more than it * should. It is primarily held while a folio is being brought uptodate, * either from its backing file or from swap. It is also held while a * folio is being truncated from its address_space, so holding the lock * is sufficient to keep folio->mapping stable. * * The folio lock is also held while write() is modifying the page to * provide POSIX atomicity guarantees (as long as the write does not * cross a page boundary). Other modifications to the data in the folio * do not hold the folio lock and can race with writes, eg DMA and stores * to mapped pages. * * Context: May sleep. If you need to acquire the locks of two or * more folios, they must be in order of ascending index, if they are * in the same address_space. If they are in different address_spaces, * acquire the lock of the folio which belongs to the address_space which * has the lowest address in memory first. */ static inline void folio_lock(struct folio *folio) { might_sleep(); if (!folio_trylock(folio)) __folio_lock(folio); } /** * lock_page() - Lock the folio containing this page. * @page: The page to lock. * * See folio_lock() for a description of what the lock protects. * This is a legacy function and new code should probably use folio_lock() * instead. * * Context: May sleep. Pages in the same folio share a lock, so do not * attempt to lock two pages which share a folio. */ static inline void lock_page(struct page *page) { struct folio *folio; might_sleep(); folio = page_folio(page); if (!folio_trylock(folio)) __folio_lock(folio); } /** * folio_lock_killable() - Lock this folio, interruptible by a fatal signal. * @folio: The folio to lock. * * Attempts to lock the folio, like folio_lock(), except that the sleep * to acquire the lock is interruptible by a fatal signal. * * Context: May sleep; see folio_lock(). * Return: 0 if the lock was acquired; -EINTR if a fatal signal was received. */ static inline int folio_lock_killable(struct folio *folio) { might_sleep(); if (!folio_trylock(folio)) return __folio_lock_killable(folio); return 0; } /* * folio_lock_or_retry - Lock the folio, unless this would block and the * caller indicated that it can handle a retry. * * Return value and mmap_lock implications depend on flags; see * __folio_lock_or_retry(). */ static inline vm_fault_t folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf) { might_sleep(); if (!folio_trylock(folio)) return __folio_lock_or_retry(folio, vmf); return 0; } /* * This is exported only for folio_wait_locked/folio_wait_writeback, etc., * and should not be used directly. */ void folio_wait_bit(struct folio *folio, int bit_nr); int folio_wait_bit_killable(struct folio *folio, int bit_nr); /* * Wait for a folio to be unlocked. * * This must be called with the caller "holding" the folio, * ie with increased folio reference count so that the folio won't * go away during the wait. */ static inline void folio_wait_locked(struct folio *folio) { if (folio_test_locked(folio)) folio_wait_bit(folio, PG_locked); } static inline int folio_wait_locked_killable(struct folio *folio) { if (!folio_test_locked(folio)) return 0; return folio_wait_bit_killable(folio, PG_locked); } static inline void wait_on_page_locked(struct page *page) { folio_wait_locked(page_folio(page)); } void folio_end_read(struct folio *folio, bool success); void wait_on_page_writeback(struct page *page); void folio_wait_writeback(struct folio *folio); int folio_wait_writeback_killable(struct folio *folio); void end_page_writeback(struct page *page); void folio_end_writeback(struct folio *folio); void wait_for_stable_page(struct page *page); void folio_wait_stable(struct folio *folio); void __folio_mark_dirty(struct folio *folio, struct address_space *, int warn); void folio_account_cleaned(struct folio *folio, struct bdi_writeback *wb); void __folio_cancel_dirty(struct folio *folio); static inline void folio_cancel_dirty(struct folio *folio) { /* Avoid atomic ops, locking, etc. when not actually needed. */ if (folio_test_dirty(folio)) __folio_cancel_dirty(folio); } bool folio_clear_dirty_for_io(struct folio *folio); bool clear_page_dirty_for_io(struct page *page); void folio_invalidate(struct folio *folio, size_t offset, size_t length); bool noop_dirty_folio(struct address_space *mapping, struct folio *folio); #ifdef CONFIG_MIGRATION int filemap_migrate_folio(struct address_space *mapping, struct folio *dst, struct folio *src, enum migrate_mode mode); #else #define filemap_migrate_folio NULL #endif void folio_end_private_2(struct folio *folio); void folio_wait_private_2(struct folio *folio); int folio_wait_private_2_killable(struct folio *folio); /* * Add an arbitrary waiter to a page's wait queue */ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter); /* * Fault in userspace address range. */ size_t fault_in_writeable(char __user *uaddr, size_t size); size_t fault_in_subpage_writeable(char __user *uaddr, size_t size); size_t fault_in_safe_writeable(const char __user *uaddr, size_t size); size_t fault_in_readable(const char __user *uaddr, size_t size); int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp); int filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp); void filemap_remove_folio(struct folio *folio); void __filemap_remove_folio(struct folio *folio, void *shadow); void replace_page_cache_folio(struct folio *old, struct folio *new); void delete_from_page_cache_batch(struct address_space *mapping, struct folio_batch *fbatch); bool filemap_release_folio(struct folio *folio, gfp_t gfp); loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end, int whence); /* Must be non-static for BPF error injection */ int __filemap_add_folio(struct address_space *mapping, struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp); bool filemap_range_has_writeback(struct address_space *mapping, loff_t start_byte, loff_t end_byte); /** * filemap_range_needs_writeback - check if range potentially needs writeback * @mapping: address space within which to check * @start_byte: offset in bytes where the range starts * @end_byte: offset in bytes where the range ends (inclusive) * * Find at least one page in the range supplied, usually used to check if * direct writing in this range will trigger a writeback. Used by O_DIRECT * read/write with IOCB_NOWAIT, to see if the caller needs to do * filemap_write_and_wait_range() before proceeding. * * Return: %true if the caller should do filemap_write_and_wait_range() before * doing O_DIRECT to a page in this range, %false otherwise. */ static inline bool filemap_range_needs_writeback(struct address_space *mapping, loff_t start_byte, loff_t end_byte) { if (!mapping->nrpages) return false; if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) return false; return filemap_range_has_writeback(mapping, start_byte, end_byte); } /** * struct readahead_control - Describes a readahead request. * * A readahead request is for consecutive pages. Filesystems which * implement the ->readahead method should call readahead_page() or * readahead_page_batch() in a loop and attempt to start I/O against * each page in the request. * * Most of the fields in this struct are private and should be accessed * by the functions below. * * @file: The file, used primarily by network filesystems for authentication. * May be NULL if invoked internally by the filesystem. * @mapping: Readahead this filesystem object. * @ra: File readahead state. May be NULL. */ struct readahead_control { struct file *file; struct address_space *mapping; struct file_ra_state *ra; /* private: use the readahead_* accessors instead */ pgoff_t _index; unsigned int _nr_pages; unsigned int _batch_count; bool _workingset; unsigned long _pflags; }; #define DEFINE_READAHEAD(ractl, f, r, m, i) \ struct readahead_control ractl = { \ .file = f, \ .mapping = m, \ .ra = r, \ ._index = i, \ } #define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE) void page_cache_ra_unbounded(struct readahead_control *, unsigned long nr_to_read, unsigned long lookahead_count); void page_cache_sync_ra(struct readahead_control *, unsigned long req_count); void page_cache_async_ra(struct readahead_control *, struct folio *, unsigned long req_count); void readahead_expand(struct readahead_control *ractl, loff_t new_start, size_t new_len); /** * page_cache_sync_readahead - generic file readahead * @mapping: address_space which holds the pagecache and I/O vectors * @ra: file_ra_state which holds the readahead state * @file: Used by the filesystem for authentication. * @index: Index of first page to be read. * @req_count: Total number of pages being read by the caller. * * page_cache_sync_readahead() should be called when a cache miss happened: * it will submit the read. The readahead logic may decide to piggyback more * pages onto the read request if access patterns suggest it will improve * performance. */ static inline void page_cache_sync_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *file, pgoff_t index, unsigned long req_count) { DEFINE_READAHEAD(ractl, file, ra, mapping, index); page_cache_sync_ra(&ractl, req_count); } /** * page_cache_async_readahead - file readahead for marked pages * @mapping: address_space which holds the pagecache and I/O vectors * @ra: file_ra_state which holds the readahead state * @file: Used by the filesystem for authentication. * @folio: The folio which triggered the readahead call. * @req_count: Total number of pages being read by the caller. * * page_cache_async_readahead() should be called when a page is used which * is marked as PageReadahead; this is a marker to suggest that the application * has used up enough of the readahead window that we should start pulling in * more pages. */ static inline void page_cache_async_readahead(struct address_space *mapping, struct file_ra_state *ra, struct file *file, struct folio *folio, unsigned long req_count) { DEFINE_READAHEAD(ractl, file, ra, mapping, folio->index); page_cache_async_ra(&ractl, folio, req_count); } static inline struct folio *__readahead_folio(struct readahead_control *ractl) { struct folio *folio; BUG_ON(ractl->_batch_count > ractl->_nr_pages); ractl->_nr_pages -= ractl->_batch_count; ractl->_index += ractl->_batch_count; if (!ractl->_nr_pages) { ractl->_batch_count = 0; return NULL; } folio = xa_load(&ractl->mapping->i_pages, ractl->_index); VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio); ractl->_batch_count = folio_nr_pages(folio); return folio; } /** * readahead_page - Get the next page to read. * @ractl: The current readahead request. * * Context: The page is locked and has an elevated refcount. The caller * should decreases the refcount once the page has been submitted for I/O * and unlock the page once all I/O to that page has completed. * Return: A pointer to the next page, or %NULL if we are done. */ static inline struct page *readahead_page(struct readahead_control *ractl) { struct folio *folio = __readahead_folio(ractl); return &folio->page; } /** * readahead_folio - Get the next folio to read. * @ractl: The current readahead request. * * Context: The folio is locked. The caller should unlock the folio once * all I/O to that folio has completed. * Return: A pointer to the next folio, or %NULL if we are done. */ static inline struct folio *readahead_folio(struct readahead_control *ractl) { struct folio *folio = __readahead_folio(ractl); if (folio) folio_put(folio); return folio; } static inline unsigned int __readahead_batch(struct readahead_control *rac, struct page **array, unsigned int array_sz) { unsigned int i = 0; XA_STATE(xas, &rac->mapping->i_pages, 0); struct page *page; BUG_ON(rac->_batch_count > rac->_nr_pages); rac->_nr_pages -= rac->_batch_count; rac->_index += rac->_batch_count; rac->_batch_count = 0; xas_set(&xas, rac->_index); rcu_read_lock(); xas_for_each(&xas, page, rac->_index + rac->_nr_pages - 1) { if (xas_retry(&xas, page)) continue; VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageTail(page), page); array[i++] = page; rac->_batch_count += thp_nr_pages(page); if (i == array_sz) break; } rcu_read_unlock(); return i; } /** * readahead_page_batch - Get a batch of pages to read. * @rac: The current readahead request. * @array: An array of pointers to struct page. * * Context: The pages are locked and have an elevated refcount. The caller * should decreases the refcount once the page has been submitted for I/O * and unlock the page once all I/O to that page has completed. * Return: The number of pages placed in the array. 0 indicates the request * is complete. */ #define readahead_page_batch(rac, array) \ __readahead_batch(rac, array, ARRAY_SIZE(array)) /** * readahead_pos - The byte offset into the file of this readahead request. * @rac: The readahead request. */ static inline loff_t readahead_pos(struct readahead_control *rac) { return (loff_t)rac->_index * PAGE_SIZE; } /** * readahead_length - The number of bytes in this readahead request. * @rac: The readahead request. */ static inline size_t readahead_length(struct readahead_control *rac) { return rac->_nr_pages * PAGE_SIZE; } /** * readahead_index - The index of the first page in this readahead request. * @rac: The readahead request. */ static inline pgoff_t readahead_index(struct readahead_control *rac) { return rac->_index; } /** * readahead_count - The number of pages in this readahead request. * @rac: The readahead request. */ static inline unsigned int readahead_count(struct readahead_control *rac) { return rac->_nr_pages; } /** * readahead_batch_length - The number of bytes in the current batch. * @rac: The readahead request. */ static inline size_t readahead_batch_length(struct readahead_control *rac) { return rac->_batch_count * PAGE_SIZE; } static inline unsigned long dir_pages(struct inode *inode) { return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT; } /** * folio_mkwrite_check_truncate - check if folio was truncated * @folio: the folio to check * @inode: the inode to check the folio against * * Return: the number of bytes in the folio up to EOF, * or -EFAULT if the folio was truncated. */ static inline ssize_t folio_mkwrite_check_truncate(struct folio *folio, struct inode *inode) { loff_t size = i_size_read(inode); pgoff_t index = size >> PAGE_SHIFT; size_t offset = offset_in_folio(folio, size); if (!folio->mapping) return -EFAULT; /* folio is wholly inside EOF */ if (folio_next_index(folio) - 1 < index) return folio_size(folio); /* folio is wholly past EOF */ if (folio->index > index || !offset) return -EFAULT; /* folio is partially inside EOF */ return offset; } /** * page_mkwrite_check_truncate - check if page was truncated * @page: the page to check * @inode: the inode to check the page against * * Returns the number of bytes in the page up to EOF, * or -EFAULT if the page was truncated. */ static inline int page_mkwrite_check_truncate(struct page *page, struct inode *inode) { loff_t size = i_size_read(inode); pgoff_t index = size >> PAGE_SHIFT; int offset = offset_in_page(size); if (page->mapping != inode->i_mapping) return -EFAULT; /* page is wholly inside EOF */ if (page->index < index) return PAGE_SIZE; /* page is wholly past EOF */ if (page->index > index || !offset) return -EFAULT; /* page is partially inside EOF */ return offset; } /** * i_blocks_per_folio - How many blocks fit in this folio. * @inode: The inode which contains the blocks. * @folio: The folio. * * If the block size is larger than the size of this folio, return zero. * * Context: The caller should hold a refcount on the folio to prevent it * from being split. * Return: The number of filesystem blocks covered by this folio. */ static inline unsigned int i_blocks_per_folio(struct inode *inode, struct folio *folio) { return folio_size(folio) >> inode->i_blkbits; } #endif /* _LINUX_PAGEMAP_H */ |
136 136 136 136 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 | // SPDX-License-Identifier: GPL-2.0 #include <linux/slab.h> #include <linux/lockdep.h> #include <linux/sysfs.h> #include <linux/kobject.h> #include <linux/memory.h> #include <linux/memory-tiers.h> #include <linux/notifier.h> #include <linux/sched/sysctl.h> #include "internal.h" struct memory_tier { /* hierarchy of memory tiers */ struct list_head list; /* list of all memory types part of this tier */ struct list_head memory_types; /* * start value of abstract distance. memory tier maps * an abstract distance range, * adistance_start .. adistance_start + MEMTIER_CHUNK_SIZE */ int adistance_start; struct device dev; /* All the nodes that are part of all the lower memory tiers. */ nodemask_t lower_tier_mask; }; struct demotion_nodes { nodemask_t preferred; }; struct node_memory_type_map { struct memory_dev_type *memtype; int map_count; }; static DEFINE_MUTEX(memory_tier_lock); static LIST_HEAD(memory_tiers); /* * The list is used to store all memory types that are not created * by a device driver. */ static LIST_HEAD(default_memory_types); static struct node_memory_type_map node_memory_types[MAX_NUMNODES]; struct memory_dev_type *default_dram_type; nodemask_t default_dram_nodes __initdata = NODE_MASK_NONE; static const struct bus_type memory_tier_subsys = { .name = "memory_tiering", .dev_name = "memory_tier", }; #ifdef CONFIG_NUMA_BALANCING /** * folio_use_access_time - check if a folio reuses cpupid for page access time * @folio: folio to check * * folio's _last_cpupid field is repurposed by memory tiering. In memory * tiering mode, cpupid of slow memory folio (not toptier memory) is used to * record page access time. * * Return: the folio _last_cpupid is used to record page access time */ bool folio_use_access_time(struct folio *folio) { return (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) && !node_is_toptier(folio_nid(folio)); } #endif #ifdef CONFIG_MIGRATION static int top_tier_adistance; /* * node_demotion[] examples: * * Example 1: * * Node 0 & 1 are CPU + DRAM nodes, node 2 & 3 are PMEM nodes. * * node distances: * node 0 1 2 3 * 0 10 20 30 40 * 1 20 10 40 30 * 2 30 40 10 40 * 3 40 30 40 10 * * memory_tiers0 = 0-1 * memory_tiers1 = 2-3 * * node_demotion[0].preferred = 2 * node_demotion[1].preferred = 3 * node_demotion[2].preferred = <empty> * node_demotion[3].preferred = <empty> * * Example 2: * * Node 0 & 1 are CPU + DRAM nodes, node 2 is memory-only DRAM node. * * node distances: * node 0 1 2 * 0 10 20 30 * 1 20 10 30 * 2 30 30 10 * * memory_tiers0 = 0-2 * * node_demotion[0].preferred = <empty> * node_demotion[1].preferred = <empty> * node_demotion[2].preferred = <empty> * * Example 3: * * Node 0 is CPU + DRAM nodes, Node 1 is HBM node, node 2 is PMEM node. * * node distances: * node 0 1 2 * 0 10 20 30 * 1 20 10 40 * 2 30 40 10 * * memory_tiers0 = 1 * memory_tiers1 = 0 * memory_tiers2 = 2 * * node_demotion[0].preferred = 2 * node_demotion[1].preferred = 0 * node_demotion[2].preferred = <empty> * */ static struct demotion_nodes *node_demotion __read_mostly; #endif /* CONFIG_MIGRATION */ static BLOCKING_NOTIFIER_HEAD(mt_adistance_algorithms); /* The lock is used to protect `default_dram_perf*` info and nid. */ static DEFINE_MUTEX(default_dram_perf_lock); static bool default_dram_perf_error; static struct access_coordinate default_dram_perf; static int default_dram_perf_ref_nid = NUMA_NO_NODE; static const char *default_dram_perf_ref_source; static inline struct memory_tier *to_memory_tier(struct device *device) { return container_of(device, struct memory_tier, dev); } static __always_inline nodemask_t get_memtier_nodemask(struct memory_tier *memtier) { nodemask_t nodes = NODE_MASK_NONE; struct memory_dev_type *memtype; list_for_each_entry(memtype, &memtier->memory_types, tier_sibling) nodes_or(nodes, nodes, memtype->nodes); return nodes; } static void memory_tier_device_release(struct device *dev) { struct memory_tier *tier = to_memory_tier(dev); /* * synchronize_rcu in clear_node_memory_tier makes sure * we don't have rcu access to this memory tier. */ kfree(tier); } static ssize_t nodelist_show(struct device *dev, struct device_attribute *attr, char *buf) { int ret; nodemask_t nmask; mutex_lock(&memory_tier_lock); nmask = get_memtier_nodemask(to_memory_tier(dev)); ret = sysfs_emit(buf, "%*pbl\n", nodemask_pr_args(&nmask)); mutex_unlock(&memory_tier_lock); return ret; } static DEVICE_ATTR_RO(nodelist); static struct attribute *memtier_dev_attrs[] = { &dev_attr_nodelist.attr, NULL }; static const struct attribute_group memtier_dev_group = { .attrs = memtier_dev_attrs, }; static const struct attribute_group *memtier_dev_groups[] = { &memtier_dev_group, NULL }; static struct memory_tier *find_create_memory_tier(struct memory_dev_type *memtype) { int ret; bool found_slot = false; struct memory_tier *memtier, *new_memtier; int adistance = memtype->adistance; unsigned int memtier_adistance_chunk_size = MEMTIER_CHUNK_SIZE; lockdep_assert_held_once(&memory_tier_lock); adistance = round_down(adistance, memtier_adistance_chunk_size); /* * If the memtype is already part of a memory tier, * just return that. */ if (!list_empty(&memtype->tier_sibling)) { list_for_each_entry(memtier, &memory_tiers, list) { if (adistance == memtier->adistance_start) return memtier; } WARN_ON(1); return ERR_PTR(-EINVAL); } list_for_each_entry(memtier, &memory_tiers, list) { if (adistance == memtier->adistance_start) { goto link_memtype; } else if (adistance < memtier->adistance_start) { found_slot = true; break; } } new_memtier = kzalloc(sizeof(struct memory_tier), GFP_KERNEL); if (!new_memtier) return ERR_PTR(-ENOMEM); new_memtier->adistance_start = adistance; INIT_LIST_HEAD(&new_memtier->list); INIT_LIST_HEAD(&new_memtier->memory_types); if (found_slot) list_add_tail(&new_memtier->list, &memtier->list); else list_add_tail(&new_memtier->list, &memory_tiers); new_memtier->dev.id = adistance >> MEMTIER_CHUNK_BITS; new_memtier->dev.bus = &memory_tier_subsys; new_memtier->dev.release = memory_tier_device_release; new_memtier->dev.groups = memtier_dev_groups; ret = device_register(&new_memtier->dev); if (ret) { list_del(&new_memtier->list); put_device(&new_memtier->dev); return ERR_PTR(ret); } memtier = new_memtier; link_memtype: list_add(&memtype->tier_sibling, &memtier->memory_types); return memtier; } static struct memory_tier *__node_get_memory_tier(int node) { pg_data_t *pgdat; pgdat = NODE_DATA(node); if (!pgdat) return NULL; /* * Since we hold memory_tier_lock, we can avoid * RCU read locks when accessing the details. No * parallel updates are possible here. */ return rcu_dereference_check(pgdat->memtier, lockdep_is_held(&memory_tier_lock)); } #ifdef CONFIG_MIGRATION bool node_is_toptier(int node) { bool toptier; pg_data_t *pgdat; struct memory_tier *memtier; pgdat = NODE_DATA(node); if (!pgdat) return false; rcu_read_lock(); memtier = rcu_dereference(pgdat->memtier); if (!memtier) { toptier = true; goto out; } if (memtier->adistance_start <= top_tier_adistance) toptier = true; else toptier = false; out: rcu_read_unlock(); return toptier; } void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets) { struct memory_tier *memtier; /* * pg_data_t.memtier updates includes a synchronize_rcu() * which ensures that we either find NULL or a valid memtier * in NODE_DATA. protect the access via rcu_read_lock(); */ rcu_read_lock(); memtier = rcu_dereference(pgdat->memtier); if (memtier) *targets = memtier->lower_tier_mask; else *targets = NODE_MASK_NONE; rcu_read_unlock(); } /** * next_demotion_node() - Get the next node in the demotion path * @node: The starting node to lookup the next node * * Return: node id for next memory node in the demotion path hierarchy * from @node; NUMA_NO_NODE if @node is terminal. This does not keep * @node online or guarantee that it *continues* to be the next demotion * target. */ int next_demotion_node(int node) { struct demotion_nodes *nd; int target; if (!node_demotion) return NUMA_NO_NODE; nd = &node_demotion[node]; /* * node_demotion[] is updated without excluding this * function from running. * * Make sure to use RCU over entire code blocks if * node_demotion[] reads need to be consistent. */ rcu_read_lock(); /* * If there are multiple target nodes, just select one * target node randomly. * * In addition, we can also use round-robin to select * target node, but we should introduce another variable * for node_demotion[] to record last selected target node, * that may cause cache ping-pong due to the changing of * last target node. Or introducing per-cpu data to avoid * caching issue, which seems more complicated. So selecting * target node randomly seems better until now. */ target = node_random(&nd->preferred); rcu_read_unlock(); return target; } static void disable_all_demotion_targets(void) { struct memory_tier *memtier; int node; for_each_node_state(node, N_MEMORY) { node_demotion[node].preferred = NODE_MASK_NONE; /* * We are holding memory_tier_lock, it is safe * to access pgda->memtier. */ memtier = __node_get_memory_tier(node); if (memtier) memtier->lower_tier_mask = NODE_MASK_NONE; } /* * Ensure that the "disable" is visible across the system. * Readers will see either a combination of before+disable * state or disable+after. They will never see before and * after state together. */ synchronize_rcu(); } static void dump_demotion_targets(void) { int node; for_each_node_state(node, N_MEMORY) { struct memory_tier *memtier = __node_get_memory_tier(node); nodemask_t preferred = node_demotion[node].preferred; if (!memtier) continue; if (nodes_empty(preferred)) pr_info("Demotion targets for Node %d: null\n", node); else pr_info("Demotion targets for Node %d: preferred: %*pbl, fallback: %*pbl\n", node, nodemask_pr_args(&preferred), nodemask_pr_args(&memtier->lower_tier_mask)); } } /* * Find an automatic demotion target for all memory * nodes. Failing here is OK. It might just indicate * being at the end of a chain. */ static void establish_demotion_targets(void) { struct memory_tier *memtier; struct demotion_nodes *nd; int target = NUMA_NO_NODE, node; int distance, best_distance; nodemask_t tier_nodes, lower_tier; lockdep_assert_held_once(&memory_tier_lock); if (!node_demotion) return; disable_all_demotion_targets(); for_each_node_state(node, N_MEMORY) { best_distance = -1; nd = &node_demotion[node]; memtier = __node_get_memory_tier(node); if (!memtier || list_is_last(&memtier->list, &memory_tiers)) continue; /* * Get the lower memtier to find the demotion node list. */ memtier = list_next_entry(memtier, list); tier_nodes = get_memtier_nodemask(memtier); /* * find_next_best_node, use 'used' nodemask as a skip list. * Add all memory nodes except the selected memory tier * nodelist to skip list so that we find the best node from the * memtier nodelist. */ nodes_andnot(tier_nodes, node_states[N_MEMORY], tier_nodes); /* * Find all the nodes in the memory tier node list of same best distance. * add them to the preferred mask. We randomly select between nodes * in the preferred mask when allocating pages during demotion. */ do { target = find_next_best_node(node, &tier_nodes); if (target == NUMA_NO_NODE) break; distance = node_distance(node, target); if (distance == best_distance || best_distance == -1) { best_distance = distance; node_set(target, nd->preferred); } else { break; } } while (1); } /* * Promotion is allowed from a memory tier to higher * memory tier only if the memory tier doesn't include * compute. We want to skip promotion from a memory tier, * if any node that is part of the memory tier have CPUs. * Once we detect such a memory tier, we consider that tier * as top tiper from which promotion is not allowed. */ list_for_each_entry_reverse(memtier, &memory_tiers, list) { tier_nodes = get_memtier_nodemask(memtier); nodes_and(tier_nodes, node_states[N_CPU], tier_nodes); if (!nodes_empty(tier_nodes)) { /* * abstract distance below the max value of this memtier * is considered toptier. */ top_tier_adistance = memtier->adistance_start + MEMTIER_CHUNK_SIZE - 1; break; } } /* * Now build the lower_tier mask for each node collecting node mask from * all memory tier below it. This allows us to fallback demotion page * allocation to a set of nodes that is closer the above selected * preferred node. */ lower_tier = node_states[N_MEMORY]; list_for_each_entry(memtier, &memory_tiers, list) { /* * Keep removing current tier from lower_tier nodes, * This will remove all nodes in current and above * memory tier from the lower_tier mask. */ tier_nodes = get_memtier_nodemask(memtier); nodes_andnot(lower_tier, lower_tier, tier_nodes); memtier->lower_tier_mask = lower_tier; } dump_demotion_targets(); } #else static inline void establish_demotion_targets(void) {} #endif /* CONFIG_MIGRATION */ static inline void __init_node_memory_type(int node, struct memory_dev_type *memtype) { if (!node_memory_types[node].memtype) node_memory_types[node].memtype = memtype; /* * for each device getting added in the same NUMA node * with this specific memtype, bump the map count. We * Only take memtype device reference once, so that * changing a node memtype can be done by droping the * only reference count taken here. */ if (node_memory_types[node].memtype == memtype) { if (!node_memory_types[node].map_count++) kref_get(&memtype->kref); } } static struct memory_tier *set_node_memory_tier(int node) { struct memory_tier *memtier; struct memory_dev_type *memtype = default_dram_type; int adist = MEMTIER_ADISTANCE_DRAM; pg_data_t *pgdat = NODE_DATA(node); lockdep_assert_held_once(&memory_tier_lock); if (!node_state(node, N_MEMORY)) return ERR_PTR(-EINVAL); mt_calc_adistance(node, &adist); if (!node_memory_types[node].memtype) { memtype = mt_find_alloc_memory_type(adist, &default_memory_types); if (IS_ERR(memtype)) { memtype = default_dram_type; pr_info("Failed to allocate a memory type. Fall back.\n"); } } __init_node_memory_type(node, memtype); memtype = node_memory_types[node].memtype; node_set(node, memtype->nodes); memtier = find_create_memory_tier(memtype); if (!IS_ERR(memtier)) rcu_assign_pointer(pgdat->memtier, memtier); return memtier; } static void destroy_memory_tier(struct memory_tier *memtier) { list_del(&memtier->list); device_unregister(&memtier->dev); } static bool clear_node_memory_tier(int node) { bool cleared = false; pg_data_t *pgdat; struct memory_tier *memtier; pgdat = NODE_DATA(node); if (!pgdat) return false; /* * Make sure that anybody looking at NODE_DATA who finds * a valid memtier finds memory_dev_types with nodes still * linked to the memtier. We achieve this by waiting for * rcu read section to finish using synchronize_rcu. * This also enables us to free the destroyed memory tier * with kfree instead of kfree_rcu */ memtier = __node_get_memory_tier(node); if (memtier) { struct memory_dev_type *memtype; rcu_assign_pointer(pgdat->memtier, NULL); synchronize_rcu(); memtype = node_memory_types[node].memtype; node_clear(node, memtype->nodes); if (nodes_empty(memtype->nodes)) { list_del_init(&memtype->tier_sibling); if (list_empty(&memtier->memory_types)) destroy_memory_tier(memtier); } cleared = true; } return cleared; } static void release_memtype(struct kref *kref) { struct memory_dev_type *memtype; memtype = container_of(kref, struct memory_dev_type, kref); kfree(memtype); } struct memory_dev_type *alloc_memory_type(int adistance) { struct memory_dev_type *memtype; memtype = kmalloc(sizeof(*memtype), GFP_KERNEL); if (!memtype) return ERR_PTR(-ENOMEM); memtype->adistance = adistance; INIT_LIST_HEAD(&memtype->tier_sibling); memtype->nodes = NODE_MASK_NONE; kref_init(&memtype->kref); return memtype; } EXPORT_SYMBOL_GPL(alloc_memory_type); void put_memory_type(struct memory_dev_type *memtype) { kref_put(&memtype->kref, release_memtype); } EXPORT_SYMBOL_GPL(put_memory_type); void init_node_memory_type(int node, struct memory_dev_type *memtype) { mutex_lock(&memory_tier_lock); __init_node_memory_type(node, memtype); mutex_unlock(&memory_tier_lock); } EXPORT_SYMBOL_GPL(init_node_memory_type); void clear_node_memory_type(int node, struct memory_dev_type *memtype) { mutex_lock(&memory_tier_lock); if (node_memory_types[node].memtype == memtype || !memtype) node_memory_types[node].map_count--; /* * If we umapped all the attached devices to this node, * clear the node memory type. */ if (!node_memory_types[node].map_count) { memtype = node_memory_types[node].memtype; node_memory_types[node].memtype = NULL; put_memory_type(memtype); } mutex_unlock(&memory_tier_lock); } EXPORT_SYMBOL_GPL(clear_node_memory_type); struct memory_dev_type *mt_find_alloc_memory_type(int adist, struct list_head *memory_types) { struct memory_dev_type *mtype; list_for_each_entry(mtype, memory_types, list) if (mtype->adistance == adist) return mtype; mtype = alloc_memory_type(adist); if (IS_ERR(mtype)) return mtype; list_add(&mtype->list, memory_types); return mtype; } EXPORT_SYMBOL_GPL(mt_find_alloc_memory_type); void mt_put_memory_types(struct list_head *memory_types) { struct memory_dev_type *mtype, *mtn; list_for_each_entry_safe(mtype, mtn, memory_types, list) { list_del(&mtype->list); put_memory_type(mtype); } } EXPORT_SYMBOL_GPL(mt_put_memory_types); /* * This is invoked via `late_initcall()` to initialize memory tiers for * memory nodes, both with and without CPUs. After the initialization of * firmware and devices, adistance algorithms are expected to be provided. */ static int __init memory_tier_late_init(void) { int nid; struct memory_tier *memtier; get_online_mems(); guard(mutex)(&memory_tier_lock); /* Assign each uninitialized N_MEMORY node to a memory tier. */ for_each_node_state(nid, N_MEMORY) { /* * Some device drivers may have initialized * memory tiers, potentially bringing memory nodes * online and configuring memory tiers. * Exclude them here. */ if (node_memory_types[nid].memtype) continue; memtier = set_node_memory_tier(nid); if (IS_ERR(memtier)) continue; } establish_demotion_targets(); put_online_mems(); return 0; } late_initcall(memory_tier_late_init); static void dump_hmem_attrs(struct access_coordinate *coord, const char *prefix) { pr_info( "%sread_latency: %u, write_latency: %u, read_bandwidth: %u, write_bandwidth: %u\n", prefix, coord->read_latency, coord->write_latency, coord->read_bandwidth, coord->write_bandwidth); } int mt_set_default_dram_perf(int nid, struct access_coordinate *perf, const char *source) { guard(mutex)(&default_dram_perf_lock); if (default_dram_perf_error) return -EIO; if (perf->read_latency + perf->write_latency == 0 || perf->read_bandwidth + perf->write_bandwidth == 0) return -EINVAL; if (default_dram_perf_ref_nid == NUMA_NO_NODE) { default_dram_perf = *perf; default_dram_perf_ref_nid = nid; default_dram_perf_ref_source = kstrdup(source, GFP_KERNEL); return 0; } /* * The performance of all default DRAM nodes is expected to be * same (that is, the variation is less than 10%). And it * will be used as base to calculate the abstract distance of * other memory nodes. */ if (abs(perf->read_latency - default_dram_perf.read_latency) * 10 > default_dram_perf.read_latency || abs(perf->write_latency - default_dram_perf.write_latency) * 10 > default_dram_perf.write_latency || abs(perf->read_bandwidth - default_dram_perf.read_bandwidth) * 10 > default_dram_perf.read_bandwidth || abs(perf->write_bandwidth - default_dram_perf.write_bandwidth) * 10 > default_dram_perf.write_bandwidth) { pr_info( "memory-tiers: the performance of DRAM node %d mismatches that of the reference\n" "DRAM node %d.\n", nid, default_dram_perf_ref_nid); pr_info(" performance of reference DRAM node %d from %s:\n", default_dram_perf_ref_nid, default_dram_perf_ref_source); dump_hmem_attrs(&default_dram_perf, " "); pr_info(" performance of DRAM node %d from %s:\n", nid, source); dump_hmem_attrs(perf, " "); pr_info( " disable default DRAM node performance based abstract distance algorithm.\n"); default_dram_perf_error = true; return -EINVAL; } return 0; } int mt_perf_to_adistance(struct access_coordinate *perf, int *adist) { guard(mutex)(&default_dram_perf_lock); if (default_dram_perf_error) return -EIO; if (perf->read_latency + perf->write_latency == 0 || perf->read_bandwidth + perf->write_bandwidth == 0) return -EINVAL; if (default_dram_perf_ref_nid == NUMA_NO_NODE) return -ENOENT; /* * The abstract distance of a memory node is in direct proportion to * its memory latency (read + write) and inversely proportional to its * memory bandwidth (read + write). The abstract distance, memory * latency, and memory bandwidth of the default DRAM nodes are used as * the base. */ *adist = MEMTIER_ADISTANCE_DRAM * (perf->read_latency + perf->write_latency) / (default_dram_perf.read_latency + default_dram_perf.write_latency) * (default_dram_perf.read_bandwidth + default_dram_perf.write_bandwidth) / (perf->read_bandwidth + perf->write_bandwidth); return 0; } EXPORT_SYMBOL_GPL(mt_perf_to_adistance); /** * register_mt_adistance_algorithm() - Register memory tiering abstract distance algorithm * @nb: The notifier block which describe the algorithm * * Return: 0 on success, errno on error. * * Every memory tiering abstract distance algorithm provider needs to * register the algorithm with register_mt_adistance_algorithm(). To * calculate the abstract distance for a specified memory node, the * notifier function will be called unless some high priority * algorithm has provided result. The prototype of the notifier * function is as follows, * * int (*algorithm_notifier)(struct notifier_block *nb, * unsigned long nid, void *data); * * Where "nid" specifies the memory node, "data" is the pointer to the * returned abstract distance (that is, "int *adist"). If the * algorithm provides the result, NOTIFY_STOP should be returned. * Otherwise, return_value & %NOTIFY_STOP_MASK == 0 to allow the next * algorithm in the chain to provide the result. */ int register_mt_adistance_algorithm(struct notifier_block *nb) { return blocking_notifier_chain_register(&mt_adistance_algorithms, nb); } EXPORT_SYMBOL_GPL(register_mt_adistance_algorithm); /** * unregister_mt_adistance_algorithm() - Unregister memory tiering abstract distance algorithm * @nb: the notifier block which describe the algorithm * * Return: 0 on success, errno on error. */ int unregister_mt_adistance_algorithm(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&mt_adistance_algorithms, nb); } EXPORT_SYMBOL_GPL(unregister_mt_adistance_algorithm); /** * mt_calc_adistance() - Calculate abstract distance with registered algorithms * @node: the node to calculate abstract distance for * @adist: the returned abstract distance * * Return: if return_value & %NOTIFY_STOP_MASK != 0, then some * abstract distance algorithm provides the result, and return it via * @adist. Otherwise, no algorithm can provide the result and @adist * will be kept as it is. */ int mt_calc_adistance(int node, int *adist) { return blocking_notifier_call_chain(&mt_adistance_algorithms, node, adist); } EXPORT_SYMBOL_GPL(mt_calc_adistance); static int __meminit memtier_hotplug_callback(struct notifier_block *self, unsigned long action, void *_arg) { struct memory_tier *memtier; struct memory_notify *arg = _arg; /* * Only update the node migration order when a node is * changing status, like online->offline. */ if (arg->status_change_nid < 0) return notifier_from_errno(0); switch (action) { case MEM_OFFLINE: mutex_lock(&memory_tier_lock); if (clear_node_memory_tier(arg->status_change_nid)) establish_demotion_targets(); mutex_unlock(&memory_tier_lock); break; case MEM_ONLINE: mutex_lock(&memory_tier_lock); memtier = set_node_memory_tier(arg->status_change_nid); if (!IS_ERR(memtier)) establish_demotion_targets(); mutex_unlock(&memory_tier_lock); break; } return notifier_from_errno(0); } static int __init memory_tier_init(void) { int ret; ret = subsys_virtual_register(&memory_tier_subsys, NULL); if (ret) panic("%s() failed to register memory tier subsystem\n", __func__); #ifdef CONFIG_MIGRATION node_demotion = kcalloc(nr_node_ids, sizeof(struct demotion_nodes), GFP_KERNEL); WARN_ON(!node_demotion); #endif mutex_lock(&memory_tier_lock); /* * For now we can have 4 faster memory tiers with smaller adistance * than default DRAM tier. */ default_dram_type = mt_find_alloc_memory_type(MEMTIER_ADISTANCE_DRAM, &default_memory_types); mutex_unlock(&memory_tier_lock); if (IS_ERR(default_dram_type)) panic("%s() failed to allocate default DRAM tier\n", __func__); /* Record nodes with memory and CPU to set default DRAM performance. */ nodes_and(default_dram_nodes, node_states[N_MEMORY], node_states[N_CPU]); hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI); return 0; } subsys_initcall(memory_tier_init); bool numa_demotion_enabled = false; #ifdef CONFIG_MIGRATION #ifdef CONFIG_SYSFS static ssize_t demotion_enabled_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sysfs_emit(buf, "%s\n", str_true_false(numa_demotion_enabled)); } static ssize_t demotion_enabled_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t count) { ssize_t ret; ret = kstrtobool(buf, &numa_demotion_enabled); if (ret) return ret; return count; } static struct kobj_attribute numa_demotion_enabled_attr = __ATTR_RW(demotion_enabled); static struct attribute *numa_attrs[] = { &numa_demotion_enabled_attr.attr, NULL, }; static const struct attribute_group numa_attr_group = { .attrs = numa_attrs, }; static int __init numa_init_sysfs(void) { int err; struct kobject *numa_kobj; numa_kobj = kobject_create_and_add("numa", mm_kobj); if (!numa_kobj) { pr_err("failed to create numa kobject\n"); return -ENOMEM; } err = sysfs_create_group(numa_kobj, &numa_attr_group); if (err) { pr_err("failed to register numa group\n"); goto delete_obj; } return 0; delete_obj: kobject_put(numa_kobj); return err; } subsys_initcall(numa_init_sysfs); #endif /* CONFIG_SYSFS */ #endif |
3 14 11 2 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 | // SPDX-License-Identifier: GPL-2.0 /* Parts of this driver are based on the following: * - Kvaser linux leaf driver (version 4.78) * - CAN driver for esd CAN-USB/2 * - Kvaser linux usbcanII driver (version 5.3) * - Kvaser linux mhydra driver (version 5.24) * * Copyright (C) 2002-2018 KVASER AB, Sweden. All rights reserved. * Copyright (C) 2010 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh * Copyright (C) 2012 Olivier Sobrie <olivier@sobrie.be> * Copyright (C) 2015 Valeo S.A. */ #include <linux/completion.h> #include <linux/device.h> #include <linux/ethtool.h> #include <linux/gfp.h> #include <linux/if.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/spinlock.h> #include <linux/types.h> #include <linux/usb.h> #include <linux/can.h> #include <linux/can/dev.h> #include <linux/can/error.h> #include <linux/can/netlink.h> #include "kvaser_usb.h" /* Kvaser USB vendor id. */ #define KVASER_VENDOR_ID 0x0bfd /* Kvaser Leaf USB devices product ids */ #define USB_LEAF_DEVEL_PRODUCT_ID 0x000a #define USB_LEAF_LITE_PRODUCT_ID 0x000b #define USB_LEAF_PRO_PRODUCT_ID 0x000c #define USB_LEAF_SPRO_PRODUCT_ID 0x000e #define USB_LEAF_PRO_LS_PRODUCT_ID 0x000f #define USB_LEAF_PRO_SWC_PRODUCT_ID 0x0010 #define USB_LEAF_PRO_LIN_PRODUCT_ID 0x0011 #define USB_LEAF_SPRO_LS_PRODUCT_ID 0x0012 #define USB_LEAF_SPRO_SWC_PRODUCT_ID 0x0013 #define USB_MEMO2_DEVEL_PRODUCT_ID 0x0016 #define USB_MEMO2_HSHS_PRODUCT_ID 0x0017 #define USB_UPRO_HSHS_PRODUCT_ID 0x0018 #define USB_LEAF_LITE_GI_PRODUCT_ID 0x0019 #define USB_LEAF_PRO_OBDII_PRODUCT_ID 0x001a #define USB_MEMO2_HSLS_PRODUCT_ID 0x001b #define USB_LEAF_LITE_CH_PRODUCT_ID 0x001c #define USB_BLACKBIRD_SPRO_PRODUCT_ID 0x001d #define USB_OEM_MERCURY_PRODUCT_ID 0x0022 #define USB_OEM_LEAF_PRODUCT_ID 0x0023 #define USB_CAN_R_PRODUCT_ID 0x0027 #define USB_LEAF_LITE_V2_PRODUCT_ID 0x0120 #define USB_MINI_PCIE_HS_PRODUCT_ID 0x0121 #define USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID 0x0122 #define USB_USBCAN_LIGHT_2HS_PRODUCT_ID 0x0123 #define USB_MINI_PCIE_2HS_PRODUCT_ID 0x0124 #define USB_USBCAN_R_V2_PRODUCT_ID 0x0126 #define USB_LEAF_LIGHT_R_V2_PRODUCT_ID 0x0127 #define USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID 0x0128 /* Kvaser USBCan-II devices product ids */ #define USB_USBCAN_REVB_PRODUCT_ID 0x0002 #define USB_VCI2_PRODUCT_ID 0x0003 #define USB_USBCAN2_PRODUCT_ID 0x0004 #define USB_MEMORATOR_PRODUCT_ID 0x0005 /* Kvaser Minihydra USB devices product ids */ #define USB_BLACKBIRD_V2_PRODUCT_ID 0x0102 #define USB_MEMO_PRO_5HS_PRODUCT_ID 0x0104 #define USB_USBCAN_PRO_5HS_PRODUCT_ID 0x0105 #define USB_USBCAN_LIGHT_4HS_PRODUCT_ID 0x0106 #define USB_LEAF_PRO_HS_V2_PRODUCT_ID 0x0107 #define USB_USBCAN_PRO_2HS_V2_PRODUCT_ID 0x0108 #define USB_MEMO_2HS_PRODUCT_ID 0x0109 #define USB_MEMO_PRO_2HS_V2_PRODUCT_ID 0x010a #define USB_HYBRID_2CANLIN_PRODUCT_ID 0x010b #define USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID 0x010c #define USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID 0x010d #define USB_HYBRID_PRO_2CANLIN_PRODUCT_ID 0x010e #define USB_U100_PRODUCT_ID 0x0111 #define USB_U100P_PRODUCT_ID 0x0112 #define USB_U100S_PRODUCT_ID 0x0113 #define USB_USBCAN_PRO_4HS_PRODUCT_ID 0x0114 #define USB_HYBRID_CANLIN_PRODUCT_ID 0x0115 #define USB_HYBRID_PRO_CANLIN_PRODUCT_ID 0x0116 #define USB_LEAF_V3_PRODUCT_ID 0x0117 #define USB_VINING_800_PRODUCT_ID 0x0119 #define USB_USBCAN_PRO_5XCAN_PRODUCT_ID 0x011A #define USB_MINI_PCIE_1XCAN_PRODUCT_ID 0x011B static const struct kvaser_usb_driver_info kvaser_usb_driver_info_hydra = { .quirks = 0, .ops = &kvaser_usb_hydra_dev_ops, }; static const struct kvaser_usb_driver_info kvaser_usb_driver_info_usbcan = { .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | KVASER_USB_QUIRK_HAS_SILENT_MODE, .family = KVASER_USBCAN, .ops = &kvaser_usb_leaf_dev_ops, }; static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf = { .quirks = KVASER_USB_QUIRK_IGNORE_CLK_FREQ, .family = KVASER_LEAF, .ops = &kvaser_usb_leaf_dev_ops, }; static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err = { .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | KVASER_USB_QUIRK_IGNORE_CLK_FREQ, .family = KVASER_LEAF, .ops = &kvaser_usb_leaf_dev_ops, }; static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_listen = { .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS | KVASER_USB_QUIRK_HAS_SILENT_MODE | KVASER_USB_QUIRK_IGNORE_CLK_FREQ, .family = KVASER_LEAF, .ops = &kvaser_usb_leaf_dev_ops, }; static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leafimx = { .quirks = 0, .family = KVASER_LEAF, .ops = &kvaser_usb_leaf_dev_ops, }; static const struct usb_device_id kvaser_usb_table[] = { /* Leaf M32C USB product IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_SWC_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LIN_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_LS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_SWC_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_DEVEL_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSHS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_UPRO_HSHS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_OBDII_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSLS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_CH_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_SPRO_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_MERCURY_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_LEAF_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, { USB_DEVICE(KVASER_VENDOR_ID, USB_CAN_R_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err }, /* Leaf i.MX28 USB product IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_R_V2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_R_V2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx }, /* USBCANII USB product IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_REVB_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMORATOR_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, { USB_DEVICE(KVASER_VENDOR_ID, USB_VCI2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan }, /* Minihydra USB product IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_2CANLIN_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_2CANLIN_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_U100_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_U100P_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_U100S_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_4HS_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_V3_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_VINING_800_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5XCAN_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_1XCAN_PRODUCT_ID), .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra }, { } }; MODULE_DEVICE_TABLE(usb, kvaser_usb_table); int kvaser_usb_send_cmd(const struct kvaser_usb *dev, void *cmd, int len) { return usb_bulk_msg(dev->udev, usb_sndbulkpipe(dev->udev, dev->bulk_out->bEndpointAddress), cmd, len, NULL, KVASER_USB_TIMEOUT); } int kvaser_usb_recv_cmd(const struct kvaser_usb *dev, void *cmd, int len, int *actual_len) { return usb_bulk_msg(dev->udev, usb_rcvbulkpipe(dev->udev, dev->bulk_in->bEndpointAddress), cmd, len, actual_len, KVASER_USB_TIMEOUT); } static void kvaser_usb_send_cmd_callback(struct urb *urb) { struct net_device *netdev = urb->context; kfree(urb->transfer_buffer); if (urb->status) netdev_warn(netdev, "urb status received: %d\n", urb->status); } int kvaser_usb_send_cmd_async(struct kvaser_usb_net_priv *priv, void *cmd, int len) { struct kvaser_usb *dev = priv->dev; struct net_device *netdev = priv->netdev; struct urb *urb; int err; urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) return -ENOMEM; usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, dev->bulk_out->bEndpointAddress), cmd, len, kvaser_usb_send_cmd_callback, netdev); usb_anchor_urb(urb, &priv->tx_submitted); err = usb_submit_urb(urb, GFP_ATOMIC); if (err) { netdev_err(netdev, "Error transmitting URB\n"); usb_unanchor_urb(urb); } usb_free_urb(urb); return err; } int kvaser_usb_can_rx_over_error(struct net_device *netdev) { struct net_device_stats *stats = &netdev->stats; struct can_frame *cf; struct sk_buff *skb; stats->rx_over_errors++; stats->rx_errors++; skb = alloc_can_err_skb(netdev, &cf); if (!skb) { stats->rx_dropped++; netdev_warn(netdev, "No memory left for err_skb\n"); return -ENOMEM; } cf->can_id |= CAN_ERR_CRTL; cf->data[1] = CAN_ERR_CRTL_RX_OVERFLOW; netif_rx(skb); return 0; } static void kvaser_usb_read_bulk_callback(struct urb *urb) { struct kvaser_usb *dev = urb->context; const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; unsigned int i; switch (urb->status) { case 0: break; case -ENOENT: case -EPIPE: case -EPROTO: case -ESHUTDOWN: return; default: dev_info(&dev->intf->dev, "Rx URB aborted (%d)\n", urb->status); goto resubmit_urb; } ops->dev_read_bulk_callback(dev, urb->transfer_buffer, urb->actual_length); resubmit_urb: usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe(dev->udev, dev->bulk_in->bEndpointAddress), urb->transfer_buffer, KVASER_USB_RX_BUFFER_SIZE, kvaser_usb_read_bulk_callback, dev); err = usb_submit_urb(urb, GFP_ATOMIC); if (err == -ENODEV) { for (i = 0; i < dev->nchannels; i++) { if (!dev->nets[i]) continue; netif_device_detach(dev->nets[i]->netdev); } } else if (err) { dev_err(&dev->intf->dev, "Failed resubmitting read bulk urb: %d\n", err); } } static int kvaser_usb_setup_rx_urbs(struct kvaser_usb *dev) { int i, err = 0; if (dev->rxinitdone) return 0; for (i = 0; i < KVASER_USB_MAX_RX_URBS; i++) { struct urb *urb = NULL; u8 *buf = NULL; dma_addr_t buf_dma; urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { err = -ENOMEM; break; } buf = usb_alloc_coherent(dev->udev, KVASER_USB_RX_BUFFER_SIZE, GFP_KERNEL, &buf_dma); if (!buf) { dev_warn(&dev->intf->dev, "No memory left for USB buffer\n"); usb_free_urb(urb); err = -ENOMEM; break; } usb_fill_bulk_urb(urb, dev->udev, usb_rcvbulkpipe (dev->udev, dev->bulk_in->bEndpointAddress), buf, KVASER_USB_RX_BUFFER_SIZE, kvaser_usb_read_bulk_callback, dev); urb->transfer_dma = buf_dma; urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP; usb_anchor_urb(urb, &dev->rx_submitted); err = usb_submit_urb(urb, GFP_KERNEL); if (err) { usb_unanchor_urb(urb); usb_free_coherent(dev->udev, KVASER_USB_RX_BUFFER_SIZE, buf, buf_dma); usb_free_urb(urb); break; } dev->rxbuf[i] = buf; dev->rxbuf_dma[i] = buf_dma; usb_free_urb(urb); } if (i == 0) { dev_warn(&dev->intf->dev, "Cannot setup read URBs, error %d\n", err); return err; } else if (i < KVASER_USB_MAX_RX_URBS) { dev_warn(&dev->intf->dev, "RX performances may be slow\n"); } dev->rxinitdone = true; return 0; } static int kvaser_usb_open(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; err = open_candev(netdev); if (err) return err; err = ops->dev_set_opt_mode(priv); if (err) goto error; err = ops->dev_start_chip(priv); if (err) { netdev_warn(netdev, "Cannot start device, error %d\n", err); goto error; } priv->can.state = CAN_STATE_ERROR_ACTIVE; return 0; error: close_candev(netdev); return err; } static void kvaser_usb_reset_tx_urb_contexts(struct kvaser_usb_net_priv *priv) { int i, max_tx_urbs; max_tx_urbs = priv->dev->max_tx_urbs; priv->active_tx_contexts = 0; for (i = 0; i < max_tx_urbs; i++) priv->tx_contexts[i].echo_index = max_tx_urbs; } /* This method might sleep. Do not call it in the atomic context * of URB completions. */ void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv) { usb_kill_anchored_urbs(&priv->tx_submitted); kvaser_usb_reset_tx_urb_contexts(priv); } static void kvaser_usb_unlink_all_urbs(struct kvaser_usb *dev) { int i; usb_kill_anchored_urbs(&dev->rx_submitted); for (i = 0; i < KVASER_USB_MAX_RX_URBS; i++) usb_free_coherent(dev->udev, KVASER_USB_RX_BUFFER_SIZE, dev->rxbuf[i], dev->rxbuf_dma[i]); for (i = 0; i < dev->nchannels; i++) { struct kvaser_usb_net_priv *priv = dev->nets[i]; if (priv) kvaser_usb_unlink_tx_urbs(priv); } } static int kvaser_usb_close(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int err; netif_stop_queue(netdev); err = ops->dev_flush_queue(priv); if (err) netdev_warn(netdev, "Cannot flush queue, error %d\n", err); if (ops->dev_reset_chip) { err = ops->dev_reset_chip(dev, priv->channel); if (err) netdev_warn(netdev, "Cannot reset card, error %d\n", err); } err = ops->dev_stop_chip(priv); if (err) netdev_warn(netdev, "Cannot stop device, error %d\n", err); /* reset tx contexts */ kvaser_usb_unlink_tx_urbs(priv); priv->can.state = CAN_STATE_STOPPED; close_candev(priv->netdev); return 0; } static int kvaser_usb_set_bittiming(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; struct can_bittiming *bt = &priv->can.bittiming; struct kvaser_usb_busparams busparams; int tseg1 = bt->prop_seg + bt->phase_seg1; int tseg2 = bt->phase_seg2; int sjw = bt->sjw; int err; busparams.bitrate = cpu_to_le32(bt->bitrate); busparams.sjw = (u8)sjw; busparams.tseg1 = (u8)tseg1; busparams.tseg2 = (u8)tseg2; if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES) busparams.nsamples = 3; else busparams.nsamples = 1; err = ops->dev_set_bittiming(netdev, &busparams); if (err) return err; err = kvaser_usb_setup_rx_urbs(priv->dev); if (err) return err; err = ops->dev_get_busparams(priv); if (err) { /* Treat EOPNOTSUPP as success */ if (err == -EOPNOTSUPP) err = 0; return err; } if (memcmp(&busparams, &priv->busparams_nominal, sizeof(priv->busparams_nominal)) != 0) err = -EINVAL; return err; } static int kvaser_usb_set_data_bittiming(struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; struct can_bittiming *dbt = &priv->can.data_bittiming; struct kvaser_usb_busparams busparams; int tseg1 = dbt->prop_seg + dbt->phase_seg1; int tseg2 = dbt->phase_seg2; int sjw = dbt->sjw; int err; if (!ops->dev_set_data_bittiming || !ops->dev_get_data_busparams) return -EOPNOTSUPP; busparams.bitrate = cpu_to_le32(dbt->bitrate); busparams.sjw = (u8)sjw; busparams.tseg1 = (u8)tseg1; busparams.tseg2 = (u8)tseg2; busparams.nsamples = 1; err = ops->dev_set_data_bittiming(netdev, &busparams); if (err) return err; err = kvaser_usb_setup_rx_urbs(priv->dev); if (err) return err; err = ops->dev_get_data_busparams(priv); if (err) return err; if (memcmp(&busparams, &priv->busparams_data, sizeof(priv->busparams_data)) != 0) err = -EINVAL; return err; } static void kvaser_usb_write_bulk_callback(struct urb *urb) { struct kvaser_usb_tx_urb_context *context = urb->context; struct kvaser_usb_net_priv *priv; struct net_device *netdev; if (WARN_ON(!context)) return; priv = context->priv; netdev = priv->netdev; kfree(urb->transfer_buffer); if (!netif_device_present(netdev)) return; if (urb->status) netdev_info(netdev, "Tx URB aborted (%d)\n", urb->status); } static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct kvaser_usb_net_priv *priv = netdev_priv(netdev); struct kvaser_usb *dev = priv->dev; const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; struct net_device_stats *stats = &netdev->stats; struct kvaser_usb_tx_urb_context *context = NULL; struct urb *urb; void *buf; int cmd_len = 0; int err, ret = NETDEV_TX_OK; unsigned int i; unsigned long flags; if (can_dev_dropped_skb(netdev, skb)) return NETDEV_TX_OK; urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { stats->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } spin_lock_irqsave(&priv->tx_contexts_lock, flags); for (i = 0; i < dev->max_tx_urbs; i++) { if (priv->tx_contexts[i].echo_index == dev->max_tx_urbs) { context = &priv->tx_contexts[i]; context->echo_index = i; ++priv->active_tx_contexts; if (priv->active_tx_contexts >= (int)dev->max_tx_urbs) netif_stop_queue(netdev); break; } } spin_unlock_irqrestore(&priv->tx_contexts_lock, flags); /* This should never happen; it implies a flow control bug */ if (!context) { netdev_warn(netdev, "cannot find free context\n"); ret = NETDEV_TX_BUSY; goto freeurb; } buf = ops->dev_frame_to_cmd(priv, skb, &cmd_len, context->echo_index); if (!buf) { stats->tx_dropped++; dev_kfree_skb(skb); spin_lock_irqsave(&priv->tx_contexts_lock, flags); context->echo_index = dev->max_tx_urbs; --priv->active_tx_contexts; netif_wake_queue(netdev); spin_unlock_irqrestore(&priv->tx_contexts_lock, flags); goto freeurb; } context->priv = priv; can_put_echo_skb(skb, netdev, context->echo_index, 0); usb_fill_bulk_urb(urb, dev->udev, usb_sndbulkpipe(dev->udev, dev->bulk_out->bEndpointAddress), buf, cmd_len, kvaser_usb_write_bulk_callback, context); usb_anchor_urb(urb, &priv->tx_submitted); err = usb_submit_urb(urb, GFP_ATOMIC); if (unlikely(err)) { spin_lock_irqsave(&priv->tx_contexts_lock, flags); can_free_echo_skb(netdev, context->echo_index, NULL); context->echo_index = dev->max_tx_urbs; --priv->active_tx_contexts; netif_wake_queue(netdev); spin_unlock_irqrestore(&priv->tx_contexts_lock, flags); usb_unanchor_urb(urb); kfree(buf); stats->tx_dropped++; if (err == -ENODEV) netif_device_detach(netdev); else netdev_warn(netdev, "Failed tx_urb %d\n", err); goto freeurb; } ret = NETDEV_TX_OK; freeurb: usb_free_urb(urb); return ret; } static const struct net_device_ops kvaser_usb_netdev_ops = { .ndo_open = kvaser_usb_open, .ndo_stop = kvaser_usb_close, .ndo_eth_ioctl = can_eth_ioctl_hwts, .ndo_start_xmit = kvaser_usb_start_xmit, .ndo_change_mtu = can_change_mtu, }; static const struct ethtool_ops kvaser_usb_ethtool_ops = { .get_ts_info = can_ethtool_op_get_ts_info_hwts, }; static void kvaser_usb_remove_interfaces(struct kvaser_usb *dev) { const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops; int i; for (i = 0; i < dev->nchannels; i++) { if (!dev->nets[i]) continue; unregister_candev(dev->nets[i]->netdev); } kvaser_usb_unlink_all_urbs(dev); for (i = 0; i < dev->nchannels; i++) { if (!dev->nets[i]) continue; if (ops->dev_remove_channel) ops->dev_remove_channel(dev->nets[i]); free_candev(dev->nets[i]->netdev); } } static int kvaser_usb_init_one(struct kvaser_usb *dev, int channel) { struct net_device *netdev; struct kvaser_usb_net_priv *priv; const struct kvaser_usb_driver_info *driver_info = dev->driver_info; const struct kvaser_usb_dev_ops *ops = driver_info->ops; int err; if (ops->dev_reset_chip) { err = ops->dev_reset_chip(dev, channel); if (err) return err; } netdev = alloc_candev(struct_size(priv, tx_contexts, dev->max_tx_urbs), dev->max_tx_urbs); if (!netdev) { dev_err(&dev->intf->dev, "Cannot alloc candev\n"); return -ENOMEM; } priv = netdev_priv(netdev); init_usb_anchor(&priv->tx_submitted); init_completion(&priv->start_comp); init_completion(&priv->stop_comp); init_completion(&priv->flush_comp); init_completion(&priv->get_busparams_comp); priv->can.ctrlmode_supported = CAN_CTRLMODE_CC_LEN8_DLC; priv->dev = dev; priv->netdev = netdev; priv->channel = channel; spin_lock_init(&priv->tx_contexts_lock); kvaser_usb_reset_tx_urb_contexts(priv); priv->can.state = CAN_STATE_STOPPED; priv->can.clock.freq = dev->cfg->clock.freq; priv->can.bittiming_const = dev->cfg->bittiming_const; priv->can.do_set_bittiming = kvaser_usb_set_bittiming; priv->can.do_set_mode = ops->dev_set_mode; if ((driver_info->quirks & KVASER_USB_QUIRK_HAS_TXRX_ERRORS) || (priv->dev->card_data.capabilities & KVASER_USB_CAP_BERR_CAP)) priv->can.do_get_berr_counter = ops->dev_get_berr_counter; if (driver_info->quirks & KVASER_USB_QUIRK_HAS_SILENT_MODE) priv->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY; priv->can.ctrlmode_supported |= dev->card_data.ctrlmode_supported; if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) { priv->can.data_bittiming_const = dev->cfg->data_bittiming_const; priv->can.do_set_data_bittiming = kvaser_usb_set_data_bittiming; } netdev->flags |= IFF_ECHO; netdev->netdev_ops = &kvaser_usb_netdev_ops; netdev->ethtool_ops = &kvaser_usb_ethtool_ops; SET_NETDEV_DEV(netdev, &dev->intf->dev); netdev->dev_id = channel; dev->nets[channel] = priv; if (ops->dev_init_channel) { err = ops->dev_init_channel(priv); if (err) goto err; } err = register_candev(netdev); if (err) { dev_err(&dev->intf->dev, "Failed to register CAN device\n"); goto err; } netdev_dbg(netdev, "device registered\n"); return 0; err: free_candev(netdev); dev->nets[channel] = NULL; return err; } static int kvaser_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct kvaser_usb *dev; int err; int i; const struct kvaser_usb_driver_info *driver_info; const struct kvaser_usb_dev_ops *ops; driver_info = (const struct kvaser_usb_driver_info *)id->driver_info; if (!driver_info) return -ENODEV; dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL); if (!dev) return -ENOMEM; dev->intf = intf; dev->driver_info = driver_info; ops = driver_info->ops; err = ops->dev_setup_endpoints(dev); if (err) return dev_err_probe(&intf->dev, err, "Cannot get usb endpoint(s)"); dev->udev = interface_to_usbdev(intf); init_usb_anchor(&dev->rx_submitted); usb_set_intfdata(intf, dev); dev->card_data.ctrlmode_supported = 0; dev->card_data.capabilities = 0; err = ops->dev_init_card(dev); if (err) return dev_err_probe(&intf->dev, err, "Failed to initialize card\n"); err = ops->dev_get_software_info(dev); if (err) return dev_err_probe(&intf->dev, err, "Cannot get software info\n"); if (ops->dev_get_software_details) { err = ops->dev_get_software_details(dev); if (err) return dev_err_probe(&intf->dev, err, "Cannot get software details\n"); } if (WARN_ON(!dev->cfg)) return -ENODEV; dev_dbg(&intf->dev, "Firmware version: %d.%d.%d\n", ((dev->fw_version >> 24) & 0xff), ((dev->fw_version >> 16) & 0xff), (dev->fw_version & 0xffff)); dev_dbg(&intf->dev, "Max outstanding tx = %d URBs\n", dev->max_tx_urbs); err = ops->dev_get_card_info(dev); if (err) return dev_err_probe(&intf->dev, err, "Cannot get card info\n"); if (ops->dev_get_capabilities) { err = ops->dev_get_capabilities(dev); if (err) { kvaser_usb_remove_interfaces(dev); return dev_err_probe(&intf->dev, err, "Cannot get capabilities\n"); } } for (i = 0; i < dev->nchannels; i++) { err = kvaser_usb_init_one(dev, i); if (err) { kvaser_usb_remove_interfaces(dev); return err; } } return 0; } static void kvaser_usb_disconnect(struct usb_interface *intf) { struct kvaser_usb *dev = usb_get_intfdata(intf); usb_set_intfdata(intf, NULL); if (!dev) return; kvaser_usb_remove_interfaces(dev); } static struct usb_driver kvaser_usb_driver = { .name = KBUILD_MODNAME, .probe = kvaser_usb_probe, .disconnect = kvaser_usb_disconnect, .id_table = kvaser_usb_table, }; module_usb_driver(kvaser_usb_driver); MODULE_AUTHOR("Olivier Sobrie <olivier@sobrie.be>"); MODULE_AUTHOR("Kvaser AB <support@kvaser.com>"); MODULE_DESCRIPTION("CAN driver for Kvaser CAN/USB devices"); MODULE_LICENSE("GPL v2"); |
7 2 7 7 1 1 2 1 1 1 1 7 7 7 7 1 1 7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 | // SPDX-License-Identifier: GPL-2.0-or-later /* * SN9C2028 library * * Copyright (C) 2009 Theodore Kilgore <kilgota@auburn.edu> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #define MODULE_NAME "sn9c2028" #include "gspca.h" MODULE_AUTHOR("Theodore Kilgore"); MODULE_DESCRIPTION("Sonix SN9C2028 USB Camera Driver"); MODULE_LICENSE("GPL"); /* specific webcam descriptor */ struct sd { struct gspca_dev gspca_dev; /* !! must be the first item */ u8 sof_read; u16 model; #define MIN_AVG_LUM 8500 #define MAX_AVG_LUM 10000 int avg_lum; u8 avg_lum_l; struct { /* autogain and gain control cluster */ struct v4l2_ctrl *autogain; struct v4l2_ctrl *gain; }; }; struct init_command { unsigned char instruction[6]; unsigned char to_read; /* length to read. 0 means no reply requested */ }; /* How to change the resolution of any of the VGA cams is unknown */ static const struct v4l2_pix_format vga_mode[] = { {640, 480, V4L2_PIX_FMT_SN9C2028, V4L2_FIELD_NONE, .bytesperline = 640, .sizeimage = 640 * 480 * 3 / 4, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /* No way to change the resolution of the CIF cams is known */ static const struct v4l2_pix_format cif_mode[] = { {352, 288, V4L2_PIX_FMT_SN9C2028, V4L2_FIELD_NONE, .bytesperline = 352, .sizeimage = 352 * 288 * 3 / 4, .colorspace = V4L2_COLORSPACE_SRGB, .priv = 0}, }; /* the bytes to write are in gspca_dev->usb_buf */ static int sn9c2028_command(struct gspca_dev *gspca_dev, u8 *command) { int rc; gspca_dbg(gspca_dev, D_USBO, "sending command %02x%02x%02x%02x%02x%02x\n", command[0], command[1], command[2], command[3], command[4], command[5]); memcpy(gspca_dev->usb_buf, command, 6); rc = usb_control_msg(gspca_dev->dev, usb_sndctrlpipe(gspca_dev->dev, 0), USB_REQ_GET_CONFIGURATION, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 2, 0, gspca_dev->usb_buf, 6, 500); if (rc < 0) { pr_err("command write [%02x] error %d\n", gspca_dev->usb_buf[0], rc); return rc; } return 0; } static int sn9c2028_read1(struct gspca_dev *gspca_dev) { int rc; rc = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 1, 0, gspca_dev->usb_buf, 1, 500); if (rc != 1) { pr_err("read1 error %d\n", rc); return (rc < 0) ? rc : -EIO; } gspca_dbg(gspca_dev, D_USBI, "read1 response %02x\n", gspca_dev->usb_buf[0]); return gspca_dev->usb_buf[0]; } static int sn9c2028_read4(struct gspca_dev *gspca_dev, u8 *reading) { int rc; rc = usb_control_msg(gspca_dev->dev, usb_rcvctrlpipe(gspca_dev->dev, 0), USB_REQ_GET_STATUS, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_INTERFACE, 4, 0, gspca_dev->usb_buf, 4, 500); if (rc != 4) { pr_err("read4 error %d\n", rc); return (rc < 0) ? rc : -EIO; } memcpy(reading, gspca_dev->usb_buf, 4); gspca_dbg(gspca_dev, D_USBI, "read4 response %02x%02x%02x%02x\n", reading[0], reading[1], reading[2], reading[3]); return rc; } static int sn9c2028_long_command(struct gspca_dev *gspca_dev, u8 *command) { int i, status; __u8 reading[4]; status = sn9c2028_command(gspca_dev, command); if (status < 0) return status; status = -1; for (i = 0; i < 256 && status < 2; i++) status = sn9c2028_read1(gspca_dev); if (status < 0) { pr_err("long command status read error %d\n", status); return status; } memset(reading, 0, 4); status = sn9c2028_read4(gspca_dev, reading); if (status < 0) return status; /* in general, the first byte of the response is the first byte of * the command, or'ed with 8 */ status = sn9c2028_read1(gspca_dev); if (status < 0) return status; return 0; } static int sn9c2028_short_command(struct gspca_dev *gspca_dev, u8 *command) { int err_code; err_code = sn9c2028_command(gspca_dev, command); if (err_code < 0) return err_code; err_code = sn9c2028_read1(gspca_dev); if (err_code < 0) return err_code; return 0; } /* this function is called at probe time */ static int sd_config(struct gspca_dev *gspca_dev, const struct usb_device_id *id) { struct sd *sd = (struct sd *) gspca_dev; struct cam *cam = &gspca_dev->cam; gspca_dbg(gspca_dev, D_PROBE, "SN9C2028 camera detected (vid/pid 0x%04X:0x%04X)\n", id->idVendor, id->idProduct); sd->model = id->idProduct; switch (sd->model) { case 0x7005: gspca_dbg(gspca_dev, D_PROBE, "Genius Smart 300 camera\n"); break; case 0x7003: gspca_dbg(gspca_dev, D_PROBE, "Genius Videocam Live v2\n"); break; case 0x8000: gspca_dbg(gspca_dev, D_PROBE, "DC31VC\n"); break; case 0x8001: gspca_dbg(gspca_dev, D_PROBE, "Spy camera\n"); break; case 0x8003: gspca_dbg(gspca_dev, D_PROBE, "CIF camera\n"); break; case 0x8008: gspca_dbg(gspca_dev, D_PROBE, "Mini-Shotz ms-350 camera\n"); break; case 0x800a: gspca_dbg(gspca_dev, D_PROBE, "Vivitar 3350b type camera\n"); cam->input_flags = V4L2_IN_ST_VFLIP | V4L2_IN_ST_HFLIP; break; } switch (sd->model) { case 0x8000: case 0x8001: case 0x8003: cam->cam_mode = cif_mode; cam->nmodes = ARRAY_SIZE(cif_mode); break; default: cam->cam_mode = vga_mode; cam->nmodes = ARRAY_SIZE(vga_mode); } return 0; } /* this function is called at probe and resume time */ static int sd_init(struct gspca_dev *gspca_dev) { int status; sn9c2028_read1(gspca_dev); sn9c2028_read1(gspca_dev); status = sn9c2028_read1(gspca_dev); return (status < 0) ? status : 0; } static int run_start_commands(struct gspca_dev *gspca_dev, struct init_command *cam_commands, int n) { int i, err_code = -1; for (i = 0; i < n; i++) { switch (cam_commands[i].to_read) { case 4: err_code = sn9c2028_long_command(gspca_dev, cam_commands[i].instruction); break; case 1: err_code = sn9c2028_short_command(gspca_dev, cam_commands[i].instruction); break; case 0: err_code = sn9c2028_command(gspca_dev, cam_commands[i].instruction); break; } if (err_code < 0) return err_code; } return 0; } static void set_gain(struct gspca_dev *gspca_dev, s32 g) { struct sd *sd = (struct sd *) gspca_dev; struct init_command genius_vcam_live_gain_cmds[] = { {{0x1d, 0x25, 0x10 /* This byte is gain */, 0x20, 0xab, 0x00}, 0}, }; if (!gspca_dev->streaming) return; switch (sd->model) { case 0x7003: genius_vcam_live_gain_cmds[0].instruction[2] = g; run_start_commands(gspca_dev, genius_vcam_live_gain_cmds, ARRAY_SIZE(genius_vcam_live_gain_cmds)); break; default: break; } } static int sd_s_ctrl(struct v4l2_ctrl *ctrl) { struct gspca_dev *gspca_dev = container_of(ctrl->handler, struct gspca_dev, ctrl_handler); struct sd *sd = (struct sd *)gspca_dev; gspca_dev->usb_err = 0; if (!gspca_dev->streaming) return 0; switch (ctrl->id) { /* standalone gain control */ case V4L2_CID_GAIN: set_gain(gspca_dev, ctrl->val); break; /* autogain */ case V4L2_CID_AUTOGAIN: set_gain(gspca_dev, sd->gain->val); break; } return gspca_dev->usb_err; } static const struct v4l2_ctrl_ops sd_ctrl_ops = { .s_ctrl = sd_s_ctrl, }; static int sd_init_controls(struct gspca_dev *gspca_dev) { struct v4l2_ctrl_handler *hdl = &gspca_dev->ctrl_handler; struct sd *sd = (struct sd *)gspca_dev; gspca_dev->vdev.ctrl_handler = hdl; v4l2_ctrl_handler_init(hdl, 2); switch (sd->model) { case 0x7003: sd->gain = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_GAIN, 0, 20, 1, 0); sd->autogain = v4l2_ctrl_new_std(hdl, &sd_ctrl_ops, V4L2_CID_AUTOGAIN, 0, 1, 1, 1); break; default: break; } return 0; } static int start_spy_cam(struct gspca_dev *gspca_dev) { struct init_command spy_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x22, 0x01, 0x04, 0x00, 0x00}, 4}, {{0x13, 0x23, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, /* width 352 */ {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, /* height 288 */ /* {{0x13, 0x27, 0x01, 0x28, 0x00, 0x00}, 4}, */ {{0x13, 0x27, 0x01, 0x68, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x09, 0x00, 0x00}, 4}, /* red gain ?*/ /* {{0x13, 0x28, 0x01, 0x00, 0x00, 0x00}, 4}, */ {{0x13, 0x29, 0x01, 0x00, 0x00, 0x00}, 4}, /* {{0x13, 0x29, 0x01, 0x0c, 0x00, 0x00}, 4}, */ {{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4}, /* {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, */ {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x02, 0x00, 0x00}, 4}, /* {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, */ {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x07, 0x00, 0x00}, 4}, {{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4}, {{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x02, 0x06, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x03, 0x13, 0x00, 0x00, 0x00}, 4}, /*don't mess with*/ /*{{0x11, 0x04, 0x06, 0x00, 0x00, 0x00}, 4}, observed */ {{0x11, 0x04, 0x00, 0x00, 0x00, 0x00}, 4}, /* brighter */ /*{{0x11, 0x05, 0x65, 0x00, 0x00, 0x00}, 4}, observed */ {{0x11, 0x05, 0x00, 0x00, 0x00, 0x00}, 4}, /* brighter */ {{0x11, 0x06, 0xb1, 0x00, 0x00, 0x00}, 4}, /* observed */ {{0x11, 0x07, 0x00, 0x00, 0x00, 0x00}, 4}, /*{{0x11, 0x08, 0x06, 0x00, 0x00, 0x00}, 4}, observed */ {{0x11, 0x08, 0x0b, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x09, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0c, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0d, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0e, 0x04, 0x00, 0x00, 0x00}, 4}, /* {{0x11, 0x0f, 0x00, 0x00, 0x00, 0x00}, 4}, */ /* brightness or gain. 0 is default. 4 is good * indoors at night with incandescent lighting */ {{0x11, 0x0f, 0x04, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x06, 0x00, 0x00, 0x00}, 4}, /*hstart or hoffs*/ {{0x11, 0x11, 0x06, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x01, 0x00, 0x00, 0x00}, 4}, /* {{0x1b, 0x02, 0x06, 0x00, 0x00, 0x00}, 1}, observed */ {{0x1b, 0x02, 0x11, 0x00, 0x00, 0x00}, 1}, /* brighter */ /* {{0x1b, 0x13, 0x01, 0x00, 0x00, 0x00}, 1}, observed */ {{0x1b, 0x13, 0x11, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 1}, /* compresses */ /* Camera should start to capture now. */ }; return run_start_commands(gspca_dev, spy_start_commands, ARRAY_SIZE(spy_start_commands)); } static int start_cif_cam(struct gspca_dev *gspca_dev) { struct init_command cif_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, /* The entire sequence below seems redundant */ /* {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x22, 0x01, 0x06, 0x00, 0x00}, 4}, {{0x13, 0x23, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, width? {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, height? {{0x13, 0x27, 0x01, 0x68, 0x00, 0x00}, 4}, subsample? {{0x13, 0x28, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, {{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4}, {{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4},*/ {{0x1b, 0x21, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x17, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x19, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x02, 0x06, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x03, 0x5a, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x04, 0x27, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x05, 0x01, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x12, 0x14, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x13, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x14, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x15, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x16, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x77, 0xa2, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x06, 0x0f, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x07, 0x14, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x08, 0x0f, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x09, 0x10, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x0e, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x0f, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x12, 0x07, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x10, 0x1f, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 1}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 1}, /* width/8 */ {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 1}, /* height/8 */ /* {{0x13, 0x27, 0x01, 0x68, 0x00, 0x00}, 4}, subsample? * {{0x13, 0x28, 0x01, 0x1e, 0x00, 0x00}, 4}, does nothing * {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, */ /* {{0x13, 0x29, 0x01, 0x22, 0x00, 0x00}, 4}, * causes subsampling * but not a change in the resolution setting! */ {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x01, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x08, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x06, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x1b, 0x04, 0x6d, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x05, 0x03, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x36, 0x06, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x0e, 0x01, 0x00, 0x00, 0x00}, 1}, {{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x1b, 0x0f, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x36, 0x05, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x10, 0x0f, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x02, 0x06, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 1},/* use compression */ /* Camera should start to capture now. */ }; return run_start_commands(gspca_dev, cif_start_commands, ARRAY_SIZE(cif_start_commands)); } static int start_ms350_cam(struct gspca_dev *gspca_dev) { struct init_command ms350_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x16, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x22, 0x01, 0x04, 0x00, 0x00}, 4}, {{0x13, 0x23, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x28, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x09, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, {{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4}, {{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x00, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x01, 0x70, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x02, 0x05, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x03, 0x5d, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x04, 0x07, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x05, 0x25, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x06, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x07, 0x09, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x08, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x09, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0a, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0c, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0d, 0x0c, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0e, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x0f, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x63, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0x70, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x18, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, /* width */ {{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, /* height */ {{0x13, 0x28, 0x01, 0x09, 0x00, 0x00}, 4}, /* vstart? */ {{0x13, 0x27, 0x01, 0x28, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x40, 0x00, 0x00}, 4}, /* hstart? */ {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, {{0x1b, 0x02, 0x05, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x18, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x02, 0x0a, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x11, 0x01, 0x00, 0x00, 0x00}, 0}, /* Camera should start to capture now. */ }; return run_start_commands(gspca_dev, ms350_start_commands, ARRAY_SIZE(ms350_start_commands)); } static int start_genius_cam(struct gspca_dev *gspca_dev) { struct init_command genius_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x16, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x10, 0x00, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, /* "preliminary" width and height settings */ {{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x22, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x07, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2d, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x64, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x91, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0x20, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x16, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x17, 0x60, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2d, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x25, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x26, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x27, 0x88, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x30, 0x38, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x31, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x32, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x33, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x34, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x5b, 0x0a, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, /* real width */ {{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, /* real height */ {{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x62, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x28, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x04, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0xe0, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x16, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x17, 0x80, 0x00, 0x00, 0x00}, 4}, {{0x1c, 0x20, 0x00, 0x2a, 0x00, 0x00}, 1}, {{0x1c, 0x20, 0x00, 0x2a, 0x00, 0x00}, 1}, {{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 0} /* Camera should start to capture now. */ }; return run_start_commands(gspca_dev, genius_start_commands, ARRAY_SIZE(genius_start_commands)); } static int start_genius_videocam_live(struct gspca_dev *gspca_dev) { int r; struct sd *sd = (struct sd *) gspca_dev; struct init_command genius_vcam_live_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 0}, {{0x16, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x10, 0x00, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x22, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x07, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2d, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x64, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x91, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0x20, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x16, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x17, 0x60, 0x00, 0x00, 0x00}, 4}, {{0x1c, 0x20, 0x00, 0x2d, 0x00, 0x00}, 4}, {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x22, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x23, 0x01, 0x01, 0x00, 0x00}, 4}, {{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x16, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x12, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x22, 0x00, 0x00}, 4}, {{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x09, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x07, 0x00, 0x00}, 4}, {{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4}, {{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x01, 0x04, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x02, 0x92, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x64, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x91, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0x20, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x16, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x17, 0x60, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2d, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x25, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x26, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x27, 0x88, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x30, 0x38, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x31, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x32, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x33, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x34, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x5b, 0x0a, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x0e, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x29, 0x01, 0x62, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, {{0x11, 0x20, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x21, 0x2a, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x22, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x23, 0x28, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x10, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x11, 0x04, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x12, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x13, 0x03, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x14, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x15, 0xe0, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x16, 0x02, 0x00, 0x00, 0x00}, 4}, {{0x11, 0x17, 0x80, 0x00, 0x00, 0x00}, 4}, {{0x1c, 0x20, 0x00, 0x2a, 0x00, 0x00}, 1}, {{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 0}, /* Camera should start to capture now. */ {{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 0}, {{0x1b, 0x32, 0x26, 0x00, 0x00, 0x00}, 0}, {{0x1d, 0x25, 0x10, 0x20, 0xab, 0x00}, 0}, }; r = run_start_commands(gspca_dev, genius_vcam_live_start_commands, ARRAY_SIZE(genius_vcam_live_start_commands)); if (r < 0) return r; if (sd->gain) set_gain(gspca_dev, v4l2_ctrl_g_ctrl(sd->gain)); return r; } static int start_vivitar_cam(struct gspca_dev *gspca_dev) { struct init_command vivitar_start_commands[] = { {{0x0c, 0x01, 0x00, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x20, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x21, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x22, 0x01, 0x01, 0x00, 0x00}, 4}, {{0x13, 0x23, 0x01, 0x01, 0x00, 0x00}, 4}, {{0x13, 0x24, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, {{0x13, 0x27, 0x01, 0x20, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x0a, 0x00, 0x00}, 4}, /* * Above is changed from OEM 0x0b. Fixes Bayer tiling. * Presumably gives a vertical shift of one row. */ {{0x13, 0x29, 0x01, 0x20, 0x00, 0x00}, 4}, /* Above seems to do horizontal shift. */ {{0x13, 0x2a, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2b, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x2c, 0x01, 0x02, 0x00, 0x00}, 4}, {{0x13, 0x2d, 0x01, 0x03, 0x00, 0x00}, 4}, {{0x13, 0x2e, 0x01, 0x0f, 0x00, 0x00}, 4}, {{0x13, 0x2f, 0x01, 0x0c, 0x00, 0x00}, 4}, /* Above three commands seem to relate to brightness. */ {{0x12, 0x34, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x13, 0x34, 0x01, 0xa1, 0x00, 0x00}, 4}, {{0x13, 0x35, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x1b, 0x12, 0x80, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x01, 0x77, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x02, 0x3a, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x12, 0x78, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x13, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x14, 0x80, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x15, 0x34, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x1b, 0x04, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x20, 0x44, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x23, 0xee, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x26, 0xa0, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x27, 0x9a, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x28, 0xa0, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x29, 0x30, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x2a, 0x80, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x2b, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x2f, 0x3d, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x30, 0x24, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x32, 0x86, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x60, 0xa9, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x61, 0x42, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x65, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x69, 0x38, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x6f, 0x88, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x70, 0x0b, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x71, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x74, 0x21, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x75, 0x86, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x76, 0x00, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x7d, 0xf3, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x17, 0x1c, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x18, 0xc0, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x19, 0x05, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x1a, 0xf6, 0x00, 0x00, 0x00}, 1}, /* {{0x13, 0x25, 0x01, 0x28, 0x00, 0x00}, 4}, {{0x13, 0x26, 0x01, 0x1e, 0x00, 0x00}, 4}, {{0x13, 0x28, 0x01, 0x0b, 0x00, 0x00}, 4}, */ {{0x20, 0x36, 0x06, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x10, 0x26, 0x00, 0x00, 0x00}, 1}, {{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x1b, 0x76, 0x03, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x36, 0x05, 0x00, 0x00, 0x00}, 1}, {{0x1b, 0x00, 0x3f, 0x00, 0x00, 0x00}, 1}, /* Above is brightness; OEM driver setting is 0x10 */ {{0x12, 0x27, 0x01, 0x00, 0x00, 0x00}, 4}, {{0x20, 0x29, 0x30, 0x00, 0x00, 0x00}, 1}, {{0x20, 0x34, 0xa1, 0x00, 0x00, 0x00}, 1} }; return run_start_commands(gspca_dev, vivitar_start_commands, ARRAY_SIZE(vivitar_start_commands)); } static int sd_start(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; int err_code; sd->sof_read = 0; switch (sd->model) { case 0x7005: err_code = start_genius_cam(gspca_dev); break; case 0x7003: err_code = start_genius_videocam_live(gspca_dev); break; case 0x8001: err_code = start_spy_cam(gspca_dev); break; case 0x8003: err_code = start_cif_cam(gspca_dev); break; case 0x8008: err_code = start_ms350_cam(gspca_dev); break; case 0x800a: err_code = start_vivitar_cam(gspca_dev); break; default: pr_err("Starting unknown camera, please report this\n"); return -ENXIO; } sd->avg_lum = -1; return err_code; } static void sd_stopN(struct gspca_dev *gspca_dev) { int result; __u8 data[6]; result = sn9c2028_read1(gspca_dev); if (result < 0) gspca_err(gspca_dev, "Camera Stop read failed\n"); memset(data, 0, 6); data[0] = 0x14; result = sn9c2028_command(gspca_dev, data); if (result < 0) gspca_err(gspca_dev, "Camera Stop command failed\n"); } static void do_autogain(struct gspca_dev *gspca_dev, int avg_lum) { struct sd *sd = (struct sd *) gspca_dev; s32 cur_gain = v4l2_ctrl_g_ctrl(sd->gain); if (avg_lum == -1) return; if (avg_lum < MIN_AVG_LUM) { if (cur_gain == sd->gain->maximum) return; cur_gain++; v4l2_ctrl_s_ctrl(sd->gain, cur_gain); } if (avg_lum > MAX_AVG_LUM) { if (cur_gain == sd->gain->minimum) return; cur_gain--; v4l2_ctrl_s_ctrl(sd->gain, cur_gain); } } static void sd_dqcallback(struct gspca_dev *gspca_dev) { struct sd *sd = (struct sd *) gspca_dev; if (sd->autogain == NULL || !v4l2_ctrl_g_ctrl(sd->autogain)) return; do_autogain(gspca_dev, sd->avg_lum); } /* Include sn9c2028 sof detection functions */ #include "sn9c2028.h" static void sd_pkt_scan(struct gspca_dev *gspca_dev, __u8 *data, /* isoc packet */ int len) /* iso packet length */ { unsigned char *sof; sof = sn9c2028_find_sof(gspca_dev, data, len); if (sof) { int n; /* finish decoding current frame */ n = sof - data; if (n > sizeof sn9c2028_sof_marker) n -= sizeof sn9c2028_sof_marker; else n = 0; gspca_frame_add(gspca_dev, LAST_PACKET, data, n); /* Start next frame. */ gspca_frame_add(gspca_dev, FIRST_PACKET, sn9c2028_sof_marker, sizeof sn9c2028_sof_marker); len -= sof - data; data = sof; } gspca_frame_add(gspca_dev, INTER_PACKET, data, len); } /* sub-driver description */ static const struct sd_desc sd_desc = { .name = MODULE_NAME, .config = sd_config, .init = sd_init, .init_controls = sd_init_controls, .start = sd_start, .stopN = sd_stopN, .dq_callback = sd_dqcallback, .pkt_scan = sd_pkt_scan, }; /* -- module initialisation -- */ static const struct usb_device_id device_table[] = { {USB_DEVICE(0x0458, 0x7005)}, /* Genius Smart 300, version 2 */ {USB_DEVICE(0x0458, 0x7003)}, /* Genius Videocam Live v2 */ /* The Genius Smart is untested. I can't find an owner ! */ /* {USB_DEVICE(0x0c45, 0x8000)}, DC31VC, Don't know this camera */ {USB_DEVICE(0x0c45, 0x8001)}, /* Wild Planet digital spy cam */ {USB_DEVICE(0x0c45, 0x8003)}, /* Several small CIF cameras */ /* {USB_DEVICE(0x0c45, 0x8006)}, Unknown VGA camera */ {USB_DEVICE(0x0c45, 0x8008)}, /* Mini-Shotz ms-350 */ {USB_DEVICE(0x0c45, 0x800a)}, /* Vivicam 3350B */ {} }; MODULE_DEVICE_TABLE(usb, device_table); /* -- device connect -- */ static int sd_probe(struct usb_interface *intf, const struct usb_device_id *id) { return gspca_dev_probe(intf, id, &sd_desc, sizeof(struct sd), THIS_MODULE); } static struct usb_driver sd_driver = { .name = MODULE_NAME, .id_table = device_table, .probe = sd_probe, .disconnect = gspca_disconnect, #ifdef CONFIG_PM .suspend = gspca_suspend, .resume = gspca_resume, .reset_resume = gspca_resume, #endif }; module_usb_driver(sd_driver); |
1 1 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Virtual PTP 1588 clock for use with KVM guests * * Copyright (C) 2017 Red Hat Inc. */ #include <linux/device.h> #include <linux/kernel.h> #include <asm/pvclock.h> #include <asm/kvmclock.h> #include <linux/module.h> #include <uapi/asm/kvm_para.h> #include <uapi/linux/kvm_para.h> #include <linux/ptp_clock_kernel.h> #include <linux/ptp_kvm.h> #include <linux/set_memory.h> static phys_addr_t clock_pair_gpa; static struct kvm_clock_pairing clock_pair_glbl; static struct kvm_clock_pairing *clock_pair; int kvm_arch_ptp_init(void) { struct page *p; long ret; if (!kvm_para_available()) return -ENODEV; if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { p = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!p) return -ENOMEM; clock_pair = page_address(p); ret = set_memory_decrypted((unsigned long)clock_pair, 1); if (ret) { __free_page(p); clock_pair = NULL; goto nofree; } } else { clock_pair = &clock_pair_glbl; } clock_pair_gpa = slow_virt_to_phys(clock_pair); if (!pvclock_get_pvti_cpu0_va()) { ret = -ENODEV; goto err; } ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa, KVM_CLOCK_PAIRING_WALLCLOCK); if (ret == -KVM_ENOSYS) { ret = -ENODEV; goto err; } return ret; err: kvm_arch_ptp_exit(); nofree: return ret; } void kvm_arch_ptp_exit(void) { if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) { WARN_ON(set_memory_encrypted((unsigned long)clock_pair, 1)); free_page((unsigned long)clock_pair); clock_pair = NULL; } } int kvm_arch_ptp_get_clock(struct timespec64 *ts) { long ret; ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa, KVM_CLOCK_PAIRING_WALLCLOCK); if (ret != 0) { pr_err_ratelimited("clock offset hypercall ret %lu\n", ret); return -EOPNOTSUPP; } ts->tv_sec = clock_pair->sec; ts->tv_nsec = clock_pair->nsec; return 0; } int kvm_arch_ptp_get_crosststamp(u64 *cycle, struct timespec64 *tspec, enum clocksource_ids *cs_id) { struct pvclock_vcpu_time_info *src; unsigned int version; long ret; src = this_cpu_pvti(); do { /* * We are using a TSC value read in the hosts * kvm_hc_clock_pairing handling. * So any changes to tsc_to_system_mul * and tsc_shift or any other pvclock * data invalidate that measurement. */ version = pvclock_read_begin(src); ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa, KVM_CLOCK_PAIRING_WALLCLOCK); if (ret != 0) { pr_err_ratelimited("clock pairing hypercall ret %lu\n", ret); return -EOPNOTSUPP; } tspec->tv_sec = clock_pair->sec; tspec->tv_nsec = clock_pair->nsec; *cycle = __pvclock_read_cycles(src, clock_pair->tsc); } while (pvclock_read_retry(src, version)); *cs_id = CSID_X86_KVM_CLK; return 0; } |
8 8 3 1 2 3 3 1 3 3 1 1 1 2 1 1 1 2 3 1 2 1 1 2 1 1 1 3 3 3 1 1 1 1 1 10 12 8 8 12 12 12 4 8 12 12 3 1 2 8 8 8 8 12 1 3 8 8 8 8 8 3 5 8 1 7 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 | // SPDX-License-Identifier: GPL-2.0+ /* * usblp.c * * Copyright (c) 1999 Michael Gee <michael@linuxspecific.com> * Copyright (c) 1999 Pavel Machek <pavel@ucw.cz> * Copyright (c) 2000 Randy Dunlap <rdunlap@xenotime.net> * Copyright (c) 2000 Vojtech Pavlik <vojtech@suse.cz> # Copyright (c) 2001 Pete Zaitcev <zaitcev@redhat.com> # Copyright (c) 2001 David Paschal <paschal@rcsis.com> * Copyright (c) 2006 Oliver Neukum <oliver@neukum.name> * * USB Printer Device Class driver for USB printers and printer cables * * Sponsored by SuSE * * ChangeLog: * v0.1 - thorough cleaning, URBification, almost a rewrite * v0.2 - some more cleanups * v0.3 - cleaner again, waitqueue fixes * v0.4 - fixes in unidirectional mode * v0.5 - add DEVICE_ID string support * v0.6 - never time out * v0.7 - fixed bulk-IN read and poll (David Paschal) * v0.8 - add devfs support * v0.9 - fix unplug-while-open paths * v0.10- remove sleep_on, fix error on oom (oliver@neukum.org) * v0.11 - add proto_bias option (Pete Zaitcev) * v0.12 - add hpoj.sourceforge.net ioctls (David Paschal) * v0.13 - alloc space for statusbuf (<status> not on stack); * use usb_alloc_coherent() for read buf & write buf; * none - Maintained in Linux kernel after v0.13 */ #include <linux/module.h> #include <linux/kernel.h> #include <linux/sched/signal.h> #include <linux/signal.h> #include <linux/poll.h> #include <linux/slab.h> #include <linux/lp.h> #include <linux/mutex.h> #undef DEBUG #include <linux/usb.h> #include <linux/usb/ch9.h> #include <linux/ratelimit.h> /* * Version Information */ #define DRIVER_AUTHOR "Michael Gee, Pavel Machek, Vojtech Pavlik, Randy Dunlap, Pete Zaitcev, David Paschal" #define DRIVER_DESC "USB Printer Device Class driver" #define USBLP_BUF_SIZE 8192 #define USBLP_BUF_SIZE_IN 1024 #define USBLP_DEVICE_ID_SIZE 1024 /* ioctls: */ #define IOCNR_GET_DEVICE_ID 1 #define IOCNR_GET_PROTOCOLS 2 #define IOCNR_SET_PROTOCOL 3 #define IOCNR_HP_SET_CHANNEL 4 #define IOCNR_GET_BUS_ADDRESS 5 #define IOCNR_GET_VID_PID 6 #define IOCNR_SOFT_RESET 7 /* Get device_id string: */ #define LPIOC_GET_DEVICE_ID(len) _IOC(_IOC_READ, 'P', IOCNR_GET_DEVICE_ID, len) /* The following ioctls were added for http://hpoj.sourceforge.net: * Get two-int array: * [0]=current protocol * (1=USB_CLASS_PRINTER/1/1, 2=USB_CLASS_PRINTER/1/2, * 3=USB_CLASS_PRINTER/1/3), * [1]=supported protocol mask (mask&(1<<n)!=0 means * USB_CLASS_PRINTER/1/n supported): */ #define LPIOC_GET_PROTOCOLS(len) _IOC(_IOC_READ, 'P', IOCNR_GET_PROTOCOLS, len) /* * Set protocol * (arg: 1=USB_CLASS_PRINTER/1/1, 2=USB_CLASS_PRINTER/1/2, * 3=USB_CLASS_PRINTER/1/3): */ #define LPIOC_SET_PROTOCOL _IOC(_IOC_WRITE, 'P', IOCNR_SET_PROTOCOL, 0) /* Set channel number (HP Vendor-specific command): */ #define LPIOC_HP_SET_CHANNEL _IOC(_IOC_WRITE, 'P', IOCNR_HP_SET_CHANNEL, 0) /* Get two-int array: [0]=bus number, [1]=device address: */ #define LPIOC_GET_BUS_ADDRESS(len) _IOC(_IOC_READ, 'P', IOCNR_GET_BUS_ADDRESS, len) /* Get two-int array: [0]=vendor ID, [1]=product ID: */ #define LPIOC_GET_VID_PID(len) _IOC(_IOC_READ, 'P', IOCNR_GET_VID_PID, len) /* Perform class specific soft reset */ #define LPIOC_SOFT_RESET _IOC(_IOC_NONE, 'P', IOCNR_SOFT_RESET, 0); /* * A DEVICE_ID string may include the printer's serial number. * It should end with a semi-colon (';'). * An example from an HP 970C DeskJet printer is (this is one long string, * with the serial number changed): MFG:HEWLETT-PACKARD;MDL:DESKJET 970C;CMD:MLC,PCL,PML;CLASS:PRINTER;DESCRIPTION:Hewlett-Packard DeskJet 970C;SERN:US970CSEPROF;VSTATUS:$HB0$NC0,ff,DN,IDLE,CUT,K1,C0,DP,NR,KP000,CP027;VP:0800,FL,B0;VJ: ; */ /* * USB Printer Requests */ #define USBLP_REQ_GET_ID 0x00 #define USBLP_REQ_GET_STATUS 0x01 #define USBLP_REQ_RESET 0x02 #define USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST 0x00 /* HP Vendor-specific */ #define USBLP_MINORS 16 #define USBLP_MINOR_BASE 0 #define USBLP_CTL_TIMEOUT 5000 /* 5 seconds */ #define USBLP_FIRST_PROTOCOL 1 #define USBLP_LAST_PROTOCOL 3 #define USBLP_MAX_PROTOCOLS (USBLP_LAST_PROTOCOL+1) /* * some arbitrary status buffer size; * need a status buffer that is allocated via kmalloc(), not on stack */ #define STATUS_BUF_SIZE 8 /* * Locks down the locking order: * ->wmut locks wstatus. * ->mut locks the whole usblp, except [rw]complete, and thus, by indirection, * [rw]status. We only touch status when we know the side idle. * ->lock locks what interrupt accesses. */ struct usblp { struct usb_device *dev; /* USB device */ struct mutex wmut; struct mutex mut; spinlock_t lock; /* locks rcomplete, wcomplete */ char *readbuf; /* read transfer_buffer */ char *statusbuf; /* status transfer_buffer */ struct usb_anchor urbs; wait_queue_head_t rwait, wwait; int readcount; /* Counter for reads */ int ifnum; /* Interface number */ struct usb_interface *intf; /* The interface */ /* * Alternate-setting numbers and endpoints for each protocol * (USB_CLASS_PRINTER/1/{index=1,2,3}) that the device supports: */ struct { int alt_setting; struct usb_endpoint_descriptor *epwrite; struct usb_endpoint_descriptor *epread; } protocol[USBLP_MAX_PROTOCOLS]; int current_protocol; int minor; /* minor number of device */ int wcomplete, rcomplete; int wstatus; /* bytes written or error */ int rstatus; /* bytes ready or error */ unsigned int quirks; /* quirks flags */ unsigned int flags; /* mode flags */ unsigned char used; /* True if open */ unsigned char present; /* True if not disconnected */ unsigned char bidir; /* interface is bidirectional */ unsigned char no_paper; /* Paper Out happened */ unsigned char *device_id_string; /* IEEE 1284 DEVICE ID string (ptr) */ /* first 2 bytes are (big-endian) length */ }; #ifdef DEBUG static void usblp_dump(struct usblp *usblp) { struct device *dev = &usblp->intf->dev; int p; dev_dbg(dev, "usblp=0x%p\n", usblp); dev_dbg(dev, "dev=0x%p\n", usblp->dev); dev_dbg(dev, "present=%d\n", usblp->present); dev_dbg(dev, "readbuf=0x%p\n", usblp->readbuf); dev_dbg(dev, "readcount=%d\n", usblp->readcount); dev_dbg(dev, "ifnum=%d\n", usblp->ifnum); for (p = USBLP_FIRST_PROTOCOL; p <= USBLP_LAST_PROTOCOL; p++) { dev_dbg(dev, "protocol[%d].alt_setting=%d\n", p, usblp->protocol[p].alt_setting); dev_dbg(dev, "protocol[%d].epwrite=%p\n", p, usblp->protocol[p].epwrite); dev_dbg(dev, "protocol[%d].epread=%p\n", p, usblp->protocol[p].epread); } dev_dbg(dev, "current_protocol=%d\n", usblp->current_protocol); dev_dbg(dev, "minor=%d\n", usblp->minor); dev_dbg(dev, "wstatus=%d\n", usblp->wstatus); dev_dbg(dev, "rstatus=%d\n", usblp->rstatus); dev_dbg(dev, "quirks=%d\n", usblp->quirks); dev_dbg(dev, "used=%d\n", usblp->used); dev_dbg(dev, "bidir=%d\n", usblp->bidir); dev_dbg(dev, "device_id_string=\"%s\"\n", usblp->device_id_string ? usblp->device_id_string + 2 : (unsigned char *)"(null)"); } #endif /* Quirks: various printer quirks are handled by this table & its flags. */ struct quirk_printer_struct { __u16 vendorId; __u16 productId; unsigned int quirks; }; #define USBLP_QUIRK_BIDIR 0x1 /* reports bidir but requires unidirectional mode (no INs/reads) */ #define USBLP_QUIRK_USB_INIT 0x2 /* needs vendor USB init string */ #define USBLP_QUIRK_BAD_CLASS 0x4 /* descriptor uses vendor-specific Class or SubClass */ static const struct quirk_printer_struct quirk_printers[] = { { 0x03f0, 0x0004, USBLP_QUIRK_BIDIR }, /* HP DeskJet 895C */ { 0x03f0, 0x0104, USBLP_QUIRK_BIDIR }, /* HP DeskJet 880C */ { 0x03f0, 0x0204, USBLP_QUIRK_BIDIR }, /* HP DeskJet 815C */ { 0x03f0, 0x0304, USBLP_QUIRK_BIDIR }, /* HP DeskJet 810C/812C */ { 0x03f0, 0x0404, USBLP_QUIRK_BIDIR }, /* HP DeskJet 830C */ { 0x03f0, 0x0504, USBLP_QUIRK_BIDIR }, /* HP DeskJet 885C */ { 0x03f0, 0x0604, USBLP_QUIRK_BIDIR }, /* HP DeskJet 840C */ { 0x03f0, 0x0804, USBLP_QUIRK_BIDIR }, /* HP DeskJet 816C */ { 0x03f0, 0x1104, USBLP_QUIRK_BIDIR }, /* HP Deskjet 959C */ { 0x0409, 0xefbe, USBLP_QUIRK_BIDIR }, /* NEC Picty900 (HP OEM) */ { 0x0409, 0xbef4, USBLP_QUIRK_BIDIR }, /* NEC Picty760 (HP OEM) */ { 0x0409, 0xf0be, USBLP_QUIRK_BIDIR }, /* NEC Picty920 (HP OEM) */ { 0x0409, 0xf1be, USBLP_QUIRK_BIDIR }, /* NEC Picty800 (HP OEM) */ { 0x0482, 0x0010, USBLP_QUIRK_BIDIR }, /* Kyocera Mita FS 820, by zut <kernel@zut.de> */ { 0x04f9, 0x000d, USBLP_QUIRK_BIDIR }, /* Brother Industries, Ltd HL-1440 Laser Printer */ { 0x04b8, 0x0202, USBLP_QUIRK_BAD_CLASS }, /* Seiko Epson Receipt Printer M129C */ { 0, 0 } }; static int usblp_wwait(struct usblp *usblp, int nonblock); static int usblp_wtest(struct usblp *usblp, int nonblock); static int usblp_rwait_and_lock(struct usblp *usblp, int nonblock); static int usblp_rtest(struct usblp *usblp, int nonblock); static int usblp_submit_read(struct usblp *usblp); static int usblp_select_alts(struct usblp *usblp); static int usblp_set_protocol(struct usblp *usblp, int protocol); static int usblp_cache_device_id_string(struct usblp *usblp); /* forward reference to make our lives easier */ static struct usb_driver usblp_driver; static DEFINE_MUTEX(usblp_mutex); /* locks the existence of usblp's */ /* * Functions for usblp control messages. */ static int usblp_ctrl_msg(struct usblp *usblp, int request, int type, int dir, int recip, int value, void *buf, int len) { int retval; int index = usblp->ifnum; /* High byte has the interface index. Low byte has the alternate setting. */ if ((request == USBLP_REQ_GET_ID) && (type == USB_TYPE_CLASS)) index = (usblp->ifnum<<8)|usblp->protocol[usblp->current_protocol].alt_setting; retval = usb_control_msg(usblp->dev, dir ? usb_rcvctrlpipe(usblp->dev, 0) : usb_sndctrlpipe(usblp->dev, 0), request, type | dir | recip, value, index, buf, len, USBLP_CTL_TIMEOUT); dev_dbg(&usblp->intf->dev, "usblp_control_msg: rq: 0x%02x dir: %d recip: %d value: %d idx: %d len: %#x result: %d\n", request, !!dir, recip, value, index, len, retval); return retval < 0 ? retval : 0; } #define usblp_read_status(usblp, status)\ usblp_ctrl_msg(usblp, USBLP_REQ_GET_STATUS, USB_TYPE_CLASS, USB_DIR_IN, USB_RECIP_INTERFACE, 0, status, 1) #define usblp_get_id(usblp, config, id, maxlen)\ usblp_ctrl_msg(usblp, USBLP_REQ_GET_ID, USB_TYPE_CLASS, USB_DIR_IN, USB_RECIP_INTERFACE, config, id, maxlen) #define usblp_reset(usblp)\ usblp_ctrl_msg(usblp, USBLP_REQ_RESET, USB_TYPE_CLASS, USB_DIR_OUT, USB_RECIP_OTHER, 0, NULL, 0) static int usblp_hp_channel_change_request(struct usblp *usblp, int channel, u8 *new_channel) { u8 *buf; int ret; buf = kzalloc(1, GFP_KERNEL); if (!buf) return -ENOMEM; ret = usblp_ctrl_msg(usblp, USBLP_REQ_HP_CHANNEL_CHANGE_REQUEST, USB_TYPE_VENDOR, USB_DIR_IN, USB_RECIP_INTERFACE, channel, buf, 1); if (ret == 0) *new_channel = buf[0]; kfree(buf); return ret; } /* * See the description for usblp_select_alts() below for the usage * explanation. Look into your /sys/kernel/debug/usb/devices and dmesg in * case of any trouble. */ static int proto_bias = -1; /* * URB callback. */ static void usblp_bulk_read(struct urb *urb) { struct usblp *usblp = urb->context; int status = urb->status; unsigned long flags; if (usblp->present && usblp->used) { if (status) printk(KERN_WARNING "usblp%d: " "nonzero read bulk status received: %d\n", usblp->minor, status); } spin_lock_irqsave(&usblp->lock, flags); if (status < 0) usblp->rstatus = status; else usblp->rstatus = urb->actual_length; usblp->rcomplete = 1; wake_up(&usblp->rwait); spin_unlock_irqrestore(&usblp->lock, flags); usb_free_urb(urb); } static void usblp_bulk_write(struct urb *urb) { struct usblp *usblp = urb->context; int status = urb->status; unsigned long flags; if (usblp->present && usblp->used) { if (status) printk(KERN_WARNING "usblp%d: " "nonzero write bulk status received: %d\n", usblp->minor, status); } spin_lock_irqsave(&usblp->lock, flags); if (status < 0) usblp->wstatus = status; else usblp->wstatus = urb->actual_length; usblp->no_paper = 0; usblp->wcomplete = 1; wake_up(&usblp->wwait); spin_unlock_irqrestore(&usblp->lock, flags); usb_free_urb(urb); } /* * Get and print printer errors. */ static const char *usblp_messages[] = { "ok", "out of paper", "off-line", "on fire" }; static int usblp_check_status(struct usblp *usblp, int err) { unsigned char status, newerr = 0; int error; mutex_lock(&usblp->mut); if ((error = usblp_read_status(usblp, usblp->statusbuf)) < 0) { mutex_unlock(&usblp->mut); printk_ratelimited(KERN_ERR "usblp%d: error %d reading printer status\n", usblp->minor, error); return 0; } status = *usblp->statusbuf; mutex_unlock(&usblp->mut); if (~status & LP_PERRORP) newerr = 3; if (status & LP_POUTPA) newerr = 1; if (~status & LP_PSELECD) newerr = 2; if (newerr != err) { printk(KERN_INFO "usblp%d: %s\n", usblp->minor, usblp_messages[newerr]); } return newerr; } static int handle_bidir(struct usblp *usblp) { if (usblp->bidir && usblp->used) { if (usblp_submit_read(usblp) < 0) return -EIO; } return 0; } /* * File op functions. */ static int usblp_open(struct inode *inode, struct file *file) { int minor = iminor(inode); struct usblp *usblp; struct usb_interface *intf; int retval; if (minor < 0) return -ENODEV; mutex_lock(&usblp_mutex); retval = -ENODEV; intf = usb_find_interface(&usblp_driver, minor); if (!intf) goto out; usblp = usb_get_intfdata(intf); if (!usblp || !usblp->dev || !usblp->present) goto out; retval = -EBUSY; if (usblp->used) goto out; /* * We do not implement LP_ABORTOPEN/LPABORTOPEN for two reasons: * - We do not want persistent state which close(2) does not clear * - It is not used anyway, according to CUPS people */ retval = usb_autopm_get_interface(intf); if (retval < 0) goto out; usblp->used = 1; file->private_data = usblp; usblp->wcomplete = 1; /* we begin writeable */ usblp->wstatus = 0; usblp->rcomplete = 0; if (handle_bidir(usblp) < 0) { usb_autopm_put_interface(intf); usblp->used = 0; file->private_data = NULL; retval = -EIO; } out: mutex_unlock(&usblp_mutex); return retval; } static void usblp_cleanup(struct usblp *usblp) { printk(KERN_INFO "usblp%d: removed\n", usblp->minor); kfree(usblp->readbuf); kfree(usblp->device_id_string); kfree(usblp->statusbuf); usb_put_intf(usblp->intf); kfree(usblp); } static void usblp_unlink_urbs(struct usblp *usblp) { usb_kill_anchored_urbs(&usblp->urbs); } static int usblp_release(struct inode *inode, struct file *file) { struct usblp *usblp = file->private_data; usblp->flags &= ~LP_ABORT; mutex_lock(&usblp_mutex); usblp->used = 0; if (usblp->present) usblp_unlink_urbs(usblp); usb_autopm_put_interface(usblp->intf); if (!usblp->present) /* finish cleanup from disconnect */ usblp_cleanup(usblp); /* any URBs must be dead */ mutex_unlock(&usblp_mutex); return 0; } /* No kernel lock - fine */ static __poll_t usblp_poll(struct file *file, struct poll_table_struct *wait) { struct usblp *usblp = file->private_data; __poll_t ret = 0; unsigned long flags; /* Should we check file->f_mode & FMODE_WRITE before poll_wait()? */ poll_wait(file, &usblp->rwait, wait); poll_wait(file, &usblp->wwait, wait); mutex_lock(&usblp->mut); if (!usblp->present) ret |= EPOLLHUP; mutex_unlock(&usblp->mut); spin_lock_irqsave(&usblp->lock, flags); if (usblp->bidir && usblp->rcomplete) ret |= EPOLLIN | EPOLLRDNORM; if (usblp->no_paper || usblp->wcomplete) ret |= EPOLLOUT | EPOLLWRNORM; spin_unlock_irqrestore(&usblp->lock, flags); return ret; } static long usblp_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct usblp *usblp = file->private_data; int length, err, i; unsigned char newChannel; int status; int twoints[2]; int retval = 0; mutex_lock(&usblp->mut); if (!usblp->present) { retval = -ENODEV; goto done; } dev_dbg(&usblp->intf->dev, "usblp_ioctl: cmd=0x%x (%c nr=%d len=%d dir=%d)\n", cmd, _IOC_TYPE(cmd), _IOC_NR(cmd), _IOC_SIZE(cmd), _IOC_DIR(cmd)); if (_IOC_TYPE(cmd) == 'P') /* new-style ioctl number */ switch (_IOC_NR(cmd)) { case IOCNR_GET_DEVICE_ID: /* get the DEVICE_ID string */ if (_IOC_DIR(cmd) != _IOC_READ) { retval = -EINVAL; goto done; } length = usblp_cache_device_id_string(usblp); if (length < 0) { retval = length; goto done; } if (length > _IOC_SIZE(cmd)) length = _IOC_SIZE(cmd); /* truncate */ if (copy_to_user((void __user *) arg, usblp->device_id_string, (unsigned long) length)) { retval = -EFAULT; goto done; } break; case IOCNR_GET_PROTOCOLS: if (_IOC_DIR(cmd) != _IOC_READ || _IOC_SIZE(cmd) < sizeof(twoints)) { retval = -EINVAL; goto done; } twoints[0] = usblp->current_protocol; twoints[1] = 0; for (i = USBLP_FIRST_PROTOCOL; i <= USBLP_LAST_PROTOCOL; i++) { if (usblp->protocol[i].alt_setting >= 0) twoints[1] |= (1<<i); } if (copy_to_user((void __user *)arg, (unsigned char *)twoints, sizeof(twoints))) { retval = -EFAULT; goto done; } break; case IOCNR_SET_PROTOCOL: if (_IOC_DIR(cmd) != _IOC_WRITE) { retval = -EINVAL; goto done; } #ifdef DEBUG if (arg == -10) { usblp_dump(usblp); break; } #endif usblp_unlink_urbs(usblp); retval = usblp_set_protocol(usblp, arg); if (retval < 0) { usblp_set_protocol(usblp, usblp->current_protocol); } break; case IOCNR_HP_SET_CHANNEL: if (_IOC_DIR(cmd) != _IOC_WRITE || le16_to_cpu(usblp->dev->descriptor.idVendor) != 0x03F0 || usblp->quirks & USBLP_QUIRK_BIDIR) { retval = -EINVAL; goto done; } err = usblp_hp_channel_change_request(usblp, arg, &newChannel); if (err < 0) { dev_err(&usblp->dev->dev, "usblp%d: error = %d setting " "HP channel\n", usblp->minor, err); retval = -EIO; goto done; } dev_dbg(&usblp->intf->dev, "usblp%d requested/got HP channel %ld/%d\n", usblp->minor, arg, newChannel); break; case IOCNR_GET_BUS_ADDRESS: if (_IOC_DIR(cmd) != _IOC_READ || _IOC_SIZE(cmd) < sizeof(twoints)) { retval = -EINVAL; goto done; } twoints[0] = usblp->dev->bus->busnum; twoints[1] = usblp->dev->devnum; if (copy_to_user((void __user *)arg, (unsigned char *)twoints, sizeof(twoints))) { retval = -EFAULT; goto done; } dev_dbg(&usblp->intf->dev, "usblp%d is bus=%d, device=%d\n", usblp->minor, twoints[0], twoints[1]); break; case IOCNR_GET_VID_PID: if (_IOC_DIR(cmd) != _IOC_READ || _IOC_SIZE(cmd) < sizeof(twoints)) { retval = -EINVAL; goto done; } twoints[0] = le16_to_cpu(usblp->dev->descriptor.idVendor); twoints[1] = le16_to_cpu(usblp->dev->descriptor.idProduct); if (copy_to_user((void __user *)arg, (unsigned char *)twoints, sizeof(twoints))) { retval = -EFAULT; goto done; } dev_dbg(&usblp->intf->dev, "usblp%d is VID=0x%4.4X, PID=0x%4.4X\n", usblp->minor, twoints[0], twoints[1]); break; case IOCNR_SOFT_RESET: if (_IOC_DIR(cmd) != _IOC_NONE) { retval = -EINVAL; goto done; } retval = usblp_reset(usblp); break; default: retval = -ENOTTY; } else /* old-style ioctl value */ switch (cmd) { case LPGETSTATUS: retval = usblp_read_status(usblp, usblp->statusbuf); if (retval) { printk_ratelimited(KERN_ERR "usblp%d:" "failed reading printer status (%d)\n", usblp->minor, retval); retval = -EIO; goto done; } status = *usblp->statusbuf; if (copy_to_user((void __user *)arg, &status, sizeof(int))) retval = -EFAULT; break; case LPABORT: if (arg) usblp->flags |= LP_ABORT; else usblp->flags &= ~LP_ABORT; break; default: retval = -ENOTTY; } done: mutex_unlock(&usblp->mut); return retval; } static struct urb *usblp_new_writeurb(struct usblp *usblp, int transfer_length) { struct urb *urb; char *writebuf; writebuf = kmalloc(transfer_length, GFP_KERNEL); if (writebuf == NULL) return NULL; urb = usb_alloc_urb(0, GFP_KERNEL); if (urb == NULL) { kfree(writebuf); return NULL; } usb_fill_bulk_urb(urb, usblp->dev, usb_sndbulkpipe(usblp->dev, usblp->protocol[usblp->current_protocol].epwrite->bEndpointAddress), writebuf, transfer_length, usblp_bulk_write, usblp); urb->transfer_flags |= URB_FREE_BUFFER; return urb; } static ssize_t usblp_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { struct usblp *usblp = file->private_data; struct urb *writeurb; int rv; int transfer_length; ssize_t writecount = 0; if (mutex_lock_interruptible(&usblp->wmut)) { rv = -EINTR; goto raise_biglock; } if ((rv = usblp_wwait(usblp, !!(file->f_flags & O_NONBLOCK))) < 0) goto raise_wait; while (writecount < count) { /* * Step 1: Submit next block. */ if ((transfer_length = count - writecount) > USBLP_BUF_SIZE) transfer_length = USBLP_BUF_SIZE; rv = -ENOMEM; writeurb = usblp_new_writeurb(usblp, transfer_length); if (writeurb == NULL) goto raise_urb; usb_anchor_urb(writeurb, &usblp->urbs); if (copy_from_user(writeurb->transfer_buffer, buffer + writecount, transfer_length)) { rv = -EFAULT; goto raise_badaddr; } spin_lock_irq(&usblp->lock); usblp->wcomplete = 0; spin_unlock_irq(&usblp->lock); if ((rv = usb_submit_urb(writeurb, GFP_KERNEL)) < 0) { usblp->wstatus = 0; spin_lock_irq(&usblp->lock); usblp->no_paper = 0; usblp->wcomplete = 1; wake_up(&usblp->wwait); spin_unlock_irq(&usblp->lock); if (rv != -ENOMEM) rv = -EIO; goto raise_submit; } /* * Step 2: Wait for transfer to end, collect results. */ rv = usblp_wwait(usblp, !!(file->f_flags&O_NONBLOCK)); if (rv < 0) { if (rv == -EAGAIN) { /* Presume that it's going to complete well. */ writecount += transfer_length; } if (rv == -ENOSPC) { spin_lock_irq(&usblp->lock); usblp->no_paper = 1; /* Mark for poll(2) */ spin_unlock_irq(&usblp->lock); writecount += transfer_length; } /* Leave URB dangling, to be cleaned on close. */ goto collect_error; } if (usblp->wstatus < 0) { rv = -EIO; goto collect_error; } /* * This is critical: it must be our URB, not other writer's. * The wmut exists mainly to cover us here. */ writecount += usblp->wstatus; } mutex_unlock(&usblp->wmut); return writecount; raise_submit: raise_badaddr: usb_unanchor_urb(writeurb); usb_free_urb(writeurb); raise_urb: raise_wait: collect_error: /* Out of raise sequence */ mutex_unlock(&usblp->wmut); raise_biglock: return writecount ? writecount : rv; } /* * Notice that we fail to restart in a few cases: on EFAULT, on restart * error, etc. This is the historical behaviour. In all such cases we return * EIO, and applications loop in order to get the new read going. */ static ssize_t usblp_read(struct file *file, char __user *buffer, size_t len, loff_t *ppos) { struct usblp *usblp = file->private_data; ssize_t count; ssize_t avail; int rv; if (!usblp->bidir) return -EINVAL; rv = usblp_rwait_and_lock(usblp, !!(file->f_flags & O_NONBLOCK)); if (rv < 0) return rv; if (!usblp->present) { count = -ENODEV; goto done; } if ((avail = usblp->rstatus) < 0) { printk(KERN_ERR "usblp%d: error %d reading from printer\n", usblp->minor, (int)avail); usblp_submit_read(usblp); count = -EIO; goto done; } count = len < avail - usblp->readcount ? len : avail - usblp->readcount; if (count != 0 && copy_to_user(buffer, usblp->readbuf + usblp->readcount, count)) { count = -EFAULT; goto done; } if ((usblp->readcount += count) == avail) { if (usblp_submit_read(usblp) < 0) { /* We don't want to leak USB return codes into errno. */ if (count == 0) count = -EIO; goto done; } } done: mutex_unlock(&usblp->mut); return count; } /* * Wait for the write path to come idle. * This is called under the ->wmut, so the idle path stays idle. * * Our write path has a peculiar property: it does not buffer like a tty, * but waits for the write to succeed. This allows our ->release to bug out * without waiting for writes to drain. But it obviously does not work * when O_NONBLOCK is set. So, applications setting O_NONBLOCK must use * select(2) or poll(2) to wait for the buffer to drain before closing. * Alternatively, set blocking mode with fcntl and issue a zero-size write. */ static int usblp_wwait(struct usblp *usblp, int nonblock) { DECLARE_WAITQUEUE(waita, current); int rc; int err = 0; add_wait_queue(&usblp->wwait, &waita); for (;;) { if (mutex_lock_interruptible(&usblp->mut)) { rc = -EINTR; break; } set_current_state(TASK_INTERRUPTIBLE); rc = usblp_wtest(usblp, nonblock); mutex_unlock(&usblp->mut); if (rc <= 0) break; if (schedule_timeout(msecs_to_jiffies(1500)) == 0) { if (usblp->flags & LP_ABORT) { err = usblp_check_status(usblp, err); if (err == 1) { /* Paper out */ rc = -ENOSPC; break; } } else { /* Prod the printer, Gentoo#251237. */ mutex_lock(&usblp->mut); usblp_read_status(usblp, usblp->statusbuf); mutex_unlock(&usblp->mut); } } } set_current_state(TASK_RUNNING); remove_wait_queue(&usblp->wwait, &waita); return rc; } static int usblp_wtest(struct usblp *usblp, int nonblock) { unsigned long flags; if (!usblp->present) return -ENODEV; if (signal_pending(current)) return -EINTR; spin_lock_irqsave(&usblp->lock, flags); if (usblp->wcomplete) { spin_unlock_irqrestore(&usblp->lock, flags); return 0; } spin_unlock_irqrestore(&usblp->lock, flags); if (nonblock) return -EAGAIN; return 1; } /* * Wait for read bytes to become available. This probably should have been * called usblp_r_lock_and_wait(), because we lock first. But it's a traditional * name for functions which lock and return. * * We do not use wait_event_interruptible because it makes locking iffy. */ static int usblp_rwait_and_lock(struct usblp *usblp, int nonblock) { DECLARE_WAITQUEUE(waita, current); int rc; add_wait_queue(&usblp->rwait, &waita); for (;;) { if (mutex_lock_interruptible(&usblp->mut)) { rc = -EINTR; break; } set_current_state(TASK_INTERRUPTIBLE); if ((rc = usblp_rtest(usblp, nonblock)) < 0) { mutex_unlock(&usblp->mut); break; } if (rc == 0) /* Keep it locked */ break; mutex_unlock(&usblp->mut); schedule(); } set_current_state(TASK_RUNNING); remove_wait_queue(&usblp->rwait, &waita); return rc; } static int usblp_rtest(struct usblp *usblp, int nonblock) { unsigned long flags; if (!usblp->present) return -ENODEV; if (signal_pending(current)) return -EINTR; spin_lock_irqsave(&usblp->lock, flags); if (usblp->rcomplete) { spin_unlock_irqrestore(&usblp->lock, flags); return 0; } spin_unlock_irqrestore(&usblp->lock, flags); if (nonblock) return -EAGAIN; return 1; } /* * Please check ->bidir and other such things outside for now. */ static int usblp_submit_read(struct usblp *usblp) { struct urb *urb; unsigned long flags; int rc; rc = -ENOMEM; urb = usb_alloc_urb(0, GFP_KERNEL); if (urb == NULL) goto raise_urb; usb_fill_bulk_urb(urb, usblp->dev, usb_rcvbulkpipe(usblp->dev, usblp->protocol[usblp->current_protocol].epread->bEndpointAddress), usblp->readbuf, USBLP_BUF_SIZE_IN, usblp_bulk_read, usblp); usb_anchor_urb(urb, &usblp->urbs); spin_lock_irqsave(&usblp->lock, flags); usblp->readcount = 0; /* XXX Why here? */ usblp->rcomplete = 0; spin_unlock_irqrestore(&usblp->lock, flags); if ((rc = usb_submit_urb(urb, GFP_KERNEL)) < 0) { dev_dbg(&usblp->intf->dev, "error submitting urb (%d)\n", rc); spin_lock_irqsave(&usblp->lock, flags); usblp->rstatus = rc; usblp->rcomplete = 1; spin_unlock_irqrestore(&usblp->lock, flags); goto raise_submit; } return 0; raise_submit: usb_unanchor_urb(urb); usb_free_urb(urb); raise_urb: return rc; } /* * Checks for printers that have quirks, such as requiring unidirectional * communication but reporting bidirectional; currently some HP printers * have this flaw (HP 810, 880, 895, etc.), or needing an init string * sent at each open (like some Epsons). * Returns 1 if found, 0 if not found. * * HP recommended that we use the bidirectional interface but * don't attempt any bulk IN transfers from the IN endpoint. * Here's some more detail on the problem: * The problem is not that it isn't bidirectional though. The problem * is that if you request a device ID, or status information, while * the buffers are full, the return data will end up in the print data * buffer. For example if you make sure you never request the device ID * while you are sending print data, and you don't try to query the * printer status every couple of milliseconds, you will probably be OK. */ static unsigned int usblp_quirks(__u16 vendor, __u16 product) { int i; for (i = 0; quirk_printers[i].vendorId; i++) { if (vendor == quirk_printers[i].vendorId && product == quirk_printers[i].productId) return quirk_printers[i].quirks; } return 0; } static const struct file_operations usblp_fops = { .owner = THIS_MODULE, .read = usblp_read, .write = usblp_write, .poll = usblp_poll, .unlocked_ioctl = usblp_ioctl, .compat_ioctl = usblp_ioctl, .open = usblp_open, .release = usblp_release, .llseek = noop_llseek, }; static char *usblp_devnode(const struct device *dev, umode_t *mode) { return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev)); } static struct usb_class_driver usblp_class = { .name = "lp%d", .devnode = usblp_devnode, .fops = &usblp_fops, .minor_base = USBLP_MINOR_BASE, }; static ssize_t ieee1284_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct usb_interface *intf = to_usb_interface(dev); struct usblp *usblp = usb_get_intfdata(intf); if (usblp->device_id_string[0] == 0 && usblp->device_id_string[1] == 0) return 0; return sprintf(buf, "%s", usblp->device_id_string+2); } static DEVICE_ATTR_RO(ieee1284_id); static struct attribute *usblp_attrs[] = { &dev_attr_ieee1284_id.attr, NULL, }; ATTRIBUTE_GROUPS(usblp); static int usblp_probe(struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *dev = interface_to_usbdev(intf); struct usblp *usblp; int protocol; int retval; /* Malloc and start initializing usblp structure so we can use it * directly. */ usblp = kzalloc(sizeof(struct usblp), GFP_KERNEL); if (!usblp) { retval = -ENOMEM; goto abort_ret; } usblp->dev = dev; mutex_init(&usblp->wmut); mutex_init(&usblp->mut); spin_lock_init(&usblp->lock); init_waitqueue_head(&usblp->rwait); init_waitqueue_head(&usblp->wwait); init_usb_anchor(&usblp->urbs); usblp->ifnum = intf->cur_altsetting->desc.bInterfaceNumber; usblp->intf = usb_get_intf(intf); /* Malloc device ID string buffer to the largest expected length, * since we can re-query it on an ioctl and a dynamic string * could change in length. */ if (!(usblp->device_id_string = kmalloc(USBLP_DEVICE_ID_SIZE, GFP_KERNEL))) { retval = -ENOMEM; goto abort; } /* * Allocate read buffer. We somewhat wastefully * malloc both regardless of bidirectionality, because the * alternate setting can be changed later via an ioctl. */ if (!(usblp->readbuf = kmalloc(USBLP_BUF_SIZE_IN, GFP_KERNEL))) { retval = -ENOMEM; goto abort; } /* Allocate buffer for printer status */ usblp->statusbuf = kmalloc(STATUS_BUF_SIZE, GFP_KERNEL); if (!usblp->statusbuf) { retval = -ENOMEM; goto abort; } /* Lookup quirks for this printer. */ usblp->quirks = usblp_quirks( le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); /* Analyze and pick initial alternate settings and endpoints. */ protocol = usblp_select_alts(usblp); if (protocol < 0) { dev_dbg(&intf->dev, "incompatible printer-class device 0x%4.4X/0x%4.4X\n", le16_to_cpu(dev->descriptor.idVendor), le16_to_cpu(dev->descriptor.idProduct)); retval = -ENODEV; goto abort; } /* Setup the selected alternate setting and endpoints. */ if (usblp_set_protocol(usblp, protocol) < 0) { retval = -ENODEV; /* ->probe isn't ->ioctl */ goto abort; } /* Retrieve and store the device ID string. */ usblp_cache_device_id_string(usblp); #ifdef DEBUG usblp_check_status(usblp, 0); #endif usb_set_intfdata(intf, usblp); usblp->present = 1; retval = usb_register_dev(intf, &usblp_class); if (retval) { dev_err(&intf->dev, "usblp: Not able to get a minor (base %u, slice default): %d\n", USBLP_MINOR_BASE, retval); goto abort_intfdata; } usblp->minor = intf->minor; dev_info(&intf->dev, "usblp%d: USB %sdirectional printer dev %d if %d alt %d proto %d vid 0x%4.4X pid 0x%4.4X\n", usblp->minor, usblp->bidir ? "Bi" : "Uni", dev->devnum, usblp->ifnum, usblp->protocol[usblp->current_protocol].alt_setting, usblp->current_protocol, le16_to_cpu(usblp->dev->descriptor.idVendor), le16_to_cpu(usblp->dev->descriptor.idProduct)); return 0; abort_intfdata: usb_set_intfdata(intf, NULL); abort: kfree(usblp->readbuf); kfree(usblp->statusbuf); kfree(usblp->device_id_string); usb_put_intf(usblp->intf); kfree(usblp); abort_ret: return retval; } /* * We are a "new" style driver with usb_device_id table, * but our requirements are too intricate for simple match to handle. * * The "proto_bias" option may be used to specify the preferred protocol * for all USB printers (1=USB_CLASS_PRINTER/1/1, 2=USB_CLASS_PRINTER/1/2, * 3=USB_CLASS_PRINTER/1/3). If the device supports the preferred protocol, * then we bind to it. * * The best interface for us is USB_CLASS_PRINTER/1/2, because it * is compatible with a stream of characters. If we find it, we bind to it. * * Note that the people from hpoj.sourceforge.net need to be able to * bind to USB_CLASS_PRINTER/1/3 (MLC/1284.4), so we provide them ioctls * for this purpose. * * Failing USB_CLASS_PRINTER/1/2, we look for USB_CLASS_PRINTER/1/3, * even though it's probably not stream-compatible, because this matches * the behaviour of the old code. * * If nothing else, we bind to USB_CLASS_PRINTER/1/1 * - the unidirectional interface. */ static int usblp_select_alts(struct usblp *usblp) { struct usb_interface *if_alt; struct usb_host_interface *ifd; struct usb_endpoint_descriptor *epwrite, *epread; int p, i; int res; if_alt = usblp->intf; for (p = 0; p < USBLP_MAX_PROTOCOLS; p++) usblp->protocol[p].alt_setting = -1; /* Find out what we have. */ for (i = 0; i < if_alt->num_altsetting; i++) { ifd = &if_alt->altsetting[i]; if (ifd->desc.bInterfaceClass != USB_CLASS_PRINTER || ifd->desc.bInterfaceSubClass != 1) if (!(usblp->quirks & USBLP_QUIRK_BAD_CLASS)) continue; if (ifd->desc.bInterfaceProtocol < USBLP_FIRST_PROTOCOL || ifd->desc.bInterfaceProtocol > USBLP_LAST_PROTOCOL) continue; /* Look for the expected bulk endpoints. */ if (ifd->desc.bInterfaceProtocol > 1) { res = usb_find_common_endpoints(ifd, &epread, &epwrite, NULL, NULL); } else { epread = NULL; res = usb_find_bulk_out_endpoint(ifd, &epwrite); } /* Ignore buggy hardware without the right endpoints. */ if (res) continue; /* Turn off reads for buggy bidirectional printers. */ if (usblp->quirks & USBLP_QUIRK_BIDIR) { printk(KERN_INFO "usblp%d: Disabling reads from " "problematic bidirectional printer\n", usblp->minor); epread = NULL; } usblp->protocol[ifd->desc.bInterfaceProtocol].alt_setting = ifd->desc.bAlternateSetting; usblp->protocol[ifd->desc.bInterfaceProtocol].epwrite = epwrite; usblp->protocol[ifd->desc.bInterfaceProtocol].epread = epread; } /* If our requested protocol is supported, then use it. */ if (proto_bias >= USBLP_FIRST_PROTOCOL && proto_bias <= USBLP_LAST_PROTOCOL && usblp->protocol[proto_bias].alt_setting != -1) return proto_bias; /* Ordering is important here. */ if (usblp->protocol[2].alt_setting != -1) return 2; if (usblp->protocol[1].alt_setting != -1) return 1; if (usblp->protocol[3].alt_setting != -1) return 3; /* If nothing is available, then don't bind to this device. */ return -1; } static int usblp_set_protocol(struct usblp *usblp, int protocol) { int r, alts; if (protocol < USBLP_FIRST_PROTOCOL || protocol > USBLP_LAST_PROTOCOL) return -EINVAL; /* Don't unnecessarily set the interface if there's a single alt. */ if (usblp->intf->num_altsetting > 1) { alts = usblp->protocol[protocol].alt_setting; if (alts < 0) return -EINVAL; r = usb_set_interface(usblp->dev, usblp->ifnum, alts); if (r < 0) { printk(KERN_ERR "usblp: can't set desired altsetting %d on interface %d\n", alts, usblp->ifnum); return r; } } usblp->bidir = (usblp->protocol[protocol].epread != NULL); usblp->current_protocol = protocol; dev_dbg(&usblp->intf->dev, "usblp%d set protocol %d\n", usblp->minor, protocol); return 0; } /* Retrieves and caches device ID string. * Returns length, including length bytes but not null terminator. * On error, returns a negative errno value. */ static int usblp_cache_device_id_string(struct usblp *usblp) { int err, length; err = usblp_get_id(usblp, 0, usblp->device_id_string, USBLP_DEVICE_ID_SIZE - 1); if (err < 0) { dev_dbg(&usblp->intf->dev, "usblp%d: error = %d reading IEEE-1284 Device ID string\n", usblp->minor, err); usblp->device_id_string[0] = usblp->device_id_string[1] = '\0'; return -EIO; } /* First two bytes are length in big-endian. * They count themselves, and we copy them into * the user's buffer. */ length = be16_to_cpu(*((__be16 *)usblp->device_id_string)); if (length < 2) length = 2; else if (length >= USBLP_DEVICE_ID_SIZE) length = USBLP_DEVICE_ID_SIZE - 1; usblp->device_id_string[length] = '\0'; dev_dbg(&usblp->intf->dev, "usblp%d Device ID string [len=%d]=\"%s\"\n", usblp->minor, length, &usblp->device_id_string[2]); return length; } static void usblp_disconnect(struct usb_interface *intf) { struct usblp *usblp = usb_get_intfdata(intf); usb_deregister_dev(intf, &usblp_class); if (!usblp || !usblp->dev) { dev_err(&intf->dev, "bogus disconnect\n"); BUG(); } mutex_lock(&usblp_mutex); mutex_lock(&usblp->mut); usblp->present = 0; wake_up(&usblp->wwait); wake_up(&usblp->rwait); usb_set_intfdata(intf, NULL); usblp_unlink_urbs(usblp); mutex_unlock(&usblp->mut); usb_poison_anchored_urbs(&usblp->urbs); if (!usblp->used) usblp_cleanup(usblp); mutex_unlock(&usblp_mutex); } static int usblp_suspend(struct usb_interface *intf, pm_message_t message) { struct usblp *usblp = usb_get_intfdata(intf); usblp_unlink_urbs(usblp); #if 0 /* XXX Do we want this? What if someone is reading, should we fail? */ /* not strictly necessary, but just in case */ wake_up(&usblp->wwait); wake_up(&usblp->rwait); #endif return 0; } static int usblp_resume(struct usb_interface *intf) { struct usblp *usblp = usb_get_intfdata(intf); int r; r = handle_bidir(usblp); return r; } static const struct usb_device_id usblp_ids[] = { { USB_DEVICE_INFO(USB_CLASS_PRINTER, 1, 1) }, { USB_DEVICE_INFO(USB_CLASS_PRINTER, 1, 2) }, { USB_DEVICE_INFO(USB_CLASS_PRINTER, 1, 3) }, { USB_INTERFACE_INFO(USB_CLASS_PRINTER, 1, 1) }, { USB_INTERFACE_INFO(USB_CLASS_PRINTER, 1, 2) }, { USB_INTERFACE_INFO(USB_CLASS_PRINTER, 1, 3) }, { USB_DEVICE(0x04b8, 0x0202) }, /* Seiko Epson Receipt Printer M129C */ { } /* Terminating entry */ }; MODULE_DEVICE_TABLE(usb, usblp_ids); static struct usb_driver usblp_driver = { .name = "usblp", .probe = usblp_probe, .disconnect = usblp_disconnect, .suspend = usblp_suspend, .resume = usblp_resume, .id_table = usblp_ids, .dev_groups = usblp_groups, .supports_autosuspend = 1, }; module_usb_driver(usblp_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); module_param(proto_bias, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(proto_bias, "Favourite protocol number"); MODULE_LICENSE("GPL"); |
45 45 43 55 13 45 45 55 12 45 52 57 5 52 52 54 54 57 5 54 52 2 2 2 2 54 50 8 8 8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 | // SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 1996-2000 Russell King. * * Scan ADFS partitions on hard disk drives. Unfortunately, there * isn't a standard for partitioning drives on Acorn machines, so * every single manufacturer of SCSI and IDE cards created their own * method. */ #include <linux/buffer_head.h> #include <linux/adfs_fs.h> #include "check.h" /* * Partition types. (Oh for reusability) */ #define PARTITION_RISCIX_MFM 1 #define PARTITION_RISCIX_SCSI 2 #define PARTITION_LINUX 9 #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ defined(CONFIG_ACORN_PARTITION_ADFS) static struct adfs_discrecord * adfs_partition(struct parsed_partitions *state, char *name, char *data, unsigned long first_sector, int slot) { struct adfs_discrecord *dr; unsigned int nr_sects; if (adfs_checkbblk(data)) return NULL; dr = (struct adfs_discrecord *)(data + 0x1c0); if (dr->disc_size == 0 && dr->disc_size_high == 0) return NULL; nr_sects = (le32_to_cpu(dr->disc_size_high) << 23) | (le32_to_cpu(dr->disc_size) >> 9); if (name) { strlcat(state->pp_buf, " [", PAGE_SIZE); strlcat(state->pp_buf, name, PAGE_SIZE); strlcat(state->pp_buf, "]", PAGE_SIZE); } put_partition(state, slot, first_sector, nr_sects); return dr; } #endif #ifdef CONFIG_ACORN_PARTITION_RISCIX struct riscix_part { __le32 start; __le32 length; __le32 one; char name[16]; }; struct riscix_record { __le32 magic; #define RISCIX_MAGIC cpu_to_le32(0x4a657320) __le32 date; struct riscix_part part[8]; }; #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ defined(CONFIG_ACORN_PARTITION_ADFS) static int riscix_partition(struct parsed_partitions *state, unsigned long first_sect, int slot, unsigned long nr_sects) { Sector sect; struct riscix_record *rr; rr = read_part_sector(state, first_sect, §); if (!rr) return -1; strlcat(state->pp_buf, " [RISCiX]", PAGE_SIZE); if (rr->magic == RISCIX_MAGIC) { unsigned long size = nr_sects > 2 ? 2 : nr_sects; int part; strlcat(state->pp_buf, " <", PAGE_SIZE); put_partition(state, slot++, first_sect, size); for (part = 0; part < 8; part++) { if (rr->part[part].one && memcmp(rr->part[part].name, "All\0", 4)) { put_partition(state, slot++, le32_to_cpu(rr->part[part].start), le32_to_cpu(rr->part[part].length)); strlcat(state->pp_buf, "(", PAGE_SIZE); strlcat(state->pp_buf, rr->part[part].name, PAGE_SIZE); strlcat(state->pp_buf, ")", PAGE_SIZE); } } strlcat(state->pp_buf, " >\n", PAGE_SIZE); } else { put_partition(state, slot++, first_sect, nr_sects); } put_dev_sector(sect); return slot; } #endif #endif #define LINUX_NATIVE_MAGIC 0xdeafa1de #define LINUX_SWAP_MAGIC 0xdeafab1e struct linux_part { __le32 magic; __le32 start_sect; __le32 nr_sects; }; #if defined(CONFIG_ACORN_PARTITION_CUMANA) || \ defined(CONFIG_ACORN_PARTITION_ADFS) static int linux_partition(struct parsed_partitions *state, unsigned long first_sect, int slot, unsigned long nr_sects) { Sector sect; struct linux_part *linuxp; unsigned long size = nr_sects > 2 ? 2 : nr_sects; strlcat(state->pp_buf, " [Linux]", PAGE_SIZE); put_partition(state, slot++, first_sect, size); linuxp = read_part_sector(state, first_sect, §); if (!linuxp) return -1; strlcat(state->pp_buf, " <", PAGE_SIZE); while (linuxp->magic == cpu_to_le32(LINUX_NATIVE_MAGIC) || linuxp->magic == cpu_to_le32(LINUX_SWAP_MAGIC)) { if (slot == state->limit) break; put_partition(state, slot++, first_sect + le32_to_cpu(linuxp->start_sect), le32_to_cpu(linuxp->nr_sects)); linuxp ++; } strlcat(state->pp_buf, " >", PAGE_SIZE); put_dev_sector(sect); return slot; } #endif #ifdef CONFIG_ACORN_PARTITION_CUMANA int adfspart_check_CUMANA(struct parsed_partitions *state) { unsigned long first_sector = 0; unsigned int start_blk = 0; Sector sect; unsigned char *data; char *name = "CUMANA/ADFS"; int first = 1; int slot = 1; /* * Try Cumana style partitions - sector 6 contains ADFS boot block * with pointer to next 'drive'. * * There are unknowns in this code - is the 'cylinder number' of the * next partition relative to the start of this one - I'm assuming * it is. * * Also, which ID did Cumana use? * * This is totally unfinished, and will require more work to get it * going. Hence it is totally untested. */ do { struct adfs_discrecord *dr; unsigned int nr_sects; data = read_part_sector(state, start_blk * 2 + 6, §); if (!data) return -1; if (slot == state->limit) break; dr = adfs_partition(state, name, data, first_sector, slot++); if (!dr) break; name = NULL; nr_sects = (data[0x1fd] + (data[0x1fe] << 8)) * (dr->heads + (dr->lowsector & 0x40 ? 1 : 0)) * dr->secspertrack; if (!nr_sects) break; first = 0; first_sector += nr_sects; start_blk += nr_sects >> (BLOCK_SIZE_BITS - 9); nr_sects = 0; /* hmm - should be partition size */ switch (data[0x1fc] & 15) { case 0: /* No partition / ADFS? */ break; #ifdef CONFIG_ACORN_PARTITION_RISCIX case PARTITION_RISCIX_SCSI: /* RISCiX - we don't know how to find the next one. */ slot = riscix_partition(state, first_sector, slot, nr_sects); break; #endif case PARTITION_LINUX: slot = linux_partition(state, first_sector, slot, nr_sects); break; } put_dev_sector(sect); if (slot == -1) return -1; } while (1); put_dev_sector(sect); return first ? 0 : 1; } #endif #ifdef CONFIG_ACORN_PARTITION_ADFS /* * Purpose: allocate ADFS partitions. * * Params : hd - pointer to gendisk structure to store partition info. * dev - device number to access. * * Returns: -1 on error, 0 for no ADFS boot sector, 1 for ok. * * Alloc : hda = whole drive * hda1 = ADFS partition on first drive. * hda2 = non-ADFS partition. */ int adfspart_check_ADFS(struct parsed_partitions *state) { unsigned long start_sect, nr_sects, sectscyl, heads; Sector sect; unsigned char *data; struct adfs_discrecord *dr; unsigned char id; int slot = 1; data = read_part_sector(state, 6, §); if (!data) return -1; dr = adfs_partition(state, "ADFS", data, 0, slot++); if (!dr) { put_dev_sector(sect); return 0; } heads = dr->heads + ((dr->lowsector >> 6) & 1); sectscyl = dr->secspertrack * heads; start_sect = ((data[0x1fe] << 8) + data[0x1fd]) * sectscyl; id = data[0x1fc] & 15; put_dev_sector(sect); /* * Work out start of non-adfs partition. */ nr_sects = get_capacity(state->disk) - start_sect; if (start_sect) { switch (id) { #ifdef CONFIG_ACORN_PARTITION_RISCIX case PARTITION_RISCIX_SCSI: case PARTITION_RISCIX_MFM: riscix_partition(state, start_sect, slot, nr_sects); break; #endif case PARTITION_LINUX: linux_partition(state, start_sect, slot, nr_sects); break; } } strlcat(state->pp_buf, "\n", PAGE_SIZE); return 1; } #endif #ifdef CONFIG_ACORN_PARTITION_ICS struct ics_part { __le32 start; __le32 size; }; static int adfspart_check_ICSLinux(struct parsed_partitions *state, unsigned long block) { Sector sect; unsigned char *data = read_part_sector(state, block, §); int result = 0; if (data) { if (memcmp(data, "LinuxPart", 9) == 0) result = 1; put_dev_sector(sect); } return result; } /* * Check for a valid ICS partition using the checksum. */ static inline int valid_ics_sector(const unsigned char *data) { unsigned long sum; int i; for (i = 0, sum = 0x50617274; i < 508; i++) sum += data[i]; sum -= le32_to_cpu(*(__le32 *)(&data[508])); return sum == 0; } /* * Purpose: allocate ICS partitions. * Params : hd - pointer to gendisk structure to store partition info. * dev - device number to access. * Returns: -1 on error, 0 for no ICS table, 1 for partitions ok. * Alloc : hda = whole drive * hda1 = ADFS partition 0 on first drive. * hda2 = ADFS partition 1 on first drive. * ..etc.. */ int adfspart_check_ICS(struct parsed_partitions *state) { const unsigned char *data; const struct ics_part *p; int slot; Sector sect; /* * Try ICS style partitions - sector 0 contains partition info. */ data = read_part_sector(state, 0, §); if (!data) return -1; if (!valid_ics_sector(data)) { put_dev_sector(sect); return 0; } strlcat(state->pp_buf, " [ICS]", PAGE_SIZE); for (slot = 1, p = (const struct ics_part *)data; p->size; p++) { u32 start = le32_to_cpu(p->start); s32 size = le32_to_cpu(p->size); /* yes, it's signed. */ if (slot == state->limit) break; /* * Negative sizes tell the RISC OS ICS driver to ignore * this partition - in effect it says that this does not * contain an ADFS filesystem. */ if (size < 0) { size = -size; /* * Our own extension - We use the first sector * of the partition to identify what type this * partition is. We must not make this visible * to the filesystem. */ if (size > 1 && adfspart_check_ICSLinux(state, start)) { start += 1; size -= 1; } } if (size) put_partition(state, slot++, start, size); } put_dev_sector(sect); strlcat(state->pp_buf, "\n", PAGE_SIZE); return 1; } #endif #ifdef CONFIG_ACORN_PARTITION_POWERTEC struct ptec_part { __le32 unused1; __le32 unused2; __le32 start; __le32 size; __le32 unused5; char type[8]; }; static inline int valid_ptec_sector(const unsigned char *data) { unsigned char checksum = 0x2a; int i; /* * If it looks like a PC/BIOS partition, then it * probably isn't PowerTec. */ if (data[510] == 0x55 && data[511] == 0xaa) return 0; for (i = 0; i < 511; i++) checksum += data[i]; return checksum == data[511]; } /* * Purpose: allocate ICS partitions. * Params : hd - pointer to gendisk structure to store partition info. * dev - device number to access. * Returns: -1 on error, 0 for no ICS table, 1 for partitions ok. * Alloc : hda = whole drive * hda1 = ADFS partition 0 on first drive. * hda2 = ADFS partition 1 on first drive. * ..etc.. */ int adfspart_check_POWERTEC(struct parsed_partitions *state) { Sector sect; const unsigned char *data; const struct ptec_part *p; int slot = 1; int i; data = read_part_sector(state, 0, §); if (!data) return -1; if (!valid_ptec_sector(data)) { put_dev_sector(sect); return 0; } strlcat(state->pp_buf, " [POWERTEC]", PAGE_SIZE); for (i = 0, p = (const struct ptec_part *)data; i < 12; i++, p++) { u32 start = le32_to_cpu(p->start); u32 size = le32_to_cpu(p->size); if (size) put_partition(state, slot++, start, size); } put_dev_sector(sect); strlcat(state->pp_buf, "\n", PAGE_SIZE); return 1; } #endif #ifdef CONFIG_ACORN_PARTITION_EESOX struct eesox_part { char magic[6]; char name[10]; __le32 start; __le32 unused6; __le32 unused7; __le32 unused8; }; /* * Guess who created this format? */ static const char eesox_name[] = { 'N', 'e', 'i', 'l', ' ', 'C', 'r', 'i', 't', 'c', 'h', 'e', 'l', 'l', ' ', ' ' }; /* * EESOX SCSI partition format. * * This is a goddamned awful partition format. We don't seem to store * the size of the partition in this table, only the start addresses. * * There are two possibilities where the size comes from: * 1. The individual ADFS boot block entries that are placed on the disk. * 2. The start address of the next entry. */ int adfspart_check_EESOX(struct parsed_partitions *state) { Sector sect; const unsigned char *data; unsigned char buffer[256]; struct eesox_part *p; sector_t start = 0; int i, slot = 1; data = read_part_sector(state, 7, §); if (!data) return -1; /* * "Decrypt" the partition table. God knows why... */ for (i = 0; i < 256; i++) buffer[i] = data[i] ^ eesox_name[i & 15]; put_dev_sector(sect); for (i = 0, p = (struct eesox_part *)buffer; i < 8; i++, p++) { sector_t next; if (memcmp(p->magic, "Eesox", 6)) break; next = le32_to_cpu(p->start); if (i) put_partition(state, slot++, start, next - start); start = next; } if (i != 0) { sector_t size; size = get_capacity(state->disk); put_partition(state, slot++, start, size - start); strlcat(state->pp_buf, "\n", PAGE_SIZE); } return i ? 1 : 0; } #endif |
3088 3085 3084 3021 3026 3021 3030 3038 3034 3035 81 81 3027 3032 3034 3035 3033 3031 3025 3028 2627 2630 2981 2978 2982 2981 2982 3267 3269 462 3229 13 13 3283 1671 414 2991 415 919 922 3967 3240 790 4406 836 3093 4390 3092 2841 4390 4088 3368 3372 3370 2680 3484 4396 3025 2946 3050 3518 1395 12 3117 3082 37 3026 2982 3900 2993 2189 2116 3393 3901 3901 3901 2986 3905 3273 3270 3270 1728 3117 2972 1588 3901 3907 3901 2981 3403 3075 3072 2991 2991 3978 3982 901 1300 2839 1303 3900 901 1302 617 1012 1006 1303 2994 7 2970 1586 1584 3276 2983 1588 1301 3901 3901 2990 905 1347 2189 2192 1591 1591 308 1299 1587 3866 3052 3053 2975 3902 3900 3902 3898 3898 3897 3897 7 3901 979 3075 3 3884 3053 3900 3898 980 3067 2990 2192 3 2986 4309 981 3574 385 3585 3271 3272 3267 3276 3271 384 2993 3265 2975 2981 2968 1587 3272 3275 3271 2994 3008 2983 2 142 3367 1538 3070 3074 618 1183 713 1384 138 138 2 2 5 1 1 5 5 825 825 910 903 900 35 36 960 964 27 27 27 27 96 3184 3120 3222 3228 3218 3158 3164 2 23 1875 2751 2338 3059 14 14 2951 2953 2951 587 586 36 36 1105 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 5040 5041 5042 5043 5044 5045 5046 5047 5048 5049 5050 5051 5052 5053 5054 5055 5056 5057 5058 5059 5060 5061 5062 5063 5064 5065 5066 5067 5068 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 5112 5113 5114 5115 5116 5117 5118 5119 5120 5121 5122 5123 5124 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 5158 5159 5160 5161 5162 5163 5164 5165 5166 5167 5168 5169 5170 5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182 5183 5184 5185 5186 5187 5188 5189 5190 5191 5192 5193 5194 5195 5196 5197 5198 5199 5200 5201 5202 5203 5204 5205 5206 5207 5208 5209 5210 5211 5212 5213 5214 5215 5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226 5227 5228 5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240 5241 5242 5243 5244 5245 5246 5247 5248 5249 5250 5251 5252 5253 5254 5255 5256 5257 5258 5259 5260 5261 5262 5263 5264 5265 5266 5267 5268 5269 5270 5271 5272 5273 5274 5275 5276 5277 5278 5279 5280 5281 | // SPDX-License-Identifier: GPL-2.0 /* * drivers/base/core.c - core driver model code (device registration, etc) * * Copyright (c) 2002-3 Patrick Mochel * Copyright (c) 2002-3 Open Source Development Labs * Copyright (c) 2006 Greg Kroah-Hartman <gregkh@suse.de> * Copyright (c) 2006 Novell, Inc. */ #include <linux/acpi.h> #include <linux/blkdev.h> #include <linux/cleanup.h> #include <linux/cpufreq.h> #include <linux/device.h> #include <linux/dma-map-ops.h> /* for dma_default_coherent */ #include <linux/err.h> #include <linux/fwnode.h> #include <linux/init.h> #include <linux/kdev_t.h> #include <linux/kstrtox.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/netdevice.h> #include <linux/notifier.h> #include <linux/of.h> #include <linux/of_device.h> #include <linux/pm_runtime.h> #include <linux/sched/mm.h> #include <linux/sched/signal.h> #include <linux/slab.h> #include <linux/string_helpers.h> #include <linux/swiotlb.h> #include <linux/sysfs.h> #include "base.h" #include "physical_location.h" #include "power/power.h" /* Device links support. */ static LIST_HEAD(deferred_sync); static unsigned int defer_sync_state_count = 1; static DEFINE_MUTEX(fwnode_link_lock); static bool fw_devlink_is_permissive(void); static void __fw_devlink_link_to_consumers(struct device *dev); static bool fw_devlink_drv_reg_done; static bool fw_devlink_best_effort; static struct workqueue_struct *device_link_wq; /** * __fwnode_link_add - Create a link between two fwnode_handles. * @con: Consumer end of the link. * @sup: Supplier end of the link. * @flags: Link flags. * * Create a fwnode link between fwnode handles @con and @sup. The fwnode link * represents the detail that the firmware lists @sup fwnode as supplying a * resource to @con. * * The driver core will use the fwnode link to create a device link between the * two device objects corresponding to @con and @sup when they are created. The * driver core will automatically delete the fwnode link between @con and @sup * after doing that. * * Attempts to create duplicate links between the same pair of fwnode handles * are ignored and there is no reference counting. */ static int __fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, u8 flags) { struct fwnode_link *link; list_for_each_entry(link, &sup->consumers, s_hook) if (link->consumer == con) { link->flags |= flags; return 0; } link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; link->supplier = sup; INIT_LIST_HEAD(&link->s_hook); link->consumer = con; INIT_LIST_HEAD(&link->c_hook); link->flags = flags; list_add(&link->s_hook, &sup->consumers); list_add(&link->c_hook, &con->suppliers); pr_debug("%pfwf Linked as a fwnode consumer to %pfwf\n", con, sup); return 0; } int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup, u8 flags) { guard(mutex)(&fwnode_link_lock); return __fwnode_link_add(con, sup, flags); } /** * __fwnode_link_del - Delete a link between two fwnode_handles. * @link: the fwnode_link to be deleted * * The fwnode_link_lock needs to be held when this function is called. */ static void __fwnode_link_del(struct fwnode_link *link) { pr_debug("%pfwf Dropping the fwnode link to %pfwf\n", link->consumer, link->supplier); list_del(&link->s_hook); list_del(&link->c_hook); kfree(link); } /** * __fwnode_link_cycle - Mark a fwnode link as being part of a cycle. * @link: the fwnode_link to be marked * * The fwnode_link_lock needs to be held when this function is called. */ static void __fwnode_link_cycle(struct fwnode_link *link) { pr_debug("%pfwf: cycle: depends on %pfwf\n", link->consumer, link->supplier); link->flags |= FWLINK_FLAG_CYCLE; } /** * fwnode_links_purge_suppliers - Delete all supplier links of fwnode_handle. * @fwnode: fwnode whose supplier links need to be deleted * * Deletes all supplier links connecting directly to @fwnode. */ static void fwnode_links_purge_suppliers(struct fwnode_handle *fwnode) { struct fwnode_link *link, *tmp; guard(mutex)(&fwnode_link_lock); list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) __fwnode_link_del(link); } /** * fwnode_links_purge_consumers - Delete all consumer links of fwnode_handle. * @fwnode: fwnode whose consumer links need to be deleted * * Deletes all consumer links connecting directly to @fwnode. */ static void fwnode_links_purge_consumers(struct fwnode_handle *fwnode) { struct fwnode_link *link, *tmp; guard(mutex)(&fwnode_link_lock); list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) __fwnode_link_del(link); } /** * fwnode_links_purge - Delete all links connected to a fwnode_handle. * @fwnode: fwnode whose links needs to be deleted * * Deletes all links connecting directly to a fwnode. */ void fwnode_links_purge(struct fwnode_handle *fwnode) { fwnode_links_purge_suppliers(fwnode); fwnode_links_purge_consumers(fwnode); } void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode) { struct fwnode_handle *child; /* Don't purge consumer links of an added child */ if (fwnode->dev) return; fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; fwnode_links_purge_consumers(fwnode); fwnode_for_each_available_child_node(fwnode, child) fw_devlink_purge_absent_suppliers(child); } EXPORT_SYMBOL_GPL(fw_devlink_purge_absent_suppliers); /** * __fwnode_links_move_consumers - Move consumer from @from to @to fwnode_handle * @from: move consumers away from this fwnode * @to: move consumers to this fwnode * * Move all consumer links from @from fwnode to @to fwnode. */ static void __fwnode_links_move_consumers(struct fwnode_handle *from, struct fwnode_handle *to) { struct fwnode_link *link, *tmp; list_for_each_entry_safe(link, tmp, &from->consumers, s_hook) { __fwnode_link_add(link->consumer, to, link->flags); __fwnode_link_del(link); } } /** * __fw_devlink_pickup_dangling_consumers - Pick up dangling consumers * @fwnode: fwnode from which to pick up dangling consumers * @new_sup: fwnode of new supplier * * If the @fwnode has a corresponding struct device and the device supports * probing (that is, added to a bus), then we want to let fw_devlink create * MANAGED device links to this device, so leave @fwnode and its descendant's * fwnode links alone. * * Otherwise, move its consumers to the new supplier @new_sup. */ static void __fw_devlink_pickup_dangling_consumers(struct fwnode_handle *fwnode, struct fwnode_handle *new_sup) { struct fwnode_handle *child; if (fwnode->dev && fwnode->dev->bus) return; fwnode->flags |= FWNODE_FLAG_NOT_DEVICE; __fwnode_links_move_consumers(fwnode, new_sup); fwnode_for_each_available_child_node(fwnode, child) __fw_devlink_pickup_dangling_consumers(child, new_sup); } static DEFINE_MUTEX(device_links_lock); DEFINE_STATIC_SRCU(device_links_srcu); static inline void device_links_write_lock(void) { mutex_lock(&device_links_lock); } static inline void device_links_write_unlock(void) { mutex_unlock(&device_links_lock); } int device_links_read_lock(void) __acquires(&device_links_srcu) { return srcu_read_lock(&device_links_srcu); } void device_links_read_unlock(int idx) __releases(&device_links_srcu) { srcu_read_unlock(&device_links_srcu, idx); } int device_links_read_lock_held(void) { return srcu_read_lock_held(&device_links_srcu); } static void device_link_synchronize_removal(void) { synchronize_srcu(&device_links_srcu); } static void device_link_remove_from_lists(struct device_link *link) { list_del_rcu(&link->s_node); list_del_rcu(&link->c_node); } static bool device_is_ancestor(struct device *dev, struct device *target) { while (target->parent) { target = target->parent; if (dev == target) return true; } return false; } #define DL_MARKER_FLAGS (DL_FLAG_INFERRED | \ DL_FLAG_CYCLE | \ DL_FLAG_MANAGED) static inline bool device_link_flag_is_sync_state_only(u32 flags) { return (flags & ~DL_MARKER_FLAGS) == DL_FLAG_SYNC_STATE_ONLY; } /** * device_is_dependent - Check if one device depends on another one * @dev: Device to check dependencies for. * @target: Device to check against. * * Check if @target depends on @dev or any device dependent on it (its child or * its consumer etc). Return 1 if that is the case or 0 otherwise. */ static int device_is_dependent(struct device *dev, void *target) { struct device_link *link; int ret; /* * The "ancestors" check is needed to catch the case when the target * device has not been completely initialized yet and it is still * missing from the list of children of its parent device. */ if (dev == target || device_is_ancestor(dev, target)) return 1; ret = device_for_each_child(dev, target, device_is_dependent); if (ret) return ret; list_for_each_entry(link, &dev->links.consumers, s_node) { if (device_link_flag_is_sync_state_only(link->flags)) continue; if (link->consumer == target) return 1; ret = device_is_dependent(link->consumer, target); if (ret) break; } return ret; } static void device_link_init_status(struct device_link *link, struct device *consumer, struct device *supplier) { switch (supplier->links.status) { case DL_DEV_PROBING: switch (consumer->links.status) { case DL_DEV_PROBING: /* * A consumer driver can create a link to a supplier * that has not completed its probing yet as long as it * knows that the supplier is already functional (for * example, it has just acquired some resources from the * supplier). */ link->status = DL_STATE_CONSUMER_PROBE; break; default: link->status = DL_STATE_DORMANT; break; } break; case DL_DEV_DRIVER_BOUND: switch (consumer->links.status) { case DL_DEV_PROBING: link->status = DL_STATE_CONSUMER_PROBE; break; case DL_DEV_DRIVER_BOUND: link->status = DL_STATE_ACTIVE; break; default: link->status = DL_STATE_AVAILABLE; break; } break; case DL_DEV_UNBINDING: link->status = DL_STATE_SUPPLIER_UNBIND; break; default: link->status = DL_STATE_DORMANT; break; } } static int device_reorder_to_tail(struct device *dev, void *not_used) { struct device_link *link; /* * Devices that have not been registered yet will be put to the ends * of the lists during the registration, so skip them here. */ if (device_is_registered(dev)) devices_kset_move_last(dev); if (device_pm_initialized(dev)) device_pm_move_last(dev); device_for_each_child(dev, NULL, device_reorder_to_tail); list_for_each_entry(link, &dev->links.consumers, s_node) { if (device_link_flag_is_sync_state_only(link->flags)) continue; device_reorder_to_tail(link->consumer, NULL); } return 0; } /** * device_pm_move_to_tail - Move set of devices to the end of device lists * @dev: Device to move * * This is a device_reorder_to_tail() wrapper taking the requisite locks. * * It moves the @dev along with all of its children and all of its consumers * to the ends of the device_kset and dpm_list, recursively. */ void device_pm_move_to_tail(struct device *dev) { int idx; idx = device_links_read_lock(); device_pm_lock(); device_reorder_to_tail(dev, NULL); device_pm_unlock(); device_links_read_unlock(idx); } #define to_devlink(dev) container_of((dev), struct device_link, link_dev) static ssize_t status_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *output; switch (to_devlink(dev)->status) { case DL_STATE_NONE: output = "not tracked"; break; case DL_STATE_DORMANT: output = "dormant"; break; case DL_STATE_AVAILABLE: output = "available"; break; case DL_STATE_CONSUMER_PROBE: output = "consumer probing"; break; case DL_STATE_ACTIVE: output = "active"; break; case DL_STATE_SUPPLIER_UNBIND: output = "supplier unbinding"; break; default: output = "unknown"; break; } return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(status); static ssize_t auto_remove_on_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); const char *output; if (link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) output = "supplier unbind"; else if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) output = "consumer unbind"; else output = "never"; return sysfs_emit(buf, "%s\n", output); } static DEVICE_ATTR_RO(auto_remove_on); static ssize_t runtime_pm_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_PM_RUNTIME)); } static DEVICE_ATTR_RO(runtime_pm); static ssize_t sync_state_only_show(struct device *dev, struct device_attribute *attr, char *buf) { struct device_link *link = to_devlink(dev); return sysfs_emit(buf, "%d\n", !!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); } static DEVICE_ATTR_RO(sync_state_only); static struct attribute *devlink_attrs[] = { &dev_attr_status.attr, &dev_attr_auto_remove_on.attr, &dev_attr_runtime_pm.attr, &dev_attr_sync_state_only.attr, NULL, }; ATTRIBUTE_GROUPS(devlink); static void device_link_release_fn(struct work_struct *work) { struct device_link *link = container_of(work, struct device_link, rm_work); /* Ensure that all references to the link object have been dropped. */ device_link_synchronize_removal(); pm_runtime_release_supplier(link); /* * If supplier_preactivated is set, the link has been dropped between * the pm_runtime_get_suppliers() and pm_runtime_put_suppliers() calls * in __driver_probe_device(). In that case, drop the supplier's * PM-runtime usage counter to remove the reference taken by * pm_runtime_get_suppliers(). */ if (link->supplier_preactivated) pm_runtime_put_noidle(link->supplier); pm_request_idle(link->supplier); put_device(link->consumer); put_device(link->supplier); kfree(link); } static void devlink_dev_release(struct device *dev) { struct device_link *link = to_devlink(dev); INIT_WORK(&link->rm_work, device_link_release_fn); /* * It may take a while to complete this work because of the SRCU * synchronization in device_link_release_fn() and if the consumer or * supplier devices get deleted when it runs, so put it into the * dedicated workqueue. */ queue_work(device_link_wq, &link->rm_work); } /** * device_link_wait_removal - Wait for ongoing devlink removal jobs to terminate */ void device_link_wait_removal(void) { /* * devlink removal jobs are queued in the dedicated work queue. * To be sure that all removal jobs are terminated, ensure that any * scheduled work has run to completion. */ flush_workqueue(device_link_wq); } EXPORT_SYMBOL_GPL(device_link_wait_removal); static const struct class devlink_class = { .name = "devlink", .dev_groups = devlink_groups, .dev_release = devlink_dev_release, }; static int devlink_add_symlinks(struct device *dev) { char *buf_con __free(kfree) = NULL, *buf_sup __free(kfree) = NULL; int ret; struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; struct device *con = link->consumer; ret = sysfs_create_link(&link->link_dev.kobj, &sup->kobj, "supplier"); if (ret) goto out; ret = sysfs_create_link(&link->link_dev.kobj, &con->kobj, "consumer"); if (ret) goto err_con; buf_con = kasprintf(GFP_KERNEL, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); if (!buf_con) { ret = -ENOMEM; goto err_con_dev; } ret = sysfs_create_link(&sup->kobj, &link->link_dev.kobj, buf_con); if (ret) goto err_con_dev; buf_sup = kasprintf(GFP_KERNEL, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); if (!buf_sup) { ret = -ENOMEM; goto err_sup_dev; } ret = sysfs_create_link(&con->kobj, &link->link_dev.kobj, buf_sup); if (ret) goto err_sup_dev; goto out; err_sup_dev: sysfs_remove_link(&sup->kobj, buf_con); err_con_dev: sysfs_remove_link(&link->link_dev.kobj, "consumer"); err_con: sysfs_remove_link(&link->link_dev.kobj, "supplier"); out: return ret; } static void devlink_remove_symlinks(struct device *dev) { char *buf_con __free(kfree) = NULL, *buf_sup __free(kfree) = NULL; struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; struct device *con = link->consumer; sysfs_remove_link(&link->link_dev.kobj, "consumer"); sysfs_remove_link(&link->link_dev.kobj, "supplier"); if (device_is_registered(con)) { buf_sup = kasprintf(GFP_KERNEL, "supplier:%s:%s", dev_bus_name(sup), dev_name(sup)); if (!buf_sup) goto out; sysfs_remove_link(&con->kobj, buf_sup); } buf_con = kasprintf(GFP_KERNEL, "consumer:%s:%s", dev_bus_name(con), dev_name(con)); if (!buf_con) goto out; sysfs_remove_link(&sup->kobj, buf_con); return; out: WARN(1, "Unable to properly free device link symlinks!\n"); } static struct class_interface devlink_class_intf = { .class = &devlink_class, .add_dev = devlink_add_symlinks, .remove_dev = devlink_remove_symlinks, }; static int __init devlink_class_init(void) { int ret; ret = class_register(&devlink_class); if (ret) return ret; ret = class_interface_register(&devlink_class_intf); if (ret) class_unregister(&devlink_class); return ret; } postcore_initcall(devlink_class_init); #define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \ DL_FLAG_AUTOREMOVE_SUPPLIER | \ DL_FLAG_AUTOPROBE_CONSUMER | \ DL_FLAG_SYNC_STATE_ONLY | \ DL_FLAG_INFERRED | \ DL_FLAG_CYCLE) #define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \ DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE) /** * device_link_add - Create a link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * @flags: Link flags. * * Return: On success, a device_link struct will be returned. * On error or invalid flag settings, NULL will be returned. * * The caller is responsible for the proper synchronization of the link creation * with runtime PM. First, setting the DL_FLAG_PM_RUNTIME flag will cause the * runtime PM framework to take the link into account. Second, if the * DL_FLAG_RPM_ACTIVE flag is set in addition to it, the supplier devices will * be forced into the active meta state and reference-counted upon the creation * of the link. If DL_FLAG_PM_RUNTIME is not set, DL_FLAG_RPM_ACTIVE will be * ignored. * * If DL_FLAG_STATELESS is set in @flags, the caller of this function is * expected to release the link returned by it directly with the help of either * device_link_del() or device_link_remove(). * * If that flag is not set, however, the caller of this function is handing the * management of the link over to the driver core entirely and its return value * can only be used to check whether or not the link is present. In that case, * the DL_FLAG_AUTOREMOVE_CONSUMER and DL_FLAG_AUTOREMOVE_SUPPLIER device link * flags can be used to indicate to the driver core when the link can be safely * deleted. Namely, setting one of them in @flags indicates to the driver core * that the link is not going to be used (by the given caller of this function) * after unbinding the consumer or supplier driver, respectively, from its * device, so the link can be deleted at that point. If none of them is set, * the link will be maintained until one of the devices pointed to by it (either * the consumer or the supplier) is unregistered. * * Also, if DL_FLAG_STATELESS, DL_FLAG_AUTOREMOVE_CONSUMER and * DL_FLAG_AUTOREMOVE_SUPPLIER are not set in @flags (that is, a persistent * managed device link is being added), the DL_FLAG_AUTOPROBE_CONSUMER flag can * be used to request the driver core to automatically probe for a consumer * driver after successfully binding a driver to the supplier device. * * The combination of DL_FLAG_STATELESS and one of DL_FLAG_AUTOREMOVE_CONSUMER, * DL_FLAG_AUTOREMOVE_SUPPLIER, or DL_FLAG_AUTOPROBE_CONSUMER set in @flags at * the same time is invalid and will cause NULL to be returned upfront. * However, if a device link between the given @consumer and @supplier pair * exists already when this function is called for them, the existing link will * be returned regardless of its current type and status (the link's flags may * be modified then). The caller of this function is then expected to treat * the link as though it has just been created, so (in particular) if * DL_FLAG_STATELESS was passed in @flags, the link needs to be released * explicitly when not needed any more (as stated above). * * A side effect of the link creation is re-ordering of dpm_list and the * devices_kset list by moving the consumer device and all devices depending * on it to the ends of these lists (that does not happen to devices that have * not been registered when this function is called). * * The supplier device is required to be registered when this function is called * and NULL will be returned if that is not the case. The consumer device need * not be registered, however. */ struct device_link *device_link_add(struct device *consumer, struct device *supplier, u32 flags) { struct device_link *link; if (!consumer || !supplier || consumer == supplier || flags & ~DL_ADD_VALID_FLAGS || (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) || (flags & DL_FLAG_AUTOPROBE_CONSUMER && flags & (DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER))) return NULL; if (flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) { if (pm_runtime_get_sync(supplier) < 0) { pm_runtime_put_noidle(supplier); return NULL; } } if (!(flags & DL_FLAG_STATELESS)) flags |= DL_FLAG_MANAGED; if (flags & DL_FLAG_SYNC_STATE_ONLY && !device_link_flag_is_sync_state_only(flags)) return NULL; device_links_write_lock(); device_pm_lock(); /* * If the supplier has not been fully registered yet or there is a * reverse (non-SYNC_STATE_ONLY) dependency between the consumer and * the supplier already in the graph, return NULL. If the link is a * SYNC_STATE_ONLY link, we don't check for reverse dependencies * because it only affects sync_state() callbacks. */ if (!device_pm_initialized(supplier) || (!(flags & DL_FLAG_SYNC_STATE_ONLY) && device_is_dependent(consumer, supplier))) { link = NULL; goto out; } /* * SYNC_STATE_ONLY links are useless once a consumer device has probed. * So, only create it if the consumer hasn't probed yet. */ if (flags & DL_FLAG_SYNC_STATE_ONLY && consumer->links.status != DL_DEV_NO_DRIVER && consumer->links.status != DL_DEV_PROBING) { link = NULL; goto out; } /* * DL_FLAG_AUTOREMOVE_SUPPLIER indicates that the link will be needed * longer than for DL_FLAG_AUTOREMOVE_CONSUMER and setting them both * together doesn't make sense, so prefer DL_FLAG_AUTOREMOVE_SUPPLIER. */ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer != consumer) continue; if (link->flags & DL_FLAG_INFERRED && !(flags & DL_FLAG_INFERRED)) link->flags &= ~DL_FLAG_INFERRED; if (flags & DL_FLAG_PM_RUNTIME) { if (!(link->flags & DL_FLAG_PM_RUNTIME)) { pm_runtime_new_link(consumer); link->flags |= DL_FLAG_PM_RUNTIME; } if (flags & DL_FLAG_RPM_ACTIVE) refcount_inc(&link->rpm_active); } if (flags & DL_FLAG_STATELESS) { kref_get(&link->kref); if (link->flags & DL_FLAG_SYNC_STATE_ONLY && !(link->flags & DL_FLAG_STATELESS)) { link->flags |= DL_FLAG_STATELESS; goto reorder; } else { link->flags |= DL_FLAG_STATELESS; goto out; } } /* * If the life time of the link following from the new flags is * longer than indicated by the flags of the existing link, * update the existing link to stay around longer. */ if (flags & DL_FLAG_AUTOREMOVE_SUPPLIER) { if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { link->flags &= ~DL_FLAG_AUTOREMOVE_CONSUMER; link->flags |= DL_FLAG_AUTOREMOVE_SUPPLIER; } } else if (!(flags & DL_FLAG_AUTOREMOVE_CONSUMER)) { link->flags &= ~(DL_FLAG_AUTOREMOVE_CONSUMER | DL_FLAG_AUTOREMOVE_SUPPLIER); } if (!(link->flags & DL_FLAG_MANAGED)) { kref_get(&link->kref); link->flags |= DL_FLAG_MANAGED; device_link_init_status(link, consumer, supplier); } if (link->flags & DL_FLAG_SYNC_STATE_ONLY && !(flags & DL_FLAG_SYNC_STATE_ONLY)) { link->flags &= ~DL_FLAG_SYNC_STATE_ONLY; goto reorder; } goto out; } link = kzalloc(sizeof(*link), GFP_KERNEL); if (!link) goto out; refcount_set(&link->rpm_active, 1); get_device(supplier); link->supplier = supplier; INIT_LIST_HEAD(&link->s_node); get_device(consumer); link->consumer = consumer; INIT_LIST_HEAD(&link->c_node); link->flags = flags; kref_init(&link->kref); link->link_dev.class = &devlink_class; device_set_pm_not_required(&link->link_dev); dev_set_name(&link->link_dev, "%s:%s--%s:%s", dev_bus_name(supplier), dev_name(supplier), dev_bus_name(consumer), dev_name(consumer)); if (device_register(&link->link_dev)) { put_device(&link->link_dev); link = NULL; goto out; } if (flags & DL_FLAG_PM_RUNTIME) { if (flags & DL_FLAG_RPM_ACTIVE) refcount_inc(&link->rpm_active); pm_runtime_new_link(consumer); } /* Determine the initial link state. */ if (flags & DL_FLAG_STATELESS) link->status = DL_STATE_NONE; else device_link_init_status(link, consumer, supplier); /* * Some callers expect the link creation during consumer driver probe to * resume the supplier even without DL_FLAG_RPM_ACTIVE. */ if (link->status == DL_STATE_CONSUMER_PROBE && flags & DL_FLAG_PM_RUNTIME) pm_runtime_resume(supplier); list_add_tail_rcu(&link->s_node, &supplier->links.consumers); list_add_tail_rcu(&link->c_node, &consumer->links.suppliers); if (flags & DL_FLAG_SYNC_STATE_ONLY) { dev_dbg(consumer, "Linked as a sync state only consumer to %s\n", dev_name(supplier)); goto out; } reorder: /* * Move the consumer and all of the devices depending on it to the end * of dpm_list and the devices_kset list. * * It is necessary to hold dpm_list locked throughout all that or else * we may end up suspending with a wrong ordering of it. */ device_reorder_to_tail(consumer, NULL); dev_dbg(consumer, "Linked as a consumer to %s\n", dev_name(supplier)); out: device_pm_unlock(); device_links_write_unlock(); if ((flags & DL_FLAG_PM_RUNTIME && flags & DL_FLAG_RPM_ACTIVE) && !link) pm_runtime_put(supplier); return link; } EXPORT_SYMBOL_GPL(device_link_add); static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); dev_dbg(link->consumer, "Dropping the link to %s\n", dev_name(link->supplier)); pm_runtime_drop_link(link); device_link_remove_from_lists(link); device_unregister(&link->link_dev); } static void device_link_put_kref(struct device_link *link) { if (link->flags & DL_FLAG_STATELESS) kref_put(&link->kref, __device_link_del); else if (!device_is_registered(link->consumer)) __device_link_del(&link->kref); else WARN(1, "Unable to drop a managed device link reference\n"); } /** * device_link_del - Delete a stateless link between two devices. * @link: Device link to delete. * * The caller must ensure proper synchronization of this function with runtime * PM. If the link was added multiple times, it needs to be deleted as often. * Care is required for hotplugged devices: Their links are purged on removal * and calling device_link_del() is then no longer allowed. */ void device_link_del(struct device_link *link) { device_links_write_lock(); device_link_put_kref(link); device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_del); /** * device_link_remove - Delete a stateless link between two devices. * @consumer: Consumer end of the link. * @supplier: Supplier end of the link. * * The caller must ensure proper synchronization of this function with runtime * PM. */ void device_link_remove(void *consumer, struct device *supplier) { struct device_link *link; if (WARN_ON(consumer == supplier)) return; device_links_write_lock(); list_for_each_entry(link, &supplier->links.consumers, s_node) { if (link->consumer == consumer) { device_link_put_kref(link); break; } } device_links_write_unlock(); } EXPORT_SYMBOL_GPL(device_link_remove); static void device_links_missing_supplier(struct device *dev) { struct device_link *link; list_for_each_entry(link, &dev->links.suppliers, c_node) { if (link->status != DL_STATE_CONSUMER_PROBE) continue; if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } } static bool dev_is_best_effort(struct device *dev) { return (fw_devlink_best_effort && dev->can_match) || (dev->fwnode && (dev->fwnode->flags & FWNODE_FLAG_BEST_EFFORT)); } static struct fwnode_handle *fwnode_links_check_suppliers( struct fwnode_handle *fwnode) { struct fwnode_link *link; if (!fwnode || fw_devlink_is_permissive()) return NULL; list_for_each_entry(link, &fwnode->suppliers, c_hook) if (!(link->flags & (FWLINK_FLAG_CYCLE | FWLINK_FLAG_IGNORE))) return link->supplier; return NULL; } /** * device_links_check_suppliers - Check presence of supplier drivers. * @dev: Consumer device. * * Check links from this device to any suppliers. Walk the list of the device's * links to suppliers and see if all of them are available. If not, simply * return -EPROBE_DEFER. * * We need to guarantee that the supplier will not go away after the check has * been positive here. It only can go away in __device_release_driver() and * that function checks the device's links to consumers. This means we need to * mark the link as "consumer probe in progress" to make the supplier removal * wait for us to complete (or bad things may happen). * * Links without the DL_FLAG_MANAGED flag set are ignored. */ int device_links_check_suppliers(struct device *dev) { struct device_link *link; int ret = 0, fwnode_ret = 0; struct fwnode_handle *sup_fw; /* * Device waiting for supplier to become available is not allowed to * probe. */ scoped_guard(mutex, &fwnode_link_lock) { sup_fw = fwnode_links_check_suppliers(dev->fwnode); if (sup_fw) { if (dev_is_best_effort(dev)) fwnode_ret = -EAGAIN; else return dev_err_probe(dev, -EPROBE_DEFER, "wait for supplier %pfwf\n", sup_fw); } } device_links_write_lock(); list_for_each_entry(link, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_AVAILABLE && !(link->flags & DL_FLAG_SYNC_STATE_ONLY)) { if (dev_is_best_effort(dev) && link->flags & DL_FLAG_INFERRED && !link->supplier->can_match) { ret = -EAGAIN; continue; } device_links_missing_supplier(dev); ret = dev_err_probe(dev, -EPROBE_DEFER, "supplier %s not ready\n", dev_name(link->supplier)); break; } WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); } dev->links.status = DL_DEV_PROBING; device_links_write_unlock(); return ret ? ret : fwnode_ret; } /** * __device_links_queue_sync_state - Queue a device for sync_state() callback * @dev: Device to call sync_state() on * @list: List head to queue the @dev on * * Queues a device for a sync_state() callback when the device links write lock * isn't held. This allows the sync_state() execution flow to use device links * APIs. The caller must ensure this function is called with * device_links_write_lock() held. * * This function does a get_device() to make sure the device is not freed while * on this list. * * So the caller must also ensure that device_links_flush_sync_list() is called * as soon as the caller releases device_links_write_lock(). This is necessary * to make sure the sync_state() is called in a timely fashion and the * put_device() is called on this device. */ static void __device_links_queue_sync_state(struct device *dev, struct list_head *list) { struct device_link *link; if (!dev_has_sync_state(dev)) return; if (dev->state_synced) return; list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_ACTIVE) return; } /* * Set the flag here to avoid adding the same device to a list more * than once. This can happen if new consumers get added to the device * and probed before the list is flushed. */ dev->state_synced = true; if (WARN_ON(!list_empty(&dev->links.defer_sync))) return; get_device(dev); list_add_tail(&dev->links.defer_sync, list); } /** * device_links_flush_sync_list - Call sync_state() on a list of devices * @list: List of devices to call sync_state() on * @dont_lock_dev: Device for which lock is already held by the caller * * Calls sync_state() on all the devices that have been queued for it. This * function is used in conjunction with __device_links_queue_sync_state(). The * @dont_lock_dev parameter is useful when this function is called from a * context where a device lock is already held. */ static void device_links_flush_sync_list(struct list_head *list, struct device *dont_lock_dev) { struct device *dev, *tmp; list_for_each_entry_safe(dev, tmp, list, links.defer_sync) { list_del_init(&dev->links.defer_sync); if (dev != dont_lock_dev) device_lock(dev); dev_sync_state(dev); if (dev != dont_lock_dev) device_unlock(dev); put_device(dev); } } void device_links_supplier_sync_state_pause(void) { device_links_write_lock(); defer_sync_state_count++; device_links_write_unlock(); } void device_links_supplier_sync_state_resume(void) { struct device *dev, *tmp; LIST_HEAD(sync_list); device_links_write_lock(); if (!defer_sync_state_count) { WARN(true, "Unmatched sync_state pause/resume!"); goto out; } defer_sync_state_count--; if (defer_sync_state_count) goto out; list_for_each_entry_safe(dev, tmp, &deferred_sync, links.defer_sync) { /* * Delete from deferred_sync list before queuing it to * sync_list because defer_sync is used for both lists. */ list_del_init(&dev->links.defer_sync); __device_links_queue_sync_state(dev, &sync_list); } out: device_links_write_unlock(); device_links_flush_sync_list(&sync_list, NULL); } static int sync_state_resume_initcall(void) { device_links_supplier_sync_state_resume(); return 0; } late_initcall(sync_state_resume_initcall); static void __device_links_supplier_defer_sync(struct device *sup) { if (list_empty(&sup->links.defer_sync) && dev_has_sync_state(sup)) list_add_tail(&sup->links.defer_sync, &deferred_sync); } static void device_link_drop_managed(struct device_link *link) { link->flags &= ~DL_FLAG_MANAGED; WRITE_ONCE(link->status, DL_STATE_NONE); kref_put(&link->kref, __device_link_del); } static ssize_t waiting_for_supplier_show(struct device *dev, struct device_attribute *attr, char *buf) { bool val; device_lock(dev); scoped_guard(mutex, &fwnode_link_lock) val = !!fwnode_links_check_suppliers(dev->fwnode); device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } static DEVICE_ATTR_RO(waiting_for_supplier); /** * device_links_force_bind - Prepares device to be force bound * @dev: Consumer device. * * device_bind_driver() force binds a device to a driver without calling any * driver probe functions. So the consumer really isn't going to wait for any * supplier before it's bound to the driver. We still want the device link * states to be sensible when this happens. * * In preparation for device_bind_driver(), this function goes through each * supplier device links and checks if the supplier is bound. If it is, then * the device link status is set to CONSUMER_PROBE. Otherwise, the device link * is dropped. Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_force_bind(struct device *dev) { struct device_link *link, *ln; device_links_write_lock(); list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status != DL_STATE_AVAILABLE) { device_link_drop_managed(link); continue; } WRITE_ONCE(link->status, DL_STATE_CONSUMER_PROBE); } dev->links.status = DL_DEV_PROBING; device_links_write_unlock(); } /** * device_links_driver_bound - Update device links after probing its driver. * @dev: Device to update the links for. * * The probe has been successful, so update links from this device to any * consumers by changing their status to "available". * * Also change the status of @dev's links to suppliers to "active". * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_driver_bound(struct device *dev) { struct device_link *link, *ln; LIST_HEAD(sync_list); /* * If a device binds successfully, it's expected to have created all * the device links it needs to or make new device links as it needs * them. So, fw_devlink no longer needs to create device links to any * of the device's suppliers. * * Also, if a child firmware node of this bound device is not added as a * device by now, assume it is never going to be added. Make this bound * device the fallback supplier to the dangling consumers of the child * firmware node because this bound device is probably implementing the * child firmware node functionality and we don't want the dangling * consumers to defer probe indefinitely waiting for a device for the * child firmware node. */ if (dev->fwnode && dev->fwnode->dev == dev) { struct fwnode_handle *child; fwnode_links_purge_suppliers(dev->fwnode); guard(mutex)(&fwnode_link_lock); fwnode_for_each_available_child_node(dev->fwnode, child) __fw_devlink_pickup_dangling_consumers(child, dev->fwnode); __fw_devlink_link_to_consumers(dev); } device_remove_file(dev, &dev_attr_waiting_for_supplier); device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; /* * Links created during consumer probe may be in the "consumer * probe" state to start with if the supplier is still probing * when they are created and they may become "active" if the * consumer probe returns first. Skip them here. */ if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) continue; WARN_ON(link->status != DL_STATE_DORMANT); WRITE_ONCE(link->status, DL_STATE_AVAILABLE); if (link->flags & DL_FLAG_AUTOPROBE_CONSUMER) driver_deferred_probe_add(link->consumer); } if (defer_sync_state_count) __device_links_supplier_defer_sync(dev); else __device_links_queue_sync_state(dev, &sync_list); list_for_each_entry_safe(link, ln, &dev->links.suppliers, c_node) { struct device *supplier; if (!(link->flags & DL_FLAG_MANAGED)) continue; supplier = link->supplier; if (link->flags & DL_FLAG_SYNC_STATE_ONLY) { /* * When DL_FLAG_SYNC_STATE_ONLY is set, it means no * other DL_MANAGED_LINK_FLAGS have been set. So, it's * save to drop the managed link completely. */ device_link_drop_managed(link); } else if (dev_is_best_effort(dev) && link->flags & DL_FLAG_INFERRED && link->status != DL_STATE_CONSUMER_PROBE && !link->supplier->can_match) { /* * When dev_is_best_effort() is true, we ignore device * links to suppliers that don't have a driver. If the * consumer device still managed to probe, there's no * point in maintaining a device link in a weird state * (consumer probed before supplier). So delete it. */ device_link_drop_managed(link); } else { WARN_ON(link->status != DL_STATE_CONSUMER_PROBE); WRITE_ONCE(link->status, DL_STATE_ACTIVE); } /* * This needs to be done even for the deleted * DL_FLAG_SYNC_STATE_ONLY device link in case it was the last * device link that was preventing the supplier from getting a * sync_state() call. */ if (defer_sync_state_count) __device_links_supplier_defer_sync(supplier); else __device_links_queue_sync_state(supplier, &sync_list); } dev->links.status = DL_DEV_DRIVER_BOUND; device_links_write_unlock(); device_links_flush_sync_list(&sync_list, dev); } /** * __device_links_no_driver - Update links of a device without a driver. * @dev: Device without a drvier. * * Delete all non-persistent links from this device to any suppliers. * * Persistent links stay around, but their status is changed to "available", * unless they already are in the "supplier unbind in progress" state in which * case they need not be updated. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ static void __device_links_no_driver(struct device *dev) { struct device_link *link, *ln; list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->flags & DL_FLAG_AUTOREMOVE_CONSUMER) { device_link_drop_managed(link); continue; } if (link->status != DL_STATE_CONSUMER_PROBE && link->status != DL_STATE_ACTIVE) continue; if (link->supplier->links.status == DL_DEV_DRIVER_BOUND) { WRITE_ONCE(link->status, DL_STATE_AVAILABLE); } else { WARN_ON(!(link->flags & DL_FLAG_SYNC_STATE_ONLY)); WRITE_ONCE(link->status, DL_STATE_DORMANT); } } dev->links.status = DL_DEV_NO_DRIVER; } /** * device_links_no_driver - Update links after failing driver probe. * @dev: Device whose driver has just failed to probe. * * Clean up leftover links to consumers for @dev and invoke * %__device_links_no_driver() to update links to suppliers for it as * appropriate. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_no_driver(struct device *dev) { struct device_link *link; device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; /* * The probe has failed, so if the status of the link is * "consumer probe" or "active", it must have been added by * a probing consumer while this device was still probing. * Change its state to "dormant", as it represents a valid * relationship, but it is not functionally meaningful. */ if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) WRITE_ONCE(link->status, DL_STATE_DORMANT); } __device_links_no_driver(dev); device_links_write_unlock(); } /** * device_links_driver_cleanup - Update links after driver removal. * @dev: Device whose driver has just gone away. * * Update links to consumers for @dev by changing their status to "dormant" and * invoke %__device_links_no_driver() to update links to suppliers for it as * appropriate. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_driver_cleanup(struct device *dev) { struct device_link *link, *ln; device_links_write_lock(); list_for_each_entry_safe(link, ln, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; WARN_ON(link->flags & DL_FLAG_AUTOREMOVE_CONSUMER); WARN_ON(link->status != DL_STATE_SUPPLIER_UNBIND); /* * autoremove the links between this @dev and its consumer * devices that are not active, i.e. where the link state * has moved to DL_STATE_SUPPLIER_UNBIND. */ if (link->status == DL_STATE_SUPPLIER_UNBIND && link->flags & DL_FLAG_AUTOREMOVE_SUPPLIER) device_link_drop_managed(link); WRITE_ONCE(link->status, DL_STATE_DORMANT); } list_del_init(&dev->links.defer_sync); __device_links_no_driver(dev); device_links_write_unlock(); } /** * device_links_busy - Check if there are any busy links to consumers. * @dev: Device to check. * * Check each consumer of the device and return 'true' if its link's status * is one of "consumer probe" or "active" (meaning that the given consumer is * probing right now or its driver is present). Otherwise, change the link * state to "supplier unbind" to prevent the consumer from being probed * successfully going forward. * * Return 'false' if there are no probing or active consumers. * * Links without the DL_FLAG_MANAGED flag set are ignored. */ bool device_links_busy(struct device *dev) { struct device_link *link; bool ret = false; device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { if (!(link->flags & DL_FLAG_MANAGED)) continue; if (link->status == DL_STATE_CONSUMER_PROBE || link->status == DL_STATE_ACTIVE) { ret = true; break; } WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); } dev->links.status = DL_DEV_UNBINDING; device_links_write_unlock(); return ret; } /** * device_links_unbind_consumers - Force unbind consumers of the given device. * @dev: Device to unbind the consumers of. * * Walk the list of links to consumers for @dev and if any of them is in the * "consumer probe" state, wait for all device probes in progress to complete * and start over. * * If that's not the case, change the status of the link to "supplier unbind" * and check if the link was in the "active" state. If so, force the consumer * driver to unbind and start over (the consumer will not re-probe as we have * changed the state of the link already). * * Links without the DL_FLAG_MANAGED flag set are ignored. */ void device_links_unbind_consumers(struct device *dev) { struct device_link *link; start: device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) { enum device_link_state status; if (!(link->flags & DL_FLAG_MANAGED) || link->flags & DL_FLAG_SYNC_STATE_ONLY) continue; status = link->status; if (status == DL_STATE_CONSUMER_PROBE) { device_links_write_unlock(); wait_for_device_probe(); goto start; } WRITE_ONCE(link->status, DL_STATE_SUPPLIER_UNBIND); if (status == DL_STATE_ACTIVE) { struct device *consumer = link->consumer; get_device(consumer); device_links_write_unlock(); device_release_driver_internal(consumer, NULL, consumer->parent); put_device(consumer); goto start; } } device_links_write_unlock(); } /** * device_links_purge - Delete existing links to other devices. * @dev: Target device. */ static void device_links_purge(struct device *dev) { struct device_link *link, *ln; if (dev->class == &devlink_class) return; /* * Delete all of the remaining links from this device to any other * devices (either consumers or suppliers). */ device_links_write_lock(); list_for_each_entry_safe_reverse(link, ln, &dev->links.suppliers, c_node) { WARN_ON(link->status == DL_STATE_ACTIVE); __device_link_del(&link->kref); } list_for_each_entry_safe_reverse(link, ln, &dev->links.consumers, s_node) { WARN_ON(link->status != DL_STATE_DORMANT && link->status != DL_STATE_NONE); __device_link_del(&link->kref); } device_links_write_unlock(); } #define FW_DEVLINK_FLAGS_PERMISSIVE (DL_FLAG_INFERRED | \ DL_FLAG_SYNC_STATE_ONLY) #define FW_DEVLINK_FLAGS_ON (DL_FLAG_INFERRED | \ DL_FLAG_AUTOPROBE_CONSUMER) #define FW_DEVLINK_FLAGS_RPM (FW_DEVLINK_FLAGS_ON | \ DL_FLAG_PM_RUNTIME) static u32 fw_devlink_flags = FW_DEVLINK_FLAGS_RPM; static int __init fw_devlink_setup(char *arg) { if (!arg) return -EINVAL; if (strcmp(arg, "off") == 0) { fw_devlink_flags = 0; } else if (strcmp(arg, "permissive") == 0) { fw_devlink_flags = FW_DEVLINK_FLAGS_PERMISSIVE; } else if (strcmp(arg, "on") == 0) { fw_devlink_flags = FW_DEVLINK_FLAGS_ON; } else if (strcmp(arg, "rpm") == 0) { fw_devlink_flags = FW_DEVLINK_FLAGS_RPM; } return 0; } early_param("fw_devlink", fw_devlink_setup); static bool fw_devlink_strict; static int __init fw_devlink_strict_setup(char *arg) { return kstrtobool(arg, &fw_devlink_strict); } early_param("fw_devlink.strict", fw_devlink_strict_setup); #define FW_DEVLINK_SYNC_STATE_STRICT 0 #define FW_DEVLINK_SYNC_STATE_TIMEOUT 1 #ifndef CONFIG_FW_DEVLINK_SYNC_STATE_TIMEOUT static int fw_devlink_sync_state; #else static int fw_devlink_sync_state = FW_DEVLINK_SYNC_STATE_TIMEOUT; #endif static int __init fw_devlink_sync_state_setup(char *arg) { if (!arg) return -EINVAL; if (strcmp(arg, "strict") == 0) { fw_devlink_sync_state = FW_DEVLINK_SYNC_STATE_STRICT; return 0; } else if (strcmp(arg, "timeout") == 0) { fw_devlink_sync_state = FW_DEVLINK_SYNC_STATE_TIMEOUT; return 0; } return -EINVAL; } early_param("fw_devlink.sync_state", fw_devlink_sync_state_setup); static inline u32 fw_devlink_get_flags(u8 fwlink_flags) { if (fwlink_flags & FWLINK_FLAG_CYCLE) return FW_DEVLINK_FLAGS_PERMISSIVE | DL_FLAG_CYCLE; return fw_devlink_flags; } static bool fw_devlink_is_permissive(void) { return fw_devlink_flags == FW_DEVLINK_FLAGS_PERMISSIVE; } bool fw_devlink_is_strict(void) { return fw_devlink_strict && !fw_devlink_is_permissive(); } static void fw_devlink_parse_fwnode(struct fwnode_handle *fwnode) { if (fwnode->flags & FWNODE_FLAG_LINKS_ADDED) return; fwnode_call_int_op(fwnode, add_links); fwnode->flags |= FWNODE_FLAG_LINKS_ADDED; } static void fw_devlink_parse_fwtree(struct fwnode_handle *fwnode) { struct fwnode_handle *child = NULL; fw_devlink_parse_fwnode(fwnode); while ((child = fwnode_get_next_available_child_node(fwnode, child))) fw_devlink_parse_fwtree(child); } static void fw_devlink_relax_link(struct device_link *link) { if (!(link->flags & DL_FLAG_INFERRED)) return; if (device_link_flag_is_sync_state_only(link->flags)) return; pm_runtime_drop_link(link); link->flags = DL_FLAG_MANAGED | FW_DEVLINK_FLAGS_PERMISSIVE; dev_dbg(link->consumer, "Relaxing link with %s\n", dev_name(link->supplier)); } static int fw_devlink_no_driver(struct device *dev, void *data) { struct device_link *link = to_devlink(dev); if (!link->supplier->can_match) fw_devlink_relax_link(link); return 0; } void fw_devlink_drivers_done(void) { fw_devlink_drv_reg_done = true; device_links_write_lock(); class_for_each_device(&devlink_class, NULL, NULL, fw_devlink_no_driver); device_links_write_unlock(); } static int fw_devlink_dev_sync_state(struct device *dev, void *data) { struct device_link *link = to_devlink(dev); struct device *sup = link->supplier; if (!(link->flags & DL_FLAG_MANAGED) || link->status == DL_STATE_ACTIVE || sup->state_synced || !dev_has_sync_state(sup)) return 0; if (fw_devlink_sync_state == FW_DEVLINK_SYNC_STATE_STRICT) { dev_warn(sup, "sync_state() pending due to %s\n", dev_name(link->consumer)); return 0; } if (!list_empty(&sup->links.defer_sync)) return 0; dev_warn(sup, "Timed out. Forcing sync_state()\n"); sup->state_synced = true; get_device(sup); list_add_tail(&sup->links.defer_sync, data); return 0; } void fw_devlink_probing_done(void) { LIST_HEAD(sync_list); device_links_write_lock(); class_for_each_device(&devlink_class, NULL, &sync_list, fw_devlink_dev_sync_state); device_links_write_unlock(); device_links_flush_sync_list(&sync_list, NULL); } /** * wait_for_init_devices_probe - Try to probe any device needed for init * * Some devices might need to be probed and bound successfully before the kernel * boot sequence can finish and move on to init/userspace. For example, a * network interface might need to be bound to be able to mount a NFS rootfs. * * With fw_devlink=on by default, some of these devices might be blocked from * probing because they are waiting on a optional supplier that doesn't have a * driver. While fw_devlink will eventually identify such devices and unblock * the probing automatically, it might be too late by the time it unblocks the * probing of devices. For example, the IP4 autoconfig might timeout before * fw_devlink unblocks probing of the network interface. * * This function is available to temporarily try and probe all devices that have * a driver even if some of their suppliers haven't been added or don't have * drivers. * * The drivers can then decide which of the suppliers are optional vs mandatory * and probe the device if possible. By the time this function returns, all such * "best effort" probes are guaranteed to be completed. If a device successfully * probes in this mode, we delete all fw_devlink discovered dependencies of that * device where the supplier hasn't yet probed successfully because they have to * be optional dependencies. * * Any devices that didn't successfully probe go back to being treated as if * this function was never called. * * This also means that some devices that aren't needed for init and could have * waited for their optional supplier to probe (when the supplier's module is * loaded later on) would end up probing prematurely with limited functionality. * So call this function only when boot would fail without it. */ void __init wait_for_init_devices_probe(void) { if (!fw_devlink_flags || fw_devlink_is_permissive()) return; /* * Wait for all ongoing probes to finish so that the "best effort" is * only applied to devices that can't probe otherwise. */ wait_for_device_probe(); pr_info("Trying to probe devices needed for running init ...\n"); fw_devlink_best_effort = true; driver_deferred_probe_trigger(); /* * Wait for all "best effort" probes to finish before going back to * normal enforcement. */ wait_for_device_probe(); fw_devlink_best_effort = false; } static void fw_devlink_unblock_consumers(struct device *dev) { struct device_link *link; if (!fw_devlink_flags || fw_devlink_is_permissive()) return; device_links_write_lock(); list_for_each_entry(link, &dev->links.consumers, s_node) fw_devlink_relax_link(link); device_links_write_unlock(); } #define get_dev_from_fwnode(fwnode) get_device((fwnode)->dev) static bool fwnode_init_without_drv(struct fwnode_handle *fwnode) { struct device *dev; bool ret; if (!(fwnode->flags & FWNODE_FLAG_INITIALIZED)) return false; dev = get_dev_from_fwnode(fwnode); ret = !dev || dev->links.status == DL_DEV_NO_DRIVER; put_device(dev); return ret; } static bool fwnode_ancestor_init_without_drv(struct fwnode_handle *fwnode) { struct fwnode_handle *parent; fwnode_for_each_parent_node(fwnode, parent) { if (fwnode_init_without_drv(parent)) { fwnode_handle_put(parent); return true; } } return false; } /** * fwnode_is_ancestor_of - Test if @ancestor is ancestor of @child * @ancestor: Firmware which is tested for being an ancestor * @child: Firmware which is tested for being the child * * A node is considered an ancestor of itself too. * * Return: true if @ancestor is an ancestor of @child. Otherwise, returns false. */ static bool fwnode_is_ancestor_of(const struct fwnode_handle *ancestor, const struct fwnode_handle *child) { struct fwnode_handle *parent; if (IS_ERR_OR_NULL(ancestor)) return false; if (child == ancestor) return true; fwnode_for_each_parent_node(child, parent) { if (parent == ancestor) { fwnode_handle_put(parent); return true; } } return false; } /** * fwnode_get_next_parent_dev - Find device of closest ancestor fwnode * @fwnode: firmware node * * Given a firmware node (@fwnode), this function finds its closest ancestor * firmware node that has a corresponding struct device and returns that struct * device. * * The caller is responsible for calling put_device() on the returned device * pointer. * * Return: a pointer to the device of the @fwnode's closest ancestor. */ static struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwnode) { struct fwnode_handle *parent; struct device *dev; fwnode_for_each_parent_node(fwnode, parent) { dev = get_dev_from_fwnode(parent); if (dev) { fwnode_handle_put(parent); return dev; } } return NULL; } /** * __fw_devlink_relax_cycles - Relax and mark dependency cycles. * @con_handle: Potential consumer device fwnode. * @sup_handle: Potential supplier's fwnode. * * Needs to be called with fwnode_lock and device link lock held. * * Check if @sup_handle or any of its ancestors or suppliers direct/indirectly * depend on @con. This function can detect multiple cyles between @sup_handle * and @con. When such dependency cycles are found, convert all device links * created solely by fw_devlink into SYNC_STATE_ONLY device links. Also, mark * all fwnode links in the cycle with FWLINK_FLAG_CYCLE so that when they are * converted into a device link in the future, they are created as * SYNC_STATE_ONLY device links. This is the equivalent of doing * fw_devlink=permissive just between the devices in the cycle. We need to do * this because, at this point, fw_devlink can't tell which of these * dependencies is not a real dependency. * * Return true if one or more cycles were found. Otherwise, return false. */ static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, struct fwnode_handle *sup_handle) { struct device *sup_dev = NULL, *par_dev = NULL, *con_dev = NULL; struct fwnode_link *link; struct device_link *dev_link; bool ret = false; if (!sup_handle) return false; /* * We aren't trying to find all cycles. Just a cycle between con and * sup_handle. */ if (sup_handle->flags & FWNODE_FLAG_VISITED) return false; sup_handle->flags |= FWNODE_FLAG_VISITED; /* Termination condition. */ if (sup_handle == con_handle) { pr_debug("----- cycle: start -----\n"); ret = true; goto out; } sup_dev = get_dev_from_fwnode(sup_handle); con_dev = get_dev_from_fwnode(con_handle); /* * If sup_dev is bound to a driver and @con hasn't started binding to a * driver, sup_dev can't be a consumer of @con. So, no need to check * further. */ if (sup_dev && sup_dev->links.status == DL_DEV_DRIVER_BOUND && con_dev && con_dev->links.status == DL_DEV_NO_DRIVER) { ret = false; goto out; } list_for_each_entry(link, &sup_handle->suppliers, c_hook) { if (link->flags & FWLINK_FLAG_IGNORE) continue; if (__fw_devlink_relax_cycles(con_handle, link->supplier)) { __fwnode_link_cycle(link); ret = true; } } /* * Give priority to device parent over fwnode parent to account for any * quirks in how fwnodes are converted to devices. */ if (sup_dev) par_dev = get_device(sup_dev->parent); else par_dev = fwnode_get_next_parent_dev(sup_handle); if (par_dev && __fw_devlink_relax_cycles(con_handle, par_dev->fwnode)) { pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle, par_dev->fwnode); ret = true; } if (!sup_dev) goto out; list_for_each_entry(dev_link, &sup_dev->links.suppliers, c_node) { /* * Ignore a SYNC_STATE_ONLY flag only if it wasn't marked as * such due to a cycle. */ if (device_link_flag_is_sync_state_only(dev_link->flags) && !(dev_link->flags & DL_FLAG_CYCLE)) continue; if (__fw_devlink_relax_cycles(con_handle, dev_link->supplier->fwnode)) { pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle, dev_link->supplier->fwnode); fw_devlink_relax_link(dev_link); dev_link->flags |= DL_FLAG_CYCLE; ret = true; } } out: sup_handle->flags &= ~FWNODE_FLAG_VISITED; put_device(sup_dev); put_device(par_dev); return ret; } /** * fw_devlink_create_devlink - Create a device link from a consumer to fwnode * @con: consumer device for the device link * @sup_handle: fwnode handle of supplier * @link: fwnode link that's being converted to a device link * * This function will try to create a device link between the consumer device * @con and the supplier device represented by @sup_handle. * * The supplier has to be provided as a fwnode because incorrect cycles in * fwnode links can sometimes cause the supplier device to never be created. * This function detects such cases and returns an error if it cannot create a * device link from the consumer to a missing supplier. * * Returns, * 0 on successfully creating a device link * -EINVAL if the device link cannot be created as expected * -EAGAIN if the device link cannot be created right now, but it may be * possible to do that in the future */ static int fw_devlink_create_devlink(struct device *con, struct fwnode_handle *sup_handle, struct fwnode_link *link) { struct device *sup_dev; int ret = 0; u32 flags; if (link->flags & FWLINK_FLAG_IGNORE) return 0; /* * In some cases, a device P might also be a supplier to its child node * C. However, this would defer the probe of C until the probe of P * completes successfully. This is perfectly fine in the device driver * model. device_add() doesn't guarantee probe completion of the device * by the time it returns. * * However, there are a few drivers that assume C will finish probing * as soon as it's added and before P finishes probing. So, we provide * a flag to let fw_devlink know not to delay the probe of C until the * probe of P completes successfully. * * When such a flag is set, we can't create device links where P is the * supplier of C as that would delay the probe of C. */ if (sup_handle->flags & FWNODE_FLAG_NEEDS_CHILD_BOUND_ON_ADD && fwnode_is_ancestor_of(sup_handle, con->fwnode)) return -EINVAL; /* * Don't try to optimize by not calling the cycle detection logic under * certain conditions. There's always some corner case that won't get * detected. */ device_links_write_lock(); if (__fw_devlink_relax_cycles(link->consumer, sup_handle)) { __fwnode_link_cycle(link); pr_debug("----- cycle: end -----\n"); pr_info("%pfwf: Fixed dependency cycle(s) with %pfwf\n", link->consumer, sup_handle); } device_links_write_unlock(); if (con->fwnode == link->consumer) flags = fw_devlink_get_flags(link->flags); else flags = FW_DEVLINK_FLAGS_PERMISSIVE; if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE) sup_dev = fwnode_get_next_parent_dev(sup_handle); else sup_dev = get_dev_from_fwnode(sup_handle); if (sup_dev) { /* * If it's one of those drivers that don't actually bind to * their device using driver core, then don't wait on this * supplier device indefinitely. */ if (sup_dev->links.status == DL_DEV_NO_DRIVER && sup_handle->flags & FWNODE_FLAG_INITIALIZED) { dev_dbg(con, "Not linking %pfwf - dev might never probe\n", sup_handle); ret = -EINVAL; goto out; } if (con != sup_dev && !device_link_add(con, sup_dev, flags)) { dev_err(con, "Failed to create device link (0x%x) with supplier %s for %pfwf\n", flags, dev_name(sup_dev), link->consumer); ret = -EINVAL; } goto out; } /* * Supplier or supplier's ancestor already initialized without a struct * device or being probed by a driver. */ if (fwnode_init_without_drv(sup_handle) || fwnode_ancestor_init_without_drv(sup_handle)) { dev_dbg(con, "Not linking %pfwf - might never become dev\n", sup_handle); return -EINVAL; } ret = -EAGAIN; out: put_device(sup_dev); return ret; } /** * __fw_devlink_link_to_consumers - Create device links to consumers of a device * @dev: Device that needs to be linked to its consumers * * This function looks at all the consumer fwnodes of @dev and creates device * links between the consumer device and @dev (supplier). * * If the consumer device has not been added yet, then this function creates a * SYNC_STATE_ONLY link between @dev (supplier) and the closest ancestor device * of the consumer fwnode. This is necessary to make sure @dev doesn't get a * sync_state() callback before the real consumer device gets to be added and * then probed. * * Once device links are created from the real consumer to @dev (supplier), the * fwnode links are deleted. */ static void __fw_devlink_link_to_consumers(struct device *dev) { struct fwnode_handle *fwnode = dev->fwnode; struct fwnode_link *link, *tmp; list_for_each_entry_safe(link, tmp, &fwnode->consumers, s_hook) { struct device *con_dev; bool own_link = true; int ret; con_dev = get_dev_from_fwnode(link->consumer); /* * If consumer device is not available yet, make a "proxy" * SYNC_STATE_ONLY link from the consumer's parent device to * the supplier device. This is necessary to make sure the * supplier doesn't get a sync_state() callback before the real * consumer can create a device link to the supplier. * * This proxy link step is needed to handle the case where the * consumer's parent device is added before the supplier. */ if (!con_dev) { con_dev = fwnode_get_next_parent_dev(link->consumer); /* * However, if the consumer's parent device is also the * parent of the supplier, don't create a * consumer-supplier link from the parent to its child * device. Such a dependency is impossible. */ if (con_dev && fwnode_is_ancestor_of(con_dev->fwnode, fwnode)) { put_device(con_dev); con_dev = NULL; } else { own_link = false; } } if (!con_dev) continue; ret = fw_devlink_create_devlink(con_dev, fwnode, link); put_device(con_dev); if (!own_link || ret == -EAGAIN) continue; __fwnode_link_del(link); } } /** * __fw_devlink_link_to_suppliers - Create device links to suppliers of a device * @dev: The consumer device that needs to be linked to its suppliers * @fwnode: Root of the fwnode tree that is used to create device links * * This function looks at all the supplier fwnodes of fwnode tree rooted at * @fwnode and creates device links between @dev (consumer) and all the * supplier devices of the entire fwnode tree at @fwnode. * * The function creates normal (non-SYNC_STATE_ONLY) device links between @dev * and the real suppliers of @dev. Once these device links are created, the * fwnode links are deleted. * * In addition, it also looks at all the suppliers of the entire fwnode tree * because some of the child devices of @dev that have not been added yet * (because @dev hasn't probed) might already have their suppliers added to * driver core. So, this function creates SYNC_STATE_ONLY device links between * @dev (consumer) and these suppliers to make sure they don't execute their * sync_state() callbacks before these child devices have a chance to create * their device links. The fwnode links that correspond to the child devices * aren't delete because they are needed later to create the device links * between the real consumer and supplier devices. */ static void __fw_devlink_link_to_suppliers(struct device *dev, struct fwnode_handle *fwnode) { bool own_link = (dev->fwnode == fwnode); struct fwnode_link *link, *tmp; struct fwnode_handle *child = NULL; list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) { int ret; struct fwnode_handle *sup = link->supplier; ret = fw_devlink_create_devlink(dev, sup, link); if (!own_link || ret == -EAGAIN) continue; __fwnode_link_del(link); } /* * Make "proxy" SYNC_STATE_ONLY device links to represent the needs of * all the descendants. This proxy link step is needed to handle the * case where the supplier is added before the consumer's parent device * (@dev). */ while ((child = fwnode_get_next_available_child_node(fwnode, child))) __fw_devlink_link_to_suppliers(dev, child); } static void fw_devlink_link_device(struct device *dev) { struct fwnode_handle *fwnode = dev->fwnode; if (!fw_devlink_flags) return; fw_devlink_parse_fwtree(fwnode); guard(mutex)(&fwnode_link_lock); __fw_devlink_link_to_consumers(dev); __fw_devlink_link_to_suppliers(dev, fwnode); } /* Device links support end. */ static struct kobject *dev_kobj; /* /sys/dev/char */ static struct kobject *sysfs_dev_char_kobj; /* /sys/dev/block */ static struct kobject *sysfs_dev_block_kobj; static DEFINE_MUTEX(device_hotplug_lock); void lock_device_hotplug(void) { mutex_lock(&device_hotplug_lock); } void unlock_device_hotplug(void) { mutex_unlock(&device_hotplug_lock); } int lock_device_hotplug_sysfs(void) { if (mutex_trylock(&device_hotplug_lock)) return 0; /* Avoid busy looping (5 ms of sleep should do). */ msleep(5); return restart_syscall(); } #ifdef CONFIG_BLOCK static inline int device_is_not_partition(struct device *dev) { return !(dev->type == &part_type); } #else static inline int device_is_not_partition(struct device *dev) { return 1; } #endif static void device_platform_notify(struct device *dev) { acpi_device_notify(dev); software_node_notify(dev); } static void device_platform_notify_remove(struct device *dev) { software_node_notify_remove(dev); acpi_device_notify_remove(dev); } /** * dev_driver_string - Return a device's driver name, if at all possible * @dev: struct device to get the name of * * Will return the device's driver's name if it is bound to a device. If * the device is not bound to a driver, it will return the name of the bus * it is attached to. If it is not attached to a bus either, an empty * string will be returned. */ const char *dev_driver_string(const struct device *dev) { struct device_driver *drv; /* dev->driver can change to NULL underneath us because of unbinding, * so be careful about accessing it. dev->bus and dev->class should * never change once they are set, so they don't need special care. */ drv = READ_ONCE(dev->driver); return drv ? drv->name : dev_bus_name(dev); } EXPORT_SYMBOL(dev_driver_string); #define to_dev_attr(_attr) container_of(_attr, struct device_attribute, attr) static ssize_t dev_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct device_attribute *dev_attr = to_dev_attr(attr); struct device *dev = kobj_to_dev(kobj); ssize_t ret = -EIO; if (dev_attr->show) ret = dev_attr->show(dev, dev_attr, buf); if (ret >= (ssize_t)PAGE_SIZE) { printk("dev_attr_show: %pS returned bad count\n", dev_attr->show); } return ret; } static ssize_t dev_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { struct device_attribute *dev_attr = to_dev_attr(attr); struct device *dev = kobj_to_dev(kobj); ssize_t ret = -EIO; if (dev_attr->store) ret = dev_attr->store(dev, dev_attr, buf, count); return ret; } static const struct sysfs_ops dev_sysfs_ops = { .show = dev_attr_show, .store = dev_attr_store, }; #define to_ext_attr(x) container_of(x, struct dev_ext_attribute, attr) ssize_t device_store_ulong(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); int ret; unsigned long new; ret = kstrtoul(buf, 0, &new); if (ret) return ret; *(unsigned long *)(ea->var) = new; /* Always return full write size even if we didn't consume all */ return size; } EXPORT_SYMBOL_GPL(device_store_ulong); ssize_t device_show_ulong(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%lx\n", *(unsigned long *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_ulong); ssize_t device_store_int(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); int ret; long new; ret = kstrtol(buf, 0, &new); if (ret) return ret; if (new > INT_MAX || new < INT_MIN) return -EINVAL; *(int *)(ea->var) = new; /* Always return full write size even if we didn't consume all */ return size; } EXPORT_SYMBOL_GPL(device_store_int); ssize_t device_show_int(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%d\n", *(int *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_int); ssize_t device_store_bool(struct device *dev, struct device_attribute *attr, const char *buf, size_t size) { struct dev_ext_attribute *ea = to_ext_attr(attr); if (kstrtobool(buf, ea->var) < 0) return -EINVAL; return size; } EXPORT_SYMBOL_GPL(device_store_bool); ssize_t device_show_bool(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%d\n", *(bool *)(ea->var)); } EXPORT_SYMBOL_GPL(device_show_bool); ssize_t device_show_string(struct device *dev, struct device_attribute *attr, char *buf) { struct dev_ext_attribute *ea = to_ext_attr(attr); return sysfs_emit(buf, "%s\n", (char *)ea->var); } EXPORT_SYMBOL_GPL(device_show_string); /** * device_release - free device structure. * @kobj: device's kobject. * * This is called once the reference count for the object * reaches 0. We forward the call to the device's release * method, which should handle actually freeing the structure. */ static void device_release(struct kobject *kobj) { struct device *dev = kobj_to_dev(kobj); struct device_private *p = dev->p; /* * Some platform devices are driven without driver attached * and managed resources may have been acquired. Make sure * all resources are released. * * Drivers still can add resources into device after device * is deleted but alive, so release devres here to avoid * possible memory leak. */ devres_release_all(dev); kfree(dev->dma_range_map); if (dev->release) dev->release(dev); else if (dev->type && dev->type->release) dev->type->release(dev); else if (dev->class && dev->class->dev_release) dev->class->dev_release(dev); else WARN(1, KERN_ERR "Device '%s' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.\n", dev_name(dev)); kfree(p); } static const void *device_namespace(const struct kobject *kobj) { const struct device *dev = kobj_to_dev(kobj); const void *ns = NULL; if (dev->class && dev->class->namespace) ns = dev->class->namespace(dev); return ns; } static void device_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid) { const struct device *dev = kobj_to_dev(kobj); if (dev->class && dev->class->get_ownership) dev->class->get_ownership(dev, uid, gid); } static const struct kobj_type device_ktype = { .release = device_release, .sysfs_ops = &dev_sysfs_ops, .namespace = device_namespace, .get_ownership = device_get_ownership, }; static int dev_uevent_filter(const struct kobject *kobj) { const struct kobj_type *ktype = get_ktype(kobj); if (ktype == &device_ktype) { const struct device *dev = kobj_to_dev(kobj); if (dev->bus) return 1; if (dev->class) return 1; } return 0; } static const char *dev_uevent_name(const struct kobject *kobj) { const struct device *dev = kobj_to_dev(kobj); if (dev->bus) return dev->bus->name; if (dev->class) return dev->class->name; return NULL; } static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); int retval = 0; /* add device node properties if present */ if (MAJOR(dev->devt)) { const char *tmp; const char *name; umode_t mode = 0; kuid_t uid = GLOBAL_ROOT_UID; kgid_t gid = GLOBAL_ROOT_GID; add_uevent_var(env, "MAJOR=%u", MAJOR(dev->devt)); add_uevent_var(env, "MINOR=%u", MINOR(dev->devt)); name = device_get_devnode(dev, &mode, &uid, &gid, &tmp); if (name) { add_uevent_var(env, "DEVNAME=%s", name); if (mode) add_uevent_var(env, "DEVMODE=%#o", mode & 0777); if (!uid_eq(uid, GLOBAL_ROOT_UID)) add_uevent_var(env, "DEVUID=%u", from_kuid(&init_user_ns, uid)); if (!gid_eq(gid, GLOBAL_ROOT_GID)) add_uevent_var(env, "DEVGID=%u", from_kgid(&init_user_ns, gid)); kfree(tmp); } } if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); if (dev->driver) add_uevent_var(env, "DRIVER=%s", dev->driver->name); /* Add common DT information about the device */ of_device_uevent(dev, env); /* have the bus specific function add its stuff */ if (dev->bus && dev->bus->uevent) { retval = dev->bus->uevent(dev, env); if (retval) pr_debug("device: '%s': %s: bus uevent() returned %d\n", dev_name(dev), __func__, retval); } /* have the class specific function add its stuff */ if (dev->class && dev->class->dev_uevent) { retval = dev->class->dev_uevent(dev, env); if (retval) pr_debug("device: '%s': %s: class uevent() " "returned %d\n", dev_name(dev), __func__, retval); } /* have the device type specific function add its stuff */ if (dev->type && dev->type->uevent) { retval = dev->type->uevent(dev, env); if (retval) pr_debug("device: '%s': %s: dev_type uevent() " "returned %d\n", dev_name(dev), __func__, retval); } return retval; } static const struct kset_uevent_ops device_uevent_ops = { .filter = dev_uevent_filter, .name = dev_uevent_name, .uevent = dev_uevent, }; static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, char *buf) { struct kobject *top_kobj; struct kset *kset; struct kobj_uevent_env *env = NULL; int i; int len = 0; int retval; /* search the kset, the device belongs to */ top_kobj = &dev->kobj; while (!top_kobj->kset && top_kobj->parent) top_kobj = top_kobj->parent; if (!top_kobj->kset) goto out; kset = top_kobj->kset; if (!kset->uevent_ops || !kset->uevent_ops->uevent) goto out; /* respect filter */ if (kset->uevent_ops && kset->uevent_ops->filter) if (!kset->uevent_ops->filter(&dev->kobj)) goto out; env = kzalloc(sizeof(struct kobj_uevent_env), GFP_KERNEL); if (!env) return -ENOMEM; /* Synchronize with really_probe() */ device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); device_unlock(dev); if (retval) goto out; /* copy keys to file */ for (i = 0; i < env->envp_idx; i++) len += sysfs_emit_at(buf, len, "%s\n", env->envp[i]); out: kfree(env); return len; } static ssize_t uevent_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int rc; rc = kobject_synth_uevent(&dev->kobj, buf, count); if (rc) { dev_err(dev, "uevent: failed to send synthetic uevent: %d\n", rc); return rc; } return count; } static DEVICE_ATTR_RW(uevent); static ssize_t online_show(struct device *dev, struct device_attribute *attr, char *buf) { bool val; device_lock(dev); val = !dev->offline; device_unlock(dev); return sysfs_emit(buf, "%u\n", val); } static ssize_t online_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { bool val; int ret; ret = kstrtobool(buf, &val); if (ret < 0) return ret; ret = lock_device_hotplug_sysfs(); if (ret) return ret; ret = val ? device_online(dev) : device_offline(dev); unlock_device_hotplug(); return ret < 0 ? ret : count; } static DEVICE_ATTR_RW(online); static ssize_t removable_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *loc; switch (dev->removable) { case DEVICE_REMOVABLE: loc = "removable"; break; case DEVICE_FIXED: loc = "fixed"; break; default: loc = "unknown"; } return sysfs_emit(buf, "%s\n", loc); } static DEVICE_ATTR_RO(removable); int device_add_groups(struct device *dev, const struct attribute_group **groups) { return sysfs_create_groups(&dev->kobj, groups); } EXPORT_SYMBOL_GPL(device_add_groups); void device_remove_groups(struct device *dev, const struct attribute_group **groups) { sysfs_remove_groups(&dev->kobj, groups); } EXPORT_SYMBOL_GPL(device_remove_groups); union device_attr_group_devres { const struct attribute_group *group; const struct attribute_group **groups; }; static void devm_attr_group_remove(struct device *dev, void *res) { union device_attr_group_devres *devres = res; const struct attribute_group *group = devres->group; dev_dbg(dev, "%s: removing group %p\n", __func__, group); sysfs_remove_group(&dev->kobj, group); } /** * devm_device_add_group - given a device, create a managed attribute group * @dev: The device to create the group for * @grp: The attribute group to create * * This function creates a group for the first time. It will explicitly * warn and error if any of the attribute files being created already exist. * * Returns 0 on success or error code on failure. */ int devm_device_add_group(struct device *dev, const struct attribute_group *grp) { union device_attr_group_devres *devres; int error; devres = devres_alloc(devm_attr_group_remove, sizeof(*devres), GFP_KERNEL); if (!devres) return -ENOMEM; error = sysfs_create_group(&dev->kobj, grp); if (error) { devres_free(devres); return error; } devres->group = grp; devres_add(dev, devres); return 0; } EXPORT_SYMBOL_GPL(devm_device_add_group); static int device_add_attrs(struct device *dev) { const struct class *class = dev->class; const struct device_type *type = dev->type; int error; if (class) { error = device_add_groups(dev, class->dev_groups); if (error) return error; } if (type) { error = device_add_groups(dev, type->groups); if (error) goto err_remove_class_groups; } error = device_add_groups(dev, dev->groups); if (error) goto err_remove_type_groups; if (device_supports_offline(dev) && !dev->offline_disabled) { error = device_create_file(dev, &dev_attr_online); if (error) goto err_remove_dev_groups; } if (fw_devlink_flags && !fw_devlink_is_permissive() && dev->fwnode) { error = device_create_file(dev, &dev_attr_waiting_for_supplier); if (error) goto err_remove_dev_online; } if (dev_removable_is_valid(dev)) { error = device_create_file(dev, &dev_attr_removable); if (error) goto err_remove_dev_waiting_for_supplier; } if (dev_add_physical_location(dev)) { error = device_add_group(dev, &dev_attr_physical_location_group); if (error) goto err_remove_dev_removable; } return 0; err_remove_dev_removable: device_remove_file(dev, &dev_attr_removable); err_remove_dev_waiting_for_supplier: device_remove_file(dev, &dev_attr_waiting_for_supplier); err_remove_dev_online: device_remove_file(dev, &dev_attr_online); err_remove_dev_groups: device_remove_groups(dev, dev->groups); err_remove_type_groups: if (type) device_remove_groups(dev, type->groups); err_remove_class_groups: if (class) device_remove_groups(dev, class->dev_groups); return error; } static void device_remove_attrs(struct device *dev) { const struct class *class = dev->class; const struct device_type *type = dev->type; if (dev->physical_location) { device_remove_group(dev, &dev_attr_physical_location_group); kfree(dev->physical_location); } device_remove_file(dev, &dev_attr_removable); device_remove_file(dev, &dev_attr_waiting_for_supplier); device_remove_file(dev, &dev_attr_online); device_remove_groups(dev, dev->groups); if (type) device_remove_groups(dev, type->groups); if (class) device_remove_groups(dev, class->dev_groups); } static ssize_t dev_show(struct device *dev, struct device_attribute *attr, char *buf) { return print_dev_t(buf, dev->devt); } static DEVICE_ATTR_RO(dev); /* /sys/devices/ */ struct kset *devices_kset; /** * devices_kset_move_before - Move device in the devices_kset's list. * @deva: Device to move. * @devb: Device @deva should come before. */ static void devices_kset_move_before(struct device *deva, struct device *devb) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s before %s\n", dev_name(deva), dev_name(devb)); spin_lock(&devices_kset->list_lock); list_move_tail(&deva->kobj.entry, &devb->kobj.entry); spin_unlock(&devices_kset->list_lock); } /** * devices_kset_move_after - Move device in the devices_kset's list. * @deva: Device to move * @devb: Device @deva should come after. */ static void devices_kset_move_after(struct device *deva, struct device *devb) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s after %s\n", dev_name(deva), dev_name(devb)); spin_lock(&devices_kset->list_lock); list_move(&deva->kobj.entry, &devb->kobj.entry); spin_unlock(&devices_kset->list_lock); } /** * devices_kset_move_last - move the device to the end of devices_kset's list. * @dev: device to move */ void devices_kset_move_last(struct device *dev) { if (!devices_kset) return; pr_debug("devices_kset: Moving %s to end of list\n", dev_name(dev)); spin_lock(&devices_kset->list_lock); list_move_tail(&dev->kobj.entry, &devices_kset->list); spin_unlock(&devices_kset->list_lock); } /** * device_create_file - create sysfs attribute file for device. * @dev: device. * @attr: device attribute descriptor. */ int device_create_file(struct device *dev, const struct device_attribute *attr) { int error = 0; if (dev) { WARN(((attr->attr.mode & S_IWUGO) && !attr->store), "Attribute %s: write permission without 'store'\n", attr->attr.name); WARN(((attr->attr.mode & S_IRUGO) && !attr->show), "Attribute %s: read permission without 'show'\n", attr->attr.name); error = sysfs_create_file(&dev->kobj, &attr->attr); } return error; } EXPORT_SYMBOL_GPL(device_create_file); /** * device_remove_file - remove sysfs attribute file. * @dev: device. * @attr: device attribute descriptor. */ void device_remove_file(struct device *dev, const struct device_attribute *attr) { if (dev) sysfs_remove_file(&dev->kobj, &attr->attr); } EXPORT_SYMBOL_GPL(device_remove_file); /** * device_remove_file_self - remove sysfs attribute file from its own method. * @dev: device. * @attr: device attribute descriptor. * * See kernfs_remove_self() for details. */ bool device_remove_file_self(struct device *dev, const struct device_attribute *attr) { if (dev) return sysfs_remove_file_self(&dev->kobj, &attr->attr); else return false; } EXPORT_SYMBOL_GPL(device_remove_file_self); /** * device_create_bin_file - create sysfs binary attribute file for device. * @dev: device. * @attr: device binary attribute descriptor. */ int device_create_bin_file(struct device *dev, const struct bin_attribute *attr) { int error = -EINVAL; if (dev) error = sysfs_create_bin_file(&dev->kobj, attr); return error; } EXPORT_SYMBOL_GPL(device_create_bin_file); /** * device_remove_bin_file - remove sysfs binary attribute file * @dev: device. * @attr: device binary attribute descriptor. */ void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr) { if (dev) sysfs_remove_bin_file(&dev->kobj, attr); } EXPORT_SYMBOL_GPL(device_remove_bin_file); static void klist_children_get(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); struct device *dev = p->device; get_device(dev); } static void klist_children_put(struct klist_node *n) { struct device_private *p = to_device_private_parent(n); struct device *dev = p->device; put_device(dev); } /** * device_initialize - init device structure. * @dev: device. * * This prepares the device for use by other layers by initializing * its fields. * It is the first half of device_register(), if called by * that function, though it can also be called separately, so one * may use @dev's fields. In particular, get_device()/put_device() * may be used for reference counting of @dev after calling this * function. * * All fields in @dev must be initialized by the caller to 0, except * for those explicitly set to some other value. The simplest * approach is to use kzalloc() to allocate the structure containing * @dev. * * NOTE: Use put_device() to give up your reference instead of freeing * @dev directly once you have called this function. */ void device_initialize(struct device *dev) { dev->kobj.kset = devices_kset; kobject_init(&dev->kobj, &device_ktype); INIT_LIST_HEAD(&dev->dma_pools); mutex_init(&dev->mutex); lockdep_set_novalidate_class(&dev->mutex); spin_lock_init(&dev->devres_lock); INIT_LIST_HEAD(&dev->devres_head); device_pm_init(dev); set_dev_node(dev, NUMA_NO_NODE); INIT_LIST_HEAD(&dev->links.consumers); INIT_LIST_HEAD(&dev->links.suppliers); INIT_LIST_HEAD(&dev->links.defer_sync); dev->links.status = DL_DEV_NO_DRIVER; #if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) dev->dma_coherent = dma_default_coherent; #endif swiotlb_dev_init(dev); } EXPORT_SYMBOL_GPL(device_initialize); struct kobject *virtual_device_parent(void) { static struct kobject *virtual_dir = NULL; if (!virtual_dir) virtual_dir = kobject_create_and_add("virtual", &devices_kset->kobj); return virtual_dir; } struct class_dir { struct kobject kobj; const struct class *class; }; #define to_class_dir(obj) container_of(obj, struct class_dir, kobj) static void class_dir_release(struct kobject *kobj) { struct class_dir *dir = to_class_dir(kobj); kfree(dir); } static const struct kobj_ns_type_operations *class_dir_child_ns_type(const struct kobject *kobj) { const struct class_dir *dir = to_class_dir(kobj); return dir->class->ns_type; } static const struct kobj_type class_dir_ktype = { .release = class_dir_release, .sysfs_ops = &kobj_sysfs_ops, .child_ns_type = class_dir_child_ns_type }; static struct kobject *class_dir_create_and_add(struct subsys_private *sp, struct kobject *parent_kobj) { struct class_dir *dir; int retval; dir = kzalloc(sizeof(*dir), GFP_KERNEL); if (!dir) return ERR_PTR(-ENOMEM); dir->class = sp->class; kobject_init(&dir->kobj, &class_dir_ktype); dir->kobj.kset = &sp->glue_dirs; retval = kobject_add(&dir->kobj, parent_kobj, "%s", sp->class->name); if (retval < 0) { kobject_put(&dir->kobj); return ERR_PTR(retval); } return &dir->kobj; } static DEFINE_MUTEX(gdp_mutex); static struct kobject *get_device_parent(struct device *dev, struct device *parent) { struct subsys_private *sp = class_to_subsys(dev->class); struct kobject *kobj = NULL; if (sp) { struct kobject *parent_kobj; struct kobject *k; /* * If we have no parent, we live in "virtual". * Class-devices with a non class-device as parent, live * in a "glue" directory to prevent namespace collisions. */ if (parent == NULL) parent_kobj = virtual_device_parent(); else if (parent->class && !dev->class->ns_type) { subsys_put(sp); return &parent->kobj; } else { parent_kobj = &parent->kobj; } mutex_lock(&gdp_mutex); /* find our class-directory at the parent and reference it */ spin_lock(&sp->glue_dirs.list_lock); list_for_each_entry(k, &sp->glue_dirs.list, entry) if (k->parent == parent_kobj) { kobj = kobject_get(k); break; } spin_unlock(&sp->glue_dirs.list_lock); if (kobj) { mutex_unlock(&gdp_mutex); subsys_put(sp); return kobj; } /* or create a new class-directory at the parent device */ k = class_dir_create_and_add(sp, parent_kobj); /* do not emit an uevent for this simple "glue" directory */ mutex_unlock(&gdp_mutex); subsys_put(sp); return k; } /* subsystems can specify a default root directory for their devices */ if (!parent && dev->bus) { struct device *dev_root = bus_get_dev_root(dev->bus); if (dev_root) { kobj = &dev_root->kobj; put_device(dev_root); return kobj; } } if (parent) return &parent->kobj; return NULL; } static inline bool live_in_glue_dir(struct kobject *kobj, struct device *dev) { struct subsys_private *sp; bool retval; if (!kobj || !dev->class) return false; sp = class_to_subsys(dev->class); if (!sp) return false; if (kobj->kset == &sp->glue_dirs) retval = true; else retval = false; subsys_put(sp); return retval; } static inline struct kobject *get_glue_dir(struct device *dev) { return dev->kobj.parent; } /** * kobject_has_children - Returns whether a kobject has children. * @kobj: the object to test * * This will return whether a kobject has other kobjects as children. * * It does NOT account for the presence of attribute files, only sub * directories. It also assumes there is no concurrent addition or * removal of such children, and thus relies on external locking. */ static inline bool kobject_has_children(struct kobject *kobj) { WARN_ON_ONCE(kref_read(&kobj->kref) == 0); return kobj->sd && kobj->sd->dir.subdirs; } /* * make sure cleaning up dir as the last step, we need to make * sure .release handler of kobject is run with holding the * global lock */ static void cleanup_glue_dir(struct device *dev, struct kobject *glue_dir) { unsigned int ref; /* see if we live in a "glue" directory */ if (!live_in_glue_dir(glue_dir, dev)) return; mutex_lock(&gdp_mutex); /** * There is a race condition between removing glue directory * and adding a new device under the glue directory. * * CPU1: CPU2: * * device_add() * get_device_parent() * class_dir_create_and_add() * kobject_add_internal() * create_dir() // create glue_dir * * device_add() * get_device_parent() * kobject_get() // get glue_dir * * device_del() * cleanup_glue_dir() * kobject_del(glue_dir) * * kobject_add() * kobject_add_internal() * create_dir() // in glue_dir * sysfs_create_dir_ns() * kernfs_create_dir_ns(sd) * * sysfs_remove_dir() // glue_dir->sd=NULL * sysfs_put() // free glue_dir->sd * * // sd is freed * kernfs_new_node(sd) * kernfs_get(glue_dir) * kernfs_add_one() * kernfs_put() * * Before CPU1 remove last child device under glue dir, if CPU2 add * a new device under glue dir, the glue_dir kobject reference count * will be increase to 2 in kobject_get(k). And CPU2 has been called * kernfs_create_dir_ns(). Meanwhile, CPU1 call sysfs_remove_dir() * and sysfs_put(). This result in glue_dir->sd is freed. * * Then the CPU2 will see a stale "empty" but still potentially used * glue dir around in kernfs_new_node(). * * In order to avoid this happening, we also should make sure that * kernfs_node for glue_dir is released in CPU1 only when refcount * for glue_dir kobj is 1. */ ref = kref_read(&glue_dir->kref); if (!kobject_has_children(glue_dir) && !--ref) kobject_del(glue_dir); kobject_put(glue_dir); mutex_unlock(&gdp_mutex); } static int device_add_class_symlinks(struct device *dev) { struct device_node *of_node = dev_of_node(dev); struct subsys_private *sp; int error; if (of_node) { error = sysfs_create_link(&dev->kobj, of_node_kobj(of_node), "of_node"); if (error) dev_warn(dev, "Error %d creating of_node link\n",error); /* An error here doesn't warrant bringing down the device */ } sp = class_to_subsys(dev->class); if (!sp) return 0; error = sysfs_create_link(&dev->kobj, &sp->subsys.kobj, "subsystem"); if (error) goto out_devnode; if (dev->parent && device_is_not_partition(dev)) { error = sysfs_create_link(&dev->kobj, &dev->parent->kobj, "device"); if (error) goto out_subsys; } /* link in the class directory pointing to the device */ error = sysfs_create_link(&sp->subsys.kobj, &dev->kobj, dev_name(dev)); if (error) goto out_device; goto exit; out_device: sysfs_remove_link(&dev->kobj, "device"); out_subsys: sysfs_remove_link(&dev->kobj, "subsystem"); out_devnode: sysfs_remove_link(&dev->kobj, "of_node"); exit: subsys_put(sp); return error; } static void device_remove_class_symlinks(struct device *dev) { struct subsys_private *sp = class_to_subsys(dev->class); if (dev_of_node(dev)) sysfs_remove_link(&dev->kobj, "of_node"); if (!sp) return; if (dev->parent && device_is_not_partition(dev)) sysfs_remove_link(&dev->kobj, "device"); sysfs_remove_link(&dev->kobj, "subsystem"); sysfs_delete_link(&sp->subsys.kobj, &dev->kobj, dev_name(dev)); subsys_put(sp); } /** * dev_set_name - set a device name * @dev: device * @fmt: format string for the device's name */ int dev_set_name(struct device *dev, const char *fmt, ...) { va_list vargs; int err; va_start(vargs, fmt); err = kobject_set_name_vargs(&dev->kobj, fmt, vargs); va_end(vargs); return err; } EXPORT_SYMBOL_GPL(dev_set_name); /* select a /sys/dev/ directory for the device */ static struct kobject *device_to_dev_kobj(struct device *dev) { if (is_blockdev(dev)) return sysfs_dev_block_kobj; else return sysfs_dev_char_kobj; } static int device_create_sys_dev_entry(struct device *dev) { struct kobject *kobj = device_to_dev_kobj(dev); int error = 0; char devt_str[15]; if (kobj) { format_dev_t(devt_str, dev->devt); error = sysfs_create_link(kobj, &dev->kobj, devt_str); } return error; } static void device_remove_sys_dev_entry(struct device *dev) { struct kobject *kobj = device_to_dev_kobj(dev); char devt_str[15]; if (kobj) { format_dev_t(devt_str, dev->devt); sysfs_remove_link(kobj, devt_str); } } static int device_private_init(struct device *dev) { dev->p = kzalloc(sizeof(*dev->p), GFP_KERNEL); if (!dev->p) return -ENOMEM; dev->p->device = dev; klist_init(&dev->p->klist_children, klist_children_get, klist_children_put); INIT_LIST_HEAD(&dev->p->deferred_probe); return 0; } /** * device_add - add device to device hierarchy. * @dev: device. * * This is part 2 of device_register(), though may be called * separately _iff_ device_initialize() has been called separately. * * This adds @dev to the kobject hierarchy via kobject_add(), adds it * to the global and sibling lists for the device, then * adds it to the other relevant subsystems of the driver model. * * Do not call this routine or device_register() more than once for * any device structure. The driver model core is not designed to work * with devices that get unregistered and then spring back to life. * (Among other things, it's very hard to guarantee that all references * to the previous incarnation of @dev have been dropped.) Allocate * and register a fresh new struct device instead. * * NOTE: _Never_ directly free @dev after calling this function, even * if it returned an error! Always use put_device() to give up your * reference instead. * * Rule of thumb is: if device_add() succeeds, you should call * device_del() when you want to get rid of it. If device_add() has * *not* succeeded, use *only* put_device() to drop the reference * count. */ int device_add(struct device *dev) { struct subsys_private *sp; struct device *parent; struct kobject *kobj; struct class_interface *class_intf; int error = -EINVAL; struct kobject *glue_dir = NULL; dev = get_device(dev); if (!dev) goto done; if (!dev->p) { error = device_private_init(dev); if (error) goto done; } /* * for statically allocated devices, which should all be converted * some day, we need to initialize the name. We prevent reading back * the name, and force the use of dev_name() */ if (dev->init_name) { error = dev_set_name(dev, "%s", dev->init_name); dev->init_name = NULL; } if (dev_name(dev)) error = 0; /* subsystems can specify simple device enumeration */ else if (dev->bus && dev->bus->dev_name) error = dev_set_name(dev, "%s%u", dev->bus->dev_name, dev->id); else error = -EINVAL; if (error) goto name_error; pr_debug("device: '%s': %s\n", dev_name(dev), __func__); parent = get_device(dev->parent); kobj = get_device_parent(dev, parent); if (IS_ERR(kobj)) { error = PTR_ERR(kobj); goto parent_error; } if (kobj) dev->kobj.parent = kobj; /* use parent numa_node */ if (parent && (dev_to_node(dev) == NUMA_NO_NODE)) set_dev_node(dev, dev_to_node(parent)); /* first, register with generic layer. */ /* we require the name to be set before, and pass NULL */ error = kobject_add(&dev->kobj, dev->kobj.parent, NULL); if (error) { glue_dir = kobj; goto Error; } /* notify platform of device entry */ device_platform_notify(dev); error = device_create_file(dev, &dev_attr_uevent); if (error) goto attrError; error = device_add_class_symlinks(dev); if (error) goto SymlinkError; error = device_add_attrs(dev); if (error) goto AttrsError; error = bus_add_device(dev); if (error) goto BusError; error = dpm_sysfs_add(dev); if (error) goto DPMError; device_pm_add(dev); if (MAJOR(dev->devt)) { error = device_create_file(dev, &dev_attr_dev); if (error) goto DevAttrError; error = device_create_sys_dev_entry(dev); if (error) goto SysEntryError; devtmpfs_create_node(dev); } /* Notify clients of device addition. This call must come * after dpm_sysfs_add() and before kobject_uevent(). */ bus_notify(dev, BUS_NOTIFY_ADD_DEVICE); kobject_uevent(&dev->kobj, KOBJ_ADD); /* * Check if any of the other devices (consumers) have been waiting for * this device (supplier) to be added so that they can create a device * link to it. * * This needs to happen after device_pm_add() because device_link_add() * requires the supplier be registered before it's called. * * But this also needs to happen before bus_probe_device() to make sure * waiting consumers can link to it before the driver is bound to the * device and the driver sync_state callback is called for this device. */ if (dev->fwnode && !dev->fwnode->dev) { dev->fwnode->dev = dev; fw_devlink_link_device(dev); } bus_probe_device(dev); /* * If all driver registration is done and a newly added device doesn't * match with any driver, don't block its consumers from probing in * case the consumer device is able to operate without this supplier. */ if (dev->fwnode && fw_devlink_drv_reg_done && !dev->can_match) fw_devlink_unblock_consumers(dev); if (parent) klist_add_tail(&dev->p->knode_parent, &parent->p->klist_children); sp = class_to_subsys(dev->class); if (sp) { mutex_lock(&sp->mutex); /* tie the class to the device */ klist_add_tail(&dev->p->knode_class, &sp->klist_devices); /* notify any interfaces that the device is here */ list_for_each_entry(class_intf, &sp->interfaces, node) if (class_intf->add_dev) class_intf->add_dev(dev); mutex_unlock(&sp->mutex); subsys_put(sp); } done: put_device(dev); return error; SysEntryError: if (MAJOR(dev->devt)) device_remove_file(dev, &dev_attr_dev); DevAttrError: device_pm_remove(dev); dpm_sysfs_remove(dev); DPMError: dev->driver = NULL; bus_remove_device(dev); BusError: device_remove_attrs(dev); AttrsError: device_remove_class_symlinks(dev); SymlinkError: device_remove_file(dev, &dev_attr_uevent); attrError: device_platform_notify_remove(dev); kobject_uevent(&dev->kobj, KOBJ_REMOVE); glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); Error: cleanup_glue_dir(dev, glue_dir); parent_error: put_device(parent); name_error: kfree(dev->p); dev->p = NULL; goto done; } EXPORT_SYMBOL_GPL(device_add); /** * device_register - register a device with the system. * @dev: pointer to the device structure * * This happens in two clean steps - initialize the device * and add it to the system. The two steps can be called * separately, but this is the easiest and most common. * I.e. you should only call the two helpers separately if * have a clearly defined need to use and refcount the device * before it is added to the hierarchy. * * For more information, see the kerneldoc for device_initialize() * and device_add(). * * NOTE: _Never_ directly free @dev after calling this function, even * if it returned an error! Always use put_device() to give up the * reference initialized in this function instead. */ int device_register(struct device *dev) { device_initialize(dev); return device_add(dev); } EXPORT_SYMBOL_GPL(device_register); /** * get_device - increment reference count for device. * @dev: device. * * This simply forwards the call to kobject_get(), though * we do take care to provide for the case that we get a NULL * pointer passed in. */ struct device *get_device(struct device *dev) { return dev ? kobj_to_dev(kobject_get(&dev->kobj)) : NULL; } EXPORT_SYMBOL_GPL(get_device); /** * put_device - decrement reference count. * @dev: device in question. */ void put_device(struct device *dev) { /* might_sleep(); */ if (dev) kobject_put(&dev->kobj); } EXPORT_SYMBOL_GPL(put_device); bool kill_device(struct device *dev) { /* * Require the device lock and set the "dead" flag to guarantee that * the update behavior is consistent with the other bitfields near * it and that we cannot have an asynchronous probe routine trying * to run while we are tearing out the bus/class/sysfs from * underneath the device. */ device_lock_assert(dev); if (dev->p->dead) return false; dev->p->dead = true; return true; } EXPORT_SYMBOL_GPL(kill_device); /** * device_del - delete device from system. * @dev: device. * * This is the first part of the device unregistration * sequence. This removes the device from the lists we control * from here, has it removed from the other driver model * subsystems it was added to in device_add(), and removes it * from the kobject hierarchy. * * NOTE: this should be called manually _iff_ device_add() was * also called manually. */ void device_del(struct device *dev) { struct subsys_private *sp; struct device *parent = dev->parent; struct kobject *glue_dir = NULL; struct class_interface *class_intf; unsigned int noio_flag; device_lock(dev); kill_device(dev); device_unlock(dev); if (dev->fwnode && dev->fwnode->dev == dev) dev->fwnode->dev = NULL; /* Notify clients of device removal. This call must come * before dpm_sysfs_remove(). */ noio_flag = memalloc_noio_save(); bus_notify(dev, BUS_NOTIFY_DEL_DEVICE); dpm_sysfs_remove(dev); if (parent) klist_del(&dev->p->knode_parent); if (MAJOR(dev->devt)) { devtmpfs_delete_node(dev); device_remove_sys_dev_entry(dev); device_remove_file(dev, &dev_attr_dev); } sp = class_to_subsys(dev->class); if (sp) { device_remove_class_symlinks(dev); mutex_lock(&sp->mutex); /* notify any interfaces that the device is now gone */ list_for_each_entry(class_intf, &sp->interfaces, node) if (class_intf->remove_dev) class_intf->remove_dev(dev); /* remove the device from the class list */ klist_del(&dev->p->knode_class); mutex_unlock(&sp->mutex); subsys_put(sp); } device_remove_file(dev, &dev_attr_uevent); device_remove_attrs(dev); bus_remove_device(dev); device_pm_remove(dev); driver_deferred_probe_del(dev); device_platform_notify_remove(dev); device_links_purge(dev); /* * If a device does not have a driver attached, we need to clean * up any managed resources. We do this in device_release(), but * it's never called (and we leak the device) if a managed * resource holds a reference to the device. So release all * managed resources here, like we do in driver_detach(). We * still need to do so again in device_release() in case someone * adds a new resource after this point, though. */ devres_release_all(dev); bus_notify(dev, BUS_NOTIFY_REMOVED_DEVICE); kobject_uevent(&dev->kobj, KOBJ_REMOVE); glue_dir = get_glue_dir(dev); kobject_del(&dev->kobj); cleanup_glue_dir(dev, glue_dir); memalloc_noio_restore(noio_flag); put_device(parent); } EXPORT_SYMBOL_GPL(device_del); /** * device_unregister - unregister device from system. * @dev: device going away. * * We do this in two parts, like we do device_register(). First, * we remove it from all the subsystems with device_del(), then * we decrement the reference count via put_device(). If that * is the final reference count, the device will be cleaned up * via device_release() above. Otherwise, the structure will * stick around until the final reference to the device is dropped. */ void device_unregister(struct device *dev) { pr_debug("device: '%s': %s\n", dev_name(dev), __func__); device_del(dev); put_device(dev); } EXPORT_SYMBOL_GPL(device_unregister); static struct device *prev_device(struct klist_iter *i) { struct klist_node *n = klist_prev(i); struct device *dev = NULL; struct device_private *p; if (n) { p = to_device_private_parent(n); dev = p->device; } return dev; } static struct device *next_device(struct klist_iter *i) { struct klist_node *n = klist_next(i); struct device *dev = NULL; struct device_private *p; if (n) { p = to_device_private_parent(n); dev = p->device; } return dev; } /** * device_get_devnode - path of device node file * @dev: device * @mode: returned file access mode * @uid: returned file owner * @gid: returned file group * @tmp: possibly allocated string * * Return the relative path of a possible device node. * Non-default names may need to allocate a memory to compose * a name. This memory is returned in tmp and needs to be * freed by the caller. */ const char *device_get_devnode(const struct device *dev, umode_t *mode, kuid_t *uid, kgid_t *gid, const char **tmp) { char *s; *tmp = NULL; /* the device type may provide a specific name */ if (dev->type && dev->type->devnode) *tmp = dev->type->devnode(dev, mode, uid, gid); if (*tmp) return *tmp; /* the class may provide a specific name */ if (dev->class && dev->class->devnode) *tmp = dev->class->devnode(dev, mode); if (*tmp) return *tmp; /* return name without allocation, tmp == NULL */ if (strchr(dev_name(dev), '!') == NULL) return dev_name(dev); /* replace '!' in the name with '/' */ s = kstrdup_and_replace(dev_name(dev), '!', '/', GFP_KERNEL); if (!s) return NULL; return *tmp = s; } /** * device_for_each_child - device child iterator. * @parent: parent struct device. * @fn: function to be called for each device. * @data: data for the callback. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. */ int device_for_each_child(struct device *parent, void *data, int (*fn)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; int error = 0; if (!parent || !parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); while (!error && (child = next_device(&i))) error = fn(child, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(device_for_each_child); /** * device_for_each_child_reverse - device child iterator in reversed order. * @parent: parent struct device. * @fn: function to be called for each device. * @data: data for the callback. * * Iterate over @parent's child devices, and call @fn for each, * passing it @data. * * We check the return of @fn each time. If it returns anything * other than 0, we break out and return that value. */ int device_for_each_child_reverse(struct device *parent, void *data, int (*fn)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; int error = 0; if (!parent || !parent->p) return 0; klist_iter_init(&parent->p->klist_children, &i); while ((child = prev_device(&i)) && !error) error = fn(child, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(device_for_each_child_reverse); /** * device_for_each_child_reverse_from - device child iterator in reversed order. * @parent: parent struct device. * @from: optional starting point in child list * @fn: function to be called for each device. * @data: data for the callback. * * Iterate over @parent's child devices, starting at @from, and call @fn * for each, passing it @data. This helper is identical to * device_for_each_child_reverse() when @from is NULL. * * @fn is checked each iteration. If it returns anything other than 0, * iteration stop and that value is returned to the caller of * device_for_each_child_reverse_from(); */ int device_for_each_child_reverse_from(struct device *parent, struct device *from, const void *data, int (*fn)(struct device *, const void *)) { struct klist_iter i; struct device *child; int error = 0; if (!parent->p) return 0; klist_iter_init_node(&parent->p->klist_children, &i, (from ? &from->p->knode_parent : NULL)); while ((child = prev_device(&i)) && !error) error = fn(child, data); klist_iter_exit(&i); return error; } EXPORT_SYMBOL_GPL(device_for_each_child_reverse_from); /** * device_find_child - device iterator for locating a particular device. * @parent: parent struct device * @match: Callback function to check device * @data: Data to pass to match function * * This is similar to the device_for_each_child() function above, but it * returns a reference to a device that is 'found' for later use, as * determined by the @match callback. * * The callback should return 0 if the device doesn't match and non-zero * if it does. If the callback returns non-zero and a reference to the * current device can be obtained, this function will return to the caller * and not iterate over any more devices. * * NOTE: you will need to drop the reference with put_device() after use. */ struct device *device_find_child(struct device *parent, void *data, int (*match)(struct device *dev, void *data)) { struct klist_iter i; struct device *child; if (!parent || !parent->p) return NULL; klist_iter_init(&parent->p->klist_children, &i); while ((child = next_device(&i))) if (match(child, data) && get_device(child)) break; klist_iter_exit(&i); return child; } EXPORT_SYMBOL_GPL(device_find_child); /** * device_find_child_by_name - device iterator for locating a child device. * @parent: parent struct device * @name: name of the child device * * This is similar to the device_find_child() function above, but it * returns a reference to a device that has the name @name. * * NOTE: you will need to drop the reference with put_device() after use. */ struct device *device_find_child_by_name(struct device *parent, const char *name) { struct klist_iter i; struct device *child; if (!parent) return NULL; klist_iter_init(&parent->p->klist_children, &i); while ((child = next_device(&i))) if (sysfs_streq(dev_name(child), name) && get_device(child)) break; klist_iter_exit(&i); return child; } EXPORT_SYMBOL_GPL(device_find_child_by_name); static int match_any(struct device *dev, void *unused) { return 1; } /** * device_find_any_child - device iterator for locating a child device, if any. * @parent: parent struct device * * This is similar to the device_find_child() function above, but it * returns a reference to a child device, if any. * * NOTE: you will need to drop the reference with put_device() after use. */ struct device *device_find_any_child(struct device *parent) { return device_find_child(parent, NULL, match_any); } EXPORT_SYMBOL_GPL(device_find_any_child); int __init devices_init(void) { devices_kset = kset_create_and_add("devices", &device_uevent_ops, NULL); if (!devices_kset) return -ENOMEM; dev_kobj = kobject_create_and_add("dev", NULL); if (!dev_kobj) goto dev_kobj_err; sysfs_dev_block_kobj = kobject_create_and_add("block", dev_kobj); if (!sysfs_dev_block_kobj) goto block_kobj_err; sysfs_dev_char_kobj = kobject_create_and_add("char", dev_kobj); if (!sysfs_dev_char_kobj) goto char_kobj_err; device_link_wq = alloc_workqueue("device_link_wq", 0, 0); if (!device_link_wq) goto wq_err; return 0; wq_err: kobject_put(sysfs_dev_char_kobj); char_kobj_err: kobject_put(sysfs_dev_block_kobj); block_kobj_err: kobject_put(dev_kobj); dev_kobj_err: kset_unregister(devices_kset); return -ENOMEM; } static int device_check_offline(struct device *dev, void *not_used) { int ret; ret = device_for_each_child(dev, NULL, device_check_offline); if (ret) return ret; return device_supports_offline(dev) && !dev->offline ? -EBUSY : 0; } /** * device_offline - Prepare the device for hot-removal. * @dev: Device to be put offline. * * Execute the device bus type's .offline() callback, if present, to prepare * the device for a subsequent hot-removal. If that succeeds, the device must * not be used until either it is removed or its bus type's .online() callback * is executed. * * Call under device_hotplug_lock. */ int device_offline(struct device *dev) { int ret; if (dev->offline_disabled) return -EPERM; ret = device_for_each_child(dev, NULL, device_check_offline); if (ret) return ret; device_lock(dev); if (device_supports_offline(dev)) { if (dev->offline) { ret = 1; } else { ret = dev->bus->offline(dev); if (!ret) { kobject_uevent(&dev->kobj, KOBJ_OFFLINE); dev->offline = true; } } } device_unlock(dev); return ret; } /** * device_online - Put the device back online after successful device_offline(). * @dev: Device to be put back online. * * If device_offline() has been successfully executed for @dev, but the device * has not been removed subsequently, execute its bus type's .online() callback * to indicate that the device can be used again. * * Call under device_hotplug_lock. */ int device_online(struct device *dev) { int ret = 0; device_lock(dev); if (device_supports_offline(dev)) { if (dev->offline) { ret = dev->bus->online(dev); if (!ret) { kobject_uevent(&dev->kobj, KOBJ_ONLINE); dev->offline = false; } } else { ret = 1; } } device_unlock(dev); return ret; } struct root_device { struct device dev; struct module *owner; }; static inline struct root_device *to_root_device(struct device *d) { return container_of(d, struct root_device, dev); } static void root_device_release(struct device *dev) { kfree(to_root_device(dev)); } /** * __root_device_register - allocate and register a root device * @name: root device name * @owner: owner module of the root device, usually THIS_MODULE * * This function allocates a root device and registers it * using device_register(). In order to free the returned * device, use root_device_unregister(). * * Root devices are dummy devices which allow other devices * to be grouped under /sys/devices. Use this function to * allocate a root device and then use it as the parent of * any device which should appear under /sys/devices/{name} * * The /sys/devices/{name} directory will also contain a * 'module' symlink which points to the @owner directory * in sysfs. * * Returns &struct device pointer on success, or ERR_PTR() on error. * * Note: You probably want to use root_device_register(). */ struct device *__root_device_register(const char *name, struct module *owner) { struct root_device *root; int err = -ENOMEM; root = kzalloc(sizeof(struct root_device), GFP_KERNEL); if (!root) return ERR_PTR(err); err = dev_set_name(&root->dev, "%s", name); if (err) { kfree(root); return ERR_PTR(err); } root->dev.release = root_device_release; err = device_register(&root->dev); if (err) { put_device(&root->dev); return ERR_PTR(err); } #ifdef CONFIG_MODULES /* gotta find a "cleaner" way to do this */ if (owner) { struct module_kobject *mk = &owner->mkobj; err = sysfs_create_link(&root->dev.kobj, &mk->kobj, "module"); if (err) { device_unregister(&root->dev); return ERR_PTR(err); } root->owner = owner; } #endif return &root->dev; } EXPORT_SYMBOL_GPL(__root_device_register); /** * root_device_unregister - unregister and free a root device * @dev: device going away * * This function unregisters and cleans up a device that was created by * root_device_register(). */ void root_device_unregister(struct device *dev) { struct root_device *root = to_root_device(dev); if (root->owner) sysfs_remove_link(&root->dev.kobj, "module"); device_unregister(dev); } EXPORT_SYMBOL_GPL(root_device_unregister); static void device_create_release(struct device *dev) { pr_debug("device: '%s': %s\n", dev_name(dev), __func__); kfree(dev); } static __printf(6, 0) struct device * device_create_groups_vargs(const struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, va_list args) { struct device *dev = NULL; int retval = -ENODEV; if (IS_ERR_OR_NULL(class)) goto error; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) { retval = -ENOMEM; goto error; } device_initialize(dev); dev->devt = devt; dev->class = class; dev->parent = parent; dev->groups = groups; dev->release = device_create_release; dev_set_drvdata(dev, drvdata); retval = kobject_set_name_vargs(&dev->kobj, fmt, args); if (retval) goto error; retval = device_add(dev); if (retval) goto error; return dev; error: put_device(dev); return ERR_PTR(retval); } /** * device_create - creates a device and registers it with sysfs * @class: pointer to the struct class that this device should be registered to * @parent: pointer to the parent struct device of this new device, if any * @devt: the dev_t for the char device to be added * @drvdata: the data to be added to the device for callbacks * @fmt: string for the device's name * * This function can be used by char device classes. A struct device * will be created in sysfs, registered to the specified class. * * A "dev" file will be created, showing the dev_t for the device, if * the dev_t is not 0,0. * If a pointer to a parent struct device is passed in, the newly created * struct device will be a child of that device in sysfs. * The pointer to the struct device will be returned from the call. * Any further sysfs files that might be required can be created using this * pointer. * * Returns &struct device pointer on success, or ERR_PTR() on error. */ struct device *device_create(const struct class *class, struct device *parent, dev_t devt, void *drvdata, const char *fmt, ...) { va_list vargs; struct device *dev; va_start(vargs, fmt); dev = device_create_groups_vargs(class, parent, devt, drvdata, NULL, fmt, vargs); va_end(vargs); return dev; } EXPORT_SYMBOL_GPL(device_create); /** * device_create_with_groups - creates a device and registers it with sysfs * @class: pointer to the struct class that this device should be registered to * @parent: pointer to the parent struct device of this new device, if any * @devt: the dev_t for the char device to be added * @drvdata: the data to be added to the device for callbacks * @groups: NULL-terminated list of attribute groups to be created * @fmt: string for the device's name * * This function can be used by char device classes. A struct device * will be created in sysfs, registered to the specified class. * Additional attributes specified in the groups parameter will also * be created automatically. * * A "dev" file will be created, showing the dev_t for the device, if * the dev_t is not 0,0. * If a pointer to a parent struct device is passed in, the newly created * struct device will be a child of that device in sysfs. * The pointer to the struct device will be returned from the call. * Any further sysfs files that might be required can be created using this * pointer. * * Returns &struct device pointer on success, or ERR_PTR() on error. */ struct device *device_create_with_groups(const struct class *class, struct device *parent, dev_t devt, void *drvdata, const struct attribute_group **groups, const char *fmt, ...) { va_list vargs; struct device *dev; va_start(vargs, fmt); dev = device_create_groups_vargs(class, parent, devt, drvdata, groups, fmt, vargs); va_end(vargs); return dev; } EXPORT_SYMBOL_GPL(device_create_with_groups); /** * device_destroy - removes a device that was created with device_create() * @class: pointer to the struct class that this device was registered with * @devt: the dev_t of the device that was previously registered * * This call unregisters and cleans up a device that was created with a * call to device_create(). */ void device_destroy(const struct class *class, dev_t devt) { struct device *dev; dev = class_find_device_by_devt(class, devt); if (dev) { put_device(dev); device_unregister(dev); } } EXPORT_SYMBOL_GPL(device_destroy); /** * device_rename - renames a device * @dev: the pointer to the struct device to be renamed * @new_name: the new name of the device * * It is the responsibility of the caller to provide mutual * exclusion between two different calls of device_rename * on the same device to ensure that new_name is valid and * won't conflict with other devices. * * Note: given that some subsystems (networking and infiniband) use this * function, with no immediate plans for this to change, we cannot assume or * require that this function not be called at all. * * However, if you're writing new code, do not call this function. The following * text from Kay Sievers offers some insight: * * Renaming devices is racy at many levels, symlinks and other stuff are not * replaced atomically, and you get a "move" uevent, but it's not easy to * connect the event to the old and new device. Device nodes are not renamed at * all, there isn't even support for that in the kernel now. * * In the meantime, during renaming, your target name might be taken by another * driver, creating conflicts. Or the old name is taken directly after you * renamed it -- then you get events for the same DEVPATH, before you even see * the "move" event. It's just a mess, and nothing new should ever rely on * kernel device renaming. Besides that, it's not even implemented now for * other things than (driver-core wise very simple) network devices. * * Make up a "real" name in the driver before you register anything, or add * some other attributes for userspace to find the device, or use udev to add * symlinks -- but never rename kernel devices later, it's a complete mess. We * don't even want to get into that and try to implement the missing pieces in * the core. We really have other pieces to fix in the driver core mess. :) */ int device_rename(struct device *dev, const char *new_name) { struct subsys_private *sp = NULL; struct kobject *kobj = &dev->kobj; char *old_device_name = NULL; int error; bool is_link_renamed = false; dev = get_device(dev); if (!dev) return -EINVAL; dev_dbg(dev, "renaming to %s\n", new_name); old_device_name = kstrdup(dev_name(dev), GFP_KERNEL); if (!old_device_name) { error = -ENOMEM; goto out; } if (dev->class) { sp = class_to_subsys(dev->class); if (!sp) { error = -EINVAL; goto out; } error = sysfs_rename_link_ns(&sp->subsys.kobj, kobj, old_device_name, new_name, kobject_namespace(kobj)); if (error) goto out; is_link_renamed = true; } error = kobject_rename(kobj, new_name); out: if (error && is_link_renamed) sysfs_rename_link_ns(&sp->subsys.kobj, kobj, new_name, old_device_name, kobject_namespace(kobj)); subsys_put(sp); put_device(dev); kfree(old_device_name); return error; } EXPORT_SYMBOL_GPL(device_rename); static int device_move_class_links(struct device *dev, struct device *old_parent, struct device *new_parent) { int error = 0; if (old_parent) sysfs_remove_link(&dev->kobj, "device"); if (new_parent) error = sysfs_create_link(&dev->kobj, &new_parent->kobj, "device"); return error; } /** * device_move - moves a device to a new parent * @dev: the pointer to the struct device to be moved * @new_parent: the new parent of the device (can be NULL) * @dpm_order: how to reorder the dpm_list */ int device_move(struct device *dev, struct device *new_parent, enum dpm_order dpm_order) { int error; struct device *old_parent; struct kobject *new_parent_kobj; dev = get_device(dev); if (!dev) return -EINVAL; device_pm_lock(); new_parent = get_device(new_parent); new_parent_kobj = get_device_parent(dev, new_parent); if (IS_ERR(new_parent_kobj)) { error = PTR_ERR(new_parent_kobj); put_device(new_parent); goto out; } pr_debug("device: '%s': %s: moving to '%s'\n", dev_name(dev), __func__, new_parent ? dev_name(new_parent) : "<NULL>"); error = kobject_move(&dev->kobj, new_parent_kobj); if (error) { cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } old_parent = dev->parent; dev->parent = new_parent; if (old_parent) klist_remove(&dev->p->knode_parent); if (new_parent) { klist_add_tail(&dev->p->knode_parent, &new_parent->p->klist_children); set_dev_node(dev, dev_to_node(new_parent)); } if (dev->class) { error = device_move_class_links(dev, old_parent, new_parent); if (error) { /* We ignore errors on cleanup since we're hosed anyway... */ device_move_class_links(dev, new_parent, old_parent); if (!kobject_move(&dev->kobj, &old_parent->kobj)) { if (new_parent) klist_remove(&dev->p->knode_parent); dev->parent = old_parent; if (old_parent) { klist_add_tail(&dev->p->knode_parent, &old_parent->p->klist_children); set_dev_node(dev, dev_to_node(old_parent)); } } cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } } switch (dpm_order) { case DPM_ORDER_NONE: break; case DPM_ORDER_DEV_AFTER_PARENT: device_pm_move_after(dev, new_parent); devices_kset_move_after(dev, new_parent); break; case DPM_ORDER_PARENT_BEFORE_DEV: device_pm_move_before(new_parent, dev); devices_kset_move_before(new_parent, dev); break; case DPM_ORDER_DEV_LAST: device_pm_move_last(dev); devices_kset_move_last(dev); break; } put_device(old_parent); out: device_pm_unlock(); put_device(dev); return error; } EXPORT_SYMBOL_GPL(device_move); static int device_attrs_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { struct kobject *kobj = &dev->kobj; const struct class *class = dev->class; const struct device_type *type = dev->type; int error; if (class) { /* * Change the device groups of the device class for @dev to * @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, class->dev_groups, kuid, kgid); if (error) return error; } if (type) { /* * Change the device groups of the device type for @dev to * @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, type->groups, kuid, kgid); if (error) return error; } /* Change the device groups of @dev to @kuid/@kgid. */ error = sysfs_groups_change_owner(kobj, dev->groups, kuid, kgid); if (error) return error; if (device_supports_offline(dev) && !dev->offline_disabled) { /* Change online device attributes of @dev to @kuid/@kgid. */ error = sysfs_file_change_owner(kobj, dev_attr_online.attr.name, kuid, kgid); if (error) return error; } return 0; } /** * device_change_owner - change the owner of an existing device. * @dev: device. * @kuid: new owner's kuid * @kgid: new owner's kgid * * This changes the owner of @dev and its corresponding sysfs entries to * @kuid/@kgid. This function closely mirrors how @dev was added via driver * core. * * Returns 0 on success or error code on failure. */ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid) { int error; struct kobject *kobj = &dev->kobj; struct subsys_private *sp; dev = get_device(dev); if (!dev) return -EINVAL; /* * Change the kobject and the default attributes and groups of the * ktype associated with it to @kuid/@kgid. */ error = sysfs_change_owner(kobj, kuid, kgid); if (error) goto out; /* * Change the uevent file for @dev to the new owner. The uevent file * was created in a separate step when @dev got added and we mirror * that step here. */ error = sysfs_file_change_owner(kobj, dev_attr_uevent.attr.name, kuid, kgid); if (error) goto out; /* * Change the device groups, the device groups associated with the * device class, and the groups associated with the device type of @dev * to @kuid/@kgid. */ error = device_attrs_change_owner(dev, kuid, kgid); if (error) goto out; error = dpm_sysfs_change_owner(dev, kuid, kgid); if (error) goto out; /* * Change the owner of the symlink located in the class directory of * the device class associated with @dev which points to the actual * directory entry for @dev to @kuid/@kgid. This ensures that the * symlink shows the same permissions as its target. */ sp = class_to_subsys(dev->class); if (!sp) { error = -EINVAL; goto out; } error = sysfs_link_change_owner(&sp->subsys.kobj, &dev->kobj, dev_name(dev), kuid, kgid); subsys_put(sp); out: put_device(dev); return error; } EXPORT_SYMBOL_GPL(device_change_owner); /** * device_shutdown - call ->shutdown() on each device to shutdown. */ void device_shutdown(void) { struct device *dev, *parent; wait_for_device_probe(); device_block_probing(); cpufreq_suspend(); spin_lock(&devices_kset->list_lock); /* * Walk the devices list backward, shutting down each in turn. * Beware that device unplug events may also start pulling * devices offline, even as the system is shutting down. */ while (!list_empty(&devices_kset->list)) { dev = list_entry(devices_kset->list.prev, struct device, kobj.entry); /* * hold reference count of device's parent to * prevent it from being freed because parent's * lock is to be held */ parent = get_device(dev->parent); get_device(dev); /* * Make sure the device is off the kset list, in the * event that dev->*->shutdown() doesn't remove it. */ list_del_init(&dev->kobj.entry); spin_unlock(&devices_kset->list_lock); /* hold lock to avoid race with probe/release */ if (parent) device_lock(parent); device_lock(dev); /* Don't allow any more runtime suspends */ pm_runtime_get_noresume(dev); pm_runtime_barrier(dev); if (dev->class && dev->class->shutdown_pre) { if (initcall_debug) dev_info(dev, "shutdown_pre\n"); dev->class->shutdown_pre(dev); } if (dev->bus && dev->bus->shutdown) { if (initcall_debug) dev_info(dev, "shutdown\n"); dev->bus->shutdown(dev); } else if (dev->driver && dev->driver->shutdown) { if (initcall_debug) dev_info(dev, "shutdown\n"); dev->driver->shutdown(dev); } device_unlock(dev); if (parent) device_unlock(parent); put_device(dev); put_device(parent); spin_lock(&devices_kset->list_lock); } spin_unlock(&devices_kset->list_lock); } /* * Device logging functions */ #ifdef CONFIG_PRINTK static void set_dev_info(const struct device *dev, struct dev_printk_info *dev_info) { const char *subsys; memset(dev_info, 0, sizeof(*dev_info)); if (dev->class) subsys = dev->class->name; else if (dev->bus) subsys = dev->bus->name; else return; strscpy(dev_info->subsystem, subsys); /* * Add device identifier DEVICE=: * b12:8 block dev_t * c127:3 char dev_t * n8 netdev ifindex * +sound:card0 subsystem:devname */ if (MAJOR(dev->devt)) { char c; if (strcmp(subsys, "block") == 0) c = 'b'; else c = 'c'; snprintf(dev_info->device, sizeof(dev_info->device), "%c%u:%u", c, MAJOR(dev->devt), MINOR(dev->devt)); } else if (strcmp(subsys, "net") == 0) { struct net_device *net = to_net_dev(dev); snprintf(dev_info->device, sizeof(dev_info->device), "n%u", net->ifindex); } else { snprintf(dev_info->device, sizeof(dev_info->device), "+%s:%s", subsys, dev_name(dev)); } } int dev_vprintk_emit(int level, const struct device *dev, const char *fmt, va_list args) { struct dev_printk_info dev_info; set_dev_info(dev, &dev_info); return vprintk_emit(0, level, &dev_info, fmt, args); } EXPORT_SYMBOL(dev_vprintk_emit); int dev_printk_emit(int level, const struct device *dev, const char *fmt, ...) { va_list args; int r; va_start(args, fmt); r = dev_vprintk_emit(level, dev, fmt, args); va_end(args); return r; } EXPORT_SYMBOL(dev_printk_emit); static void __dev_printk(const char *level, const struct device *dev, struct va_format *vaf) { if (dev) dev_printk_emit(level[1] - '0', dev, "%s %s: %pV", dev_driver_string(dev), dev_name(dev), vaf); else printk("%s(NULL device *): %pV", level, vaf); } void _dev_printk(const char *level, const struct device *dev, const char *fmt, ...) { struct va_format vaf; va_list args; va_start(args, fmt); vaf.fmt = fmt; vaf.va = &args; __dev_printk(level, dev, &vaf); va_end(args); } EXPORT_SYMBOL(_dev_printk); #define define_dev_printk_level(func, kern_level) \ void func(const struct device *dev, const char *fmt, ...) \ { \ struct va_format vaf; \ va_list args; \ \ va_start(args, fmt); \ \ vaf.fmt = fmt; \ vaf.va = &args; \ \ __dev_printk(kern_level, dev, &vaf); \ \ va_end(args); \ } \ EXPORT_SYMBOL(func); define_dev_printk_level(_dev_emerg, KERN_EMERG); define_dev_printk_level(_dev_alert, KERN_ALERT); define_dev_printk_level(_dev_crit, KERN_CRIT); define_dev_printk_level(_dev_err, KERN_ERR); define_dev_printk_level(_dev_warn, KERN_WARNING); define_dev_printk_level(_dev_notice, KERN_NOTICE); define_dev_printk_level(_dev_info, KERN_INFO); #endif static void __dev_probe_failed(const struct device *dev, int err, bool fatal, const char *fmt, va_list vargsp) { struct va_format vaf; va_list vargs; /* * On x86_64 and possibly on other architectures, va_list is actually a * size-1 array containing a structure. As a result, function parameter * vargsp decays from T[1] to T*, and &vargsp has type T** rather than * T(*)[1], which is expected by its assignment to vaf.va below. * * One standard way to solve this mess is by creating a copy in a local * variable of type va_list and then using a pointer to that local copy * instead, which is the approach employed here. */ va_copy(vargs, vargsp); vaf.fmt = fmt; vaf.va = &vargs; switch (err) { case -EPROBE_DEFER: device_set_deferred_probe_reason(dev, &vaf); dev_dbg(dev, "error %pe: %pV", ERR_PTR(err), &vaf); break; case -ENOMEM: /* Don't print anything on -ENOMEM, there's already enough output */ break; default: /* Log fatal final failures as errors, otherwise produce warnings */ if (fatal) dev_err(dev, "error %pe: %pV", ERR_PTR(err), &vaf); else dev_warn(dev, "error %pe: %pV", ERR_PTR(err), &vaf); break; } va_end(vargs); } /** * dev_err_probe - probe error check and log helper * @dev: the pointer to the struct device * @err: error value to test * @fmt: printf-style format string * @...: arguments as specified in the format string * * This helper implements common pattern present in probe functions for error * checking: print debug or error message depending if the error value is * -EPROBE_DEFER and propagate error upwards. * In case of -EPROBE_DEFER it sets also defer probe reason, which can be * checked later by reading devices_deferred debugfs attribute. * It replaces the following code sequence:: * * if (err != -EPROBE_DEFER) * dev_err(dev, ...); * else * dev_dbg(dev, ...); * return err; * * with:: * * return dev_err_probe(dev, err, ...); * * Using this helper in your probe function is totally fine even if @err * is known to never be -EPROBE_DEFER. * The benefit compared to a normal dev_err() is the standardized format * of the error code, which is emitted symbolically (i.e. you get "EAGAIN" * instead of "-35"), and having the error code returned allows more * compact error paths. * * Returns @err. */ int dev_err_probe(const struct device *dev, int err, const char *fmt, ...) { va_list vargs; va_start(vargs, fmt); /* Use dev_err() for logging when err doesn't equal -EPROBE_DEFER */ __dev_probe_failed(dev, err, true, fmt, vargs); va_end(vargs); return err; } EXPORT_SYMBOL_GPL(dev_err_probe); /** * dev_warn_probe - probe error check and log helper * @dev: the pointer to the struct device * @err: error value to test * @fmt: printf-style format string * @...: arguments as specified in the format string * * This helper implements common pattern present in probe functions for error * checking: print debug or warning message depending if the error value is * -EPROBE_DEFER and propagate error upwards. * In case of -EPROBE_DEFER it sets also defer probe reason, which can be * checked later by reading devices_deferred debugfs attribute. * It replaces the following code sequence:: * * if (err != -EPROBE_DEFER) * dev_warn(dev, ...); * else * dev_dbg(dev, ...); * return err; * * with:: * * return dev_warn_probe(dev, err, ...); * * Using this helper in your probe function is totally fine even if @err * is known to never be -EPROBE_DEFER. * The benefit compared to a normal dev_warn() is the standardized format * of the error code, which is emitted symbolically (i.e. you get "EAGAIN" * instead of "-35"), and having the error code returned allows more * compact error paths. * * Returns @err. */ int dev_warn_probe(const struct device *dev, int err, const char *fmt, ...) { va_list vargs; va_start(vargs, fmt); /* Use dev_warn() for logging when err doesn't equal -EPROBE_DEFER */ __dev_probe_failed(dev, err, false, fmt, vargs); va_end(vargs); return err; } EXPORT_SYMBOL_GPL(dev_warn_probe); static inline bool fwnode_is_primary(struct fwnode_handle *fwnode) { return fwnode && !IS_ERR(fwnode->secondary); } /** * set_primary_fwnode - Change the primary firmware node of a given device. * @dev: Device to handle. * @fwnode: New primary firmware node of the device. * * Set the device's firmware node pointer to @fwnode, but if a secondary * firmware node of the device is present, preserve it. * * Valid fwnode cases are: * - primary --> secondary --> -ENODEV * - primary --> NULL * - secondary --> -ENODEV * - NULL */ void set_primary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { struct device *parent = dev->parent; struct fwnode_handle *fn = dev->fwnode; if (fwnode) { if (fwnode_is_primary(fn)) fn = fn->secondary; if (fn) { WARN_ON(fwnode->secondary); fwnode->secondary = fn; } dev->fwnode = fwnode; } else { if (fwnode_is_primary(fn)) { dev->fwnode = fn->secondary; /* Skip nullifying fn->secondary if the primary is shared */ if (parent && fn == parent->fwnode) return; /* Set fn->secondary = NULL, so fn remains the primary fwnode */ fn->secondary = NULL; } else { dev->fwnode = NULL; } } } EXPORT_SYMBOL_GPL(set_primary_fwnode); /** * set_secondary_fwnode - Change the secondary firmware node of a given device. * @dev: Device to handle. * @fwnode: New secondary firmware node of the device. * * If a primary firmware node of the device is present, set its secondary * pointer to @fwnode. Otherwise, set the device's firmware node pointer to * @fwnode. */ void set_secondary_fwnode(struct device *dev, struct fwnode_handle *fwnode) { if (fwnode) fwnode->secondary = ERR_PTR(-ENODEV); if (fwnode_is_primary(dev->fwnode)) dev->fwnode->secondary = fwnode; else dev->fwnode = fwnode; } EXPORT_SYMBOL_GPL(set_secondary_fwnode); /** * device_set_of_node_from_dev - reuse device-tree node of another device * @dev: device whose device-tree node is being set * @dev2: device whose device-tree node is being reused * * Takes another reference to the new device-tree node after first dropping * any reference held to the old node. */ void device_set_of_node_from_dev(struct device *dev, const struct device *dev2) { of_node_put(dev->of_node); dev->of_node = of_node_get(dev2->of_node); dev->of_node_reused = true; } EXPORT_SYMBOL_GPL(device_set_of_node_from_dev); void device_set_node(struct device *dev, struct fwnode_handle *fwnode) { dev->fwnode = fwnode; dev->of_node = to_of_node(fwnode); } EXPORT_SYMBOL_GPL(device_set_node); int device_match_name(struct device *dev, const void *name) { return sysfs_streq(dev_name(dev), name); } EXPORT_SYMBOL_GPL(device_match_name); int device_match_of_node(struct device *dev, const void *np) { return dev->of_node == np; } EXPORT_SYMBOL_GPL(device_match_of_node); int device_match_fwnode(struct device *dev, const void *fwnode) { return dev_fwnode(dev) == fwnode; } EXPORT_SYMBOL_GPL(device_match_fwnode); int device_match_devt(struct device *dev, const void *pdevt) { return dev->devt == *(dev_t *)pdevt; } EXPORT_SYMBOL_GPL(device_match_devt); int device_match_acpi_dev(struct device *dev, const void *adev) { return ACPI_COMPANION(dev) == adev; } EXPORT_SYMBOL(device_match_acpi_dev); int device_match_acpi_handle(struct device *dev, const void *handle) { return ACPI_HANDLE(dev) == handle; } EXPORT_SYMBOL(device_match_acpi_handle); int device_match_any(struct device *dev, const void *unused) { return 1; } EXPORT_SYMBOL_GPL(device_match_any); |
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2021, Linaro Ltd <loic.poulain@linaro.org> */ #include <linux/bitmap.h> #include <linux/err.h> #include <linux/errno.h> #include <linux/debugfs.h> #include <linux/fs.h> #include <linux/init.h> #include <linux/idr.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/poll.h> #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/types.h> #include <linux/uaccess.h> #include <linux/termios.h> #include <linux/wwan.h> #include <net/rtnetlink.h> #include <uapi/linux/wwan.h> /* Maximum number of minors in use */ #define WWAN_MAX_MINORS (1 << MINORBITS) static DEFINE_MUTEX(wwan_register_lock); /* WWAN device create|remove lock */ static DEFINE_IDA(minors); /* minors for WWAN port chardevs */ static DEFINE_IDA(wwan_dev_ids); /* for unique WWAN device IDs */ static const struct class wwan_class = { .name = "wwan", }; static int wwan_major; static struct dentry *wwan_debugfs_dir; #define to_wwan_dev(d) container_of(d, struct wwan_device, dev) #define to_wwan_port(d) container_of(d, struct wwan_port, dev) /* WWAN port flags */ #define WWAN_PORT_TX_OFF 0 /** * struct wwan_device - The structure that defines a WWAN device * * @id: WWAN device unique ID. * @dev: Underlying device. * @port_id: Current available port ID to pick. * @ops: wwan device ops * @ops_ctxt: context to pass to ops * @debugfs_dir: WWAN device debugfs dir */ struct wwan_device { unsigned int id; struct device dev; atomic_t port_id; const struct wwan_ops *ops; void *ops_ctxt; #ifdef CONFIG_WWAN_DEBUGFS struct dentry *debugfs_dir; #endif }; /** * struct wwan_port - The structure that defines a WWAN port * @type: Port type * @start_count: Port start counter * @flags: Store port state and capabilities * @ops: Pointer to WWAN port operations * @ops_lock: Protect port ops * @dev: Underlying device * @rxq: Buffer inbound queue * @waitqueue: The waitqueue for port fops (read/write/poll) * @data_lock: Port specific data access serialization * @headroom_len: SKB reserved headroom size * @frag_len: Length to fragment packet * @at_data: AT port specific data */ struct wwan_port { enum wwan_port_type type; unsigned int start_count; unsigned long flags; const struct wwan_port_ops *ops; struct mutex ops_lock; /* Serialize ops + protect against removal */ struct device dev; struct sk_buff_head rxq; wait_queue_head_t waitqueue; struct mutex data_lock; /* Port specific data access serialization */ size_t headroom_len; size_t frag_len; union { struct { struct ktermios termios; int mdmbits; } at_data; }; }; static ssize_t index_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wwan_device *wwan = to_wwan_dev(dev); return sprintf(buf, "%d\n", wwan->id); } static DEVICE_ATTR_RO(index); static struct attribute *wwan_dev_attrs[] = { &dev_attr_index.attr, NULL, }; ATTRIBUTE_GROUPS(wwan_dev); static void wwan_dev_destroy(struct device *dev) { struct wwan_device *wwandev = to_wwan_dev(dev); ida_free(&wwan_dev_ids, wwandev->id); kfree(wwandev); } static const struct device_type wwan_dev_type = { .name = "wwan_dev", .release = wwan_dev_destroy, .groups = wwan_dev_groups, }; static int wwan_dev_parent_match(struct device *dev, const void *parent) { return (dev->type == &wwan_dev_type && (dev->parent == parent || dev == parent)); } static struct wwan_device *wwan_dev_get_by_parent(struct device *parent) { struct device *dev; dev = class_find_device(&wwan_class, NULL, parent, wwan_dev_parent_match); if (!dev) return ERR_PTR(-ENODEV); return to_wwan_dev(dev); } static int wwan_dev_name_match(struct device *dev, const void *name) { return dev->type == &wwan_dev_type && strcmp(dev_name(dev), name) == 0; } static struct wwan_device *wwan_dev_get_by_name(const char *name) { struct device *dev; dev = class_find_device(&wwan_class, NULL, name, wwan_dev_name_match); if (!dev) return ERR_PTR(-ENODEV); return to_wwan_dev(dev); } #ifdef CONFIG_WWAN_DEBUGFS struct dentry *wwan_get_debugfs_dir(struct device *parent) { struct wwan_device *wwandev; wwandev = wwan_dev_get_by_parent(parent); if (IS_ERR(wwandev)) return ERR_CAST(wwandev); return wwandev->debugfs_dir; } EXPORT_SYMBOL_GPL(wwan_get_debugfs_dir); static int wwan_dev_debugfs_match(struct device *dev, const void *dir) { struct wwan_device *wwandev; if (dev->type != &wwan_dev_type) return 0; wwandev = to_wwan_dev(dev); return wwandev->debugfs_dir == dir; } static struct wwan_device *wwan_dev_get_by_debugfs(struct dentry *dir) { struct device *dev; dev = class_find_device(&wwan_class, NULL, dir, wwan_dev_debugfs_match); if (!dev) return ERR_PTR(-ENODEV); return to_wwan_dev(dev); } void wwan_put_debugfs_dir(struct dentry *dir) { struct wwan_device *wwandev = wwan_dev_get_by_debugfs(dir); if (WARN_ON(IS_ERR(wwandev))) return; /* wwan_dev_get_by_debugfs() also got a reference */ put_device(&wwandev->dev); put_device(&wwandev->dev); } EXPORT_SYMBOL_GPL(wwan_put_debugfs_dir); #endif /* This function allocates and registers a new WWAN device OR if a WWAN device * already exist for the given parent, it gets a reference and return it. * This function is not exported (for now), it is called indirectly via * wwan_create_port(). */ static struct wwan_device *wwan_create_dev(struct device *parent) { struct wwan_device *wwandev; int err, id; /* The 'find-alloc-register' operation must be protected against * concurrent execution, a WWAN device is possibly shared between * multiple callers or concurrently unregistered from wwan_remove_dev(). */ mutex_lock(&wwan_register_lock); /* If wwandev already exists, return it */ wwandev = wwan_dev_get_by_parent(parent); if (!IS_ERR(wwandev)) goto done_unlock; id = ida_alloc(&wwan_dev_ids, GFP_KERNEL); if (id < 0) { wwandev = ERR_PTR(id); goto done_unlock; } wwandev = kzalloc(sizeof(*wwandev), GFP_KERNEL); if (!wwandev) { wwandev = ERR_PTR(-ENOMEM); ida_free(&wwan_dev_ids, id); goto done_unlock; } wwandev->dev.parent = parent; wwandev->dev.class = &wwan_class; wwandev->dev.type = &wwan_dev_type; wwandev->id = id; dev_set_name(&wwandev->dev, "wwan%d", wwandev->id); err = device_register(&wwandev->dev); if (err) { put_device(&wwandev->dev); wwandev = ERR_PTR(err); goto done_unlock; } #ifdef CONFIG_WWAN_DEBUGFS wwandev->debugfs_dir = debugfs_create_dir(kobject_name(&wwandev->dev.kobj), wwan_debugfs_dir); #endif done_unlock: mutex_unlock(&wwan_register_lock); return wwandev; } static int is_wwan_child(struct device *dev, void *data) { return dev->class == &wwan_class; } static void wwan_remove_dev(struct wwan_device *wwandev) { int ret; /* Prevent concurrent picking from wwan_create_dev */ mutex_lock(&wwan_register_lock); /* WWAN device is created and registered (get+add) along with its first * child port, and subsequent port registrations only grab a reference * (get). The WWAN device must then be unregistered (del+put) along with * its last port, and reference simply dropped (put) otherwise. In the * same fashion, we must not unregister it when the ops are still there. */ if (wwandev->ops) ret = 1; else ret = device_for_each_child(&wwandev->dev, NULL, is_wwan_child); if (!ret) { #ifdef CONFIG_WWAN_DEBUGFS debugfs_remove_recursive(wwandev->debugfs_dir); #endif device_unregister(&wwandev->dev); } else { put_device(&wwandev->dev); } mutex_unlock(&wwan_register_lock); } /* ------- WWAN port management ------- */ static const struct { const char * const name; /* Port type name */ const char * const devsuf; /* Port device name suffix */ } wwan_port_types[WWAN_PORT_MAX + 1] = { [WWAN_PORT_AT] = { .name = "AT", .devsuf = "at", }, [WWAN_PORT_MBIM] = { .name = "MBIM", .devsuf = "mbim", }, [WWAN_PORT_QMI] = { .name = "QMI", .devsuf = "qmi", }, [WWAN_PORT_QCDM] = { .name = "QCDM", .devsuf = "qcdm", }, [WWAN_PORT_FIREHOSE] = { .name = "FIREHOSE", .devsuf = "firehose", }, [WWAN_PORT_XMMRPC] = { .name = "XMMRPC", .devsuf = "xmmrpc", }, [WWAN_PORT_FASTBOOT] = { .name = "FASTBOOT", .devsuf = "fastboot", }, [WWAN_PORT_ADB] = { .name = "ADB", .devsuf = "adb", }, [WWAN_PORT_MIPC] = { .name = "MIPC", .devsuf = "mipc", }, }; static ssize_t type_show(struct device *dev, struct device_attribute *attr, char *buf) { struct wwan_port *port = to_wwan_port(dev); return sprintf(buf, "%s\n", wwan_port_types[port->type].name); } static DEVICE_ATTR_RO(type); static struct attribute *wwan_port_attrs[] = { &dev_attr_type.attr, NULL, }; ATTRIBUTE_GROUPS(wwan_port); static void wwan_port_destroy(struct device *dev) { struct wwan_port *port = to_wwan_port(dev); ida_free(&minors, MINOR(port->dev.devt)); mutex_destroy(&port->data_lock); mutex_destroy(&port->ops_lock); kfree(port); } static const struct device_type wwan_port_dev_type = { .name = "wwan_port", .release = wwan_port_destroy, .groups = wwan_port_groups, }; static int wwan_port_minor_match(struct device *dev, const void *minor) { return (dev->type == &wwan_port_dev_type && MINOR(dev->devt) == *(unsigned int *)minor); } static struct wwan_port *wwan_port_get_by_minor(unsigned int minor) { struct device *dev; dev = class_find_device(&wwan_class, NULL, &minor, wwan_port_minor_match); if (!dev) return ERR_PTR(-ENODEV); return to_wwan_port(dev); } /* Allocate and set unique name based on passed format * * Name allocation approach is highly inspired by the __dev_alloc_name() * function. * * To avoid names collision, the caller must prevent the new port device * registration as well as concurrent invocation of this function. */ static int __wwan_port_dev_assign_name(struct wwan_port *port, const char *fmt) { struct wwan_device *wwandev = to_wwan_dev(port->dev.parent); const unsigned int max_ports = PAGE_SIZE * 8; struct class_dev_iter iter; unsigned long *idmap; struct device *dev; char buf[0x20]; int id; idmap = bitmap_zalloc(max_ports, GFP_KERNEL); if (!idmap) return -ENOMEM; /* Collect ids of same name format ports */ class_dev_iter_init(&iter, &wwan_class, NULL, &wwan_port_dev_type); while ((dev = class_dev_iter_next(&iter))) { if (dev->parent != &wwandev->dev) continue; if (sscanf(dev_name(dev), fmt, &id) != 1) continue; if (id < 0 || id >= max_ports) continue; set_bit(id, idmap); } class_dev_iter_exit(&iter); /* Allocate unique id */ id = find_first_zero_bit(idmap, max_ports); bitmap_free(idmap); snprintf(buf, sizeof(buf), fmt, id); /* Name generation */ dev = device_find_child_by_name(&wwandev->dev, buf); if (dev) { put_device(dev); return -ENFILE; } return dev_set_name(&port->dev, "%s", buf); } struct wwan_port *wwan_create_port(struct device *parent, enum wwan_port_type type, const struct wwan_port_ops *ops, struct wwan_port_caps *caps, void *drvdata) { struct wwan_device *wwandev; struct wwan_port *port; char namefmt[0x20]; int minor, err; if (type > WWAN_PORT_MAX || !ops) return ERR_PTR(-EINVAL); /* A port is always a child of a WWAN device, retrieve (allocate or * pick) the WWAN device based on the provided parent device. */ wwandev = wwan_create_dev(parent); if (IS_ERR(wwandev)) return ERR_CAST(wwandev); /* A port is exposed as character device, get a minor */ minor = ida_alloc_range(&minors, 0, WWAN_MAX_MINORS - 1, GFP_KERNEL); if (minor < 0) { err = minor; goto error_wwandev_remove; } port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) { err = -ENOMEM; ida_free(&minors, minor); goto error_wwandev_remove; } port->type = type; port->ops = ops; port->frag_len = caps ? caps->frag_len : SIZE_MAX; port->headroom_len = caps ? caps->headroom_len : 0; mutex_init(&port->ops_lock); skb_queue_head_init(&port->rxq); init_waitqueue_head(&port->waitqueue); mutex_init(&port->data_lock); port->dev.parent = &wwandev->dev; port->dev.class = &wwan_class; port->dev.type = &wwan_port_dev_type; port->dev.devt = MKDEV(wwan_major, minor); dev_set_drvdata(&port->dev, drvdata); /* allocate unique name based on wwan device id, port type and number */ snprintf(namefmt, sizeof(namefmt), "wwan%u%s%%d", wwandev->id, wwan_port_types[port->type].devsuf); /* Serialize ports registration */ mutex_lock(&wwan_register_lock); __wwan_port_dev_assign_name(port, namefmt); err = device_register(&port->dev); mutex_unlock(&wwan_register_lock); if (err) goto error_put_device; dev_info(&wwandev->dev, "port %s attached\n", dev_name(&port->dev)); return port; error_put_device: put_device(&port->dev); error_wwandev_remove: wwan_remove_dev(wwandev); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(wwan_create_port); void wwan_remove_port(struct wwan_port *port) { struct wwan_device *wwandev = to_wwan_dev(port->dev.parent); mutex_lock(&port->ops_lock); if (port->start_count) port->ops->stop(port); port->ops = NULL; /* Prevent any new port operations (e.g. from fops) */ mutex_unlock(&port->ops_lock); wake_up_interruptible(&port->waitqueue); skb_queue_purge(&port->rxq); dev_set_drvdata(&port->dev, NULL); dev_info(&wwandev->dev, "port %s disconnected\n", dev_name(&port->dev)); device_unregister(&port->dev); /* Release related wwan device */ wwan_remove_dev(wwandev); } EXPORT_SYMBOL_GPL(wwan_remove_port); void wwan_port_rx(struct wwan_port *port, struct sk_buff *skb) { skb_queue_tail(&port->rxq, skb); wake_up_interruptible(&port->waitqueue); } EXPORT_SYMBOL_GPL(wwan_port_rx); void wwan_port_txon(struct wwan_port *port) { clear_bit(WWAN_PORT_TX_OFF, &port->flags); wake_up_interruptible(&port->waitqueue); } EXPORT_SYMBOL_GPL(wwan_port_txon); void wwan_port_txoff(struct wwan_port *port) { set_bit(WWAN_PORT_TX_OFF, &port->flags); } EXPORT_SYMBOL_GPL(wwan_port_txoff); void *wwan_port_get_drvdata(struct wwan_port *port) { return dev_get_drvdata(&port->dev); } EXPORT_SYMBOL_GPL(wwan_port_get_drvdata); static int wwan_port_op_start(struct wwan_port *port) { int ret = 0; mutex_lock(&port->ops_lock); if (!port->ops) { /* Port got unplugged */ ret = -ENODEV; goto out_unlock; } /* If port is already started, don't start again */ if (!port->start_count) ret = port->ops->start(port); if (!ret) port->start_count++; out_unlock: mutex_unlock(&port->ops_lock); return ret; } static void wwan_port_op_stop(struct wwan_port *port) { mutex_lock(&port->ops_lock); port->start_count--; if (!port->start_count) { if (port->ops) port->ops->stop(port); skb_queue_purge(&port->rxq); } mutex_unlock(&port->ops_lock); } static int wwan_port_op_tx(struct wwan_port *port, struct sk_buff *skb, bool nonblock) { int ret; mutex_lock(&port->ops_lock); if (!port->ops) { /* Port got unplugged */ ret = -ENODEV; goto out_unlock; } if (nonblock || !port->ops->tx_blocking) ret = port->ops->tx(port, skb); else ret = port->ops->tx_blocking(port, skb); out_unlock: mutex_unlock(&port->ops_lock); return ret; } static bool is_read_blocked(struct wwan_port *port) { return skb_queue_empty(&port->rxq) && port->ops; } static bool is_write_blocked(struct wwan_port *port) { return test_bit(WWAN_PORT_TX_OFF, &port->flags) && port->ops; } static int wwan_wait_rx(struct wwan_port *port, bool nonblock) { if (!is_read_blocked(port)) return 0; if (nonblock) return -EAGAIN; if (wait_event_interruptible(port->waitqueue, !is_read_blocked(port))) return -ERESTARTSYS; return 0; } static int wwan_wait_tx(struct wwan_port *port, bool nonblock) { if (!is_write_blocked(port)) return 0; if (nonblock) return -EAGAIN; if (wait_event_interruptible(port->waitqueue, !is_write_blocked(port))) return -ERESTARTSYS; return 0; } static int wwan_port_fops_open(struct inode *inode, struct file *file) { struct wwan_port *port; int err = 0; port = wwan_port_get_by_minor(iminor(inode)); if (IS_ERR(port)) return PTR_ERR(port); file->private_data = port; stream_open(inode, file); err = wwan_port_op_start(port); if (err) put_device(&port->dev); return err; } static int wwan_port_fops_release(struct inode *inode, struct file *filp) { struct wwan_port *port = filp->private_data; wwan_port_op_stop(port); put_device(&port->dev); return 0; } static ssize_t wwan_port_fops_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos) { struct wwan_port *port = filp->private_data; struct sk_buff *skb; size_t copied; int ret; ret = wwan_wait_rx(port, !!(filp->f_flags & O_NONBLOCK)); if (ret) return ret; skb = skb_dequeue(&port->rxq); if (!skb) return -EIO; copied = min_t(size_t, count, skb->len); if (copy_to_user(buf, skb->data, copied)) { kfree_skb(skb); return -EFAULT; } skb_pull(skb, copied); /* skb is not fully consumed, keep it in the queue */ if (skb->len) skb_queue_head(&port->rxq, skb); else consume_skb(skb); return copied; } static ssize_t wwan_port_fops_write(struct file *filp, const char __user *buf, size_t count, loff_t *offp) { struct sk_buff *skb, *head = NULL, *tail = NULL; struct wwan_port *port = filp->private_data; size_t frag_len, remain = count; int ret; ret = wwan_wait_tx(port, !!(filp->f_flags & O_NONBLOCK)); if (ret) return ret; do { frag_len = min(remain, port->frag_len); skb = alloc_skb(frag_len + port->headroom_len, GFP_KERNEL); if (!skb) { ret = -ENOMEM; goto freeskb; } skb_reserve(skb, port->headroom_len); if (!head) { head = skb; } else if (!tail) { skb_shinfo(head)->frag_list = skb; tail = skb; } else { tail->next = skb; tail = skb; } if (copy_from_user(skb_put(skb, frag_len), buf + count - remain, frag_len)) { ret = -EFAULT; goto freeskb; } if (skb != head) { head->data_len += skb->len; head->len += skb->len; head->truesize += skb->truesize; } } while (remain -= frag_len); ret = wwan_port_op_tx(port, head, !!(filp->f_flags & O_NONBLOCK)); if (!ret) return count; freeskb: kfree_skb(head); return ret; } static __poll_t wwan_port_fops_poll(struct file *filp, poll_table *wait) { struct wwan_port *port = filp->private_data; __poll_t mask = 0; poll_wait(filp, &port->waitqueue, wait); mutex_lock(&port->ops_lock); if (port->ops && port->ops->tx_poll) mask |= port->ops->tx_poll(port, filp, wait); else if (!is_write_blocked(port)) mask |= EPOLLOUT | EPOLLWRNORM; if (!is_read_blocked(port)) mask |= EPOLLIN | EPOLLRDNORM; if (!port->ops) mask |= EPOLLHUP | EPOLLERR; mutex_unlock(&port->ops_lock); return mask; } /* Implements minimalistic stub terminal IOCTLs support */ static long wwan_port_fops_at_ioctl(struct wwan_port *port, unsigned int cmd, unsigned long arg) { int ret = 0; mutex_lock(&port->data_lock); switch (cmd) { case TCFLSH: break; case TCGETS: if (copy_to_user((void __user *)arg, &port->at_data.termios, sizeof(struct termios))) ret = -EFAULT; break; case TCSETS: case TCSETSW: case TCSETSF: if (copy_from_user(&port->at_data.termios, (void __user *)arg, sizeof(struct termios))) ret = -EFAULT; break; #ifdef TCGETS2 case TCGETS2: if (copy_to_user((void __user *)arg, &port->at_data.termios, sizeof(struct termios2))) ret = -EFAULT; break; case TCSETS2: case TCSETSW2: case TCSETSF2: if (copy_from_user(&port->at_data.termios, (void __user *)arg, sizeof(struct termios2))) ret = -EFAULT; break; #endif case TIOCMGET: ret = put_user(port->at_data.mdmbits, (int __user *)arg); break; case TIOCMSET: case TIOCMBIC: case TIOCMBIS: { int mdmbits; if (copy_from_user(&mdmbits, (int __user *)arg, sizeof(int))) { ret = -EFAULT; break; } if (cmd == TIOCMBIC) port->at_data.mdmbits &= ~mdmbits; else if (cmd == TIOCMBIS) port->at_data.mdmbits |= mdmbits; else port->at_data.mdmbits = mdmbits; break; } default: ret = -ENOIOCTLCMD; } mutex_unlock(&port->data_lock); return ret; } static long wwan_port_fops_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct wwan_port *port = filp->private_data; int res; if (port->type == WWAN_PORT_AT) { /* AT port specific IOCTLs */ res = wwan_port_fops_at_ioctl(port, cmd, arg); if (res != -ENOIOCTLCMD) return res; } switch (cmd) { case TIOCINQ: { /* aka SIOCINQ aka FIONREAD */ unsigned long flags; struct sk_buff *skb; int amount = 0; spin_lock_irqsave(&port->rxq.lock, flags); skb_queue_walk(&port->rxq, skb) amount += skb->len; spin_unlock_irqrestore(&port->rxq.lock, flags); return put_user(amount, (int __user *)arg); } default: return -ENOIOCTLCMD; } } static const struct file_operations wwan_port_fops = { .owner = THIS_MODULE, .open = wwan_port_fops_open, .release = wwan_port_fops_release, .read = wwan_port_fops_read, .write = wwan_port_fops_write, .poll = wwan_port_fops_poll, .unlocked_ioctl = wwan_port_fops_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = compat_ptr_ioctl, #endif .llseek = noop_llseek, }; static int wwan_rtnl_validate(struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { if (!data) return -EINVAL; if (!tb[IFLA_PARENT_DEV_NAME]) return -EINVAL; if (!data[IFLA_WWAN_LINK_ID]) return -EINVAL; return 0; } static const struct device_type wwan_type = { .name = "wwan" }; static struct net_device *wwan_rtnl_alloc(struct nlattr *tb[], const char *ifname, unsigned char name_assign_type, unsigned int num_tx_queues, unsigned int num_rx_queues) { const char *devname = nla_data(tb[IFLA_PARENT_DEV_NAME]); struct wwan_device *wwandev = wwan_dev_get_by_name(devname); struct net_device *dev; unsigned int priv_size; if (IS_ERR(wwandev)) return ERR_CAST(wwandev); /* only supported if ops were registered (not just ports) */ if (!wwandev->ops) { dev = ERR_PTR(-EOPNOTSUPP); goto out; } priv_size = sizeof(struct wwan_netdev_priv) + wwandev->ops->priv_size; dev = alloc_netdev_mqs(priv_size, ifname, name_assign_type, wwandev->ops->setup, num_tx_queues, num_rx_queues); if (dev) { SET_NETDEV_DEV(dev, &wwandev->dev); SET_NETDEV_DEVTYPE(dev, &wwan_type); } out: /* release the reference */ put_device(&wwandev->dev); return dev; } static int wwan_rtnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct wwan_device *wwandev = wwan_dev_get_by_parent(dev->dev.parent); u32 link_id = nla_get_u32(data[IFLA_WWAN_LINK_ID]); struct wwan_netdev_priv *priv = netdev_priv(dev); int ret; if (IS_ERR(wwandev)) return PTR_ERR(wwandev); /* shouldn't have a netdev (left) with us as parent so WARN */ if (WARN_ON(!wwandev->ops)) { ret = -EOPNOTSUPP; goto out; } priv->link_id = link_id; if (wwandev->ops->newlink) ret = wwandev->ops->newlink(wwandev->ops_ctxt, dev, link_id, extack); else ret = register_netdevice(dev); out: /* release the reference */ put_device(&wwandev->dev); return ret; } static void wwan_rtnl_dellink(struct net_device *dev, struct list_head *head) { struct wwan_device *wwandev = wwan_dev_get_by_parent(dev->dev.parent); if (IS_ERR(wwandev)) return; /* shouldn't have a netdev (left) with us as parent so WARN */ if (WARN_ON(!wwandev->ops)) goto out; if (wwandev->ops->dellink) wwandev->ops->dellink(wwandev->ops_ctxt, dev, head); else unregister_netdevice_queue(dev, head); out: /* release the reference */ put_device(&wwandev->dev); } static size_t wwan_rtnl_get_size(const struct net_device *dev) { return nla_total_size(4) + /* IFLA_WWAN_LINK_ID */ 0; } static int wwan_rtnl_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct wwan_netdev_priv *priv = netdev_priv(dev); if (nla_put_u32(skb, IFLA_WWAN_LINK_ID, priv->link_id)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static const struct nla_policy wwan_rtnl_policy[IFLA_WWAN_MAX + 1] = { [IFLA_WWAN_LINK_ID] = { .type = NLA_U32 }, }; static struct rtnl_link_ops wwan_rtnl_link_ops __read_mostly = { .kind = "wwan", .maxtype = IFLA_WWAN_MAX, .alloc = wwan_rtnl_alloc, .validate = wwan_rtnl_validate, .newlink = wwan_rtnl_newlink, .dellink = wwan_rtnl_dellink, .get_size = wwan_rtnl_get_size, .fill_info = wwan_rtnl_fill_info, .policy = wwan_rtnl_policy, }; static void wwan_create_default_link(struct wwan_device *wwandev, u32 def_link_id) { struct nlattr *tb[IFLA_MAX + 1], *linkinfo[IFLA_INFO_MAX + 1]; struct nlattr *data[IFLA_WWAN_MAX + 1]; struct net_device *dev; struct nlmsghdr *nlh; struct sk_buff *msg; /* Forge attributes required to create a WWAN netdev. We first * build a netlink message and then parse it. This looks * odd, but such approach is less error prone. */ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (WARN_ON(!msg)) return; nlh = nlmsg_put(msg, 0, 0, RTM_NEWLINK, 0, 0); if (WARN_ON(!nlh)) goto free_attrs; if (nla_put_string(msg, IFLA_PARENT_DEV_NAME, dev_name(&wwandev->dev))) goto free_attrs; tb[IFLA_LINKINFO] = nla_nest_start(msg, IFLA_LINKINFO); if (!tb[IFLA_LINKINFO]) goto free_attrs; linkinfo[IFLA_INFO_DATA] = nla_nest_start(msg, IFLA_INFO_DATA); if (!linkinfo[IFLA_INFO_DATA]) goto free_attrs; if (nla_put_u32(msg, IFLA_WWAN_LINK_ID, def_link_id)) goto free_attrs; nla_nest_end(msg, linkinfo[IFLA_INFO_DATA]); nla_nest_end(msg, tb[IFLA_LINKINFO]); nlmsg_end(msg, nlh); /* The next three parsing calls can not fail */ nlmsg_parse_deprecated(nlh, 0, tb, IFLA_MAX, NULL, NULL); nla_parse_nested_deprecated(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO], NULL, NULL); nla_parse_nested_deprecated(data, IFLA_WWAN_MAX, linkinfo[IFLA_INFO_DATA], NULL, NULL); rtnl_lock(); dev = rtnl_create_link(&init_net, "wwan%d", NET_NAME_ENUM, &wwan_rtnl_link_ops, tb, NULL); if (WARN_ON(IS_ERR(dev))) goto unlock; if (WARN_ON(wwan_rtnl_newlink(&init_net, dev, tb, data, NULL))) { free_netdev(dev); goto unlock; } rtnl_configure_link(dev, NULL, 0, NULL); /* Link initialized, notify new link */ unlock: rtnl_unlock(); free_attrs: nlmsg_free(msg); } /** * wwan_register_ops - register WWAN device ops * @parent: Device to use as parent and shared by all WWAN ports and * created netdevs * @ops: operations to register * @ctxt: context to pass to operations * @def_link_id: id of the default link that will be automatically created by * the WWAN core for the WWAN device. The default link will not be created * if the passed value is WWAN_NO_DEFAULT_LINK. * * Returns: 0 on success, a negative error code on failure */ int wwan_register_ops(struct device *parent, const struct wwan_ops *ops, void *ctxt, u32 def_link_id) { struct wwan_device *wwandev; if (WARN_ON(!parent || !ops || !ops->setup)) return -EINVAL; wwandev = wwan_create_dev(parent); if (IS_ERR(wwandev)) return PTR_ERR(wwandev); if (WARN_ON(wwandev->ops)) { wwan_remove_dev(wwandev); return -EBUSY; } wwandev->ops = ops; wwandev->ops_ctxt = ctxt; /* NB: we do not abort ops registration in case of default link * creation failure. Link ops is the management interface, while the * default link creation is a service option. And we should not prevent * a user from manually creating a link latter if service option failed * now. */ if (def_link_id != WWAN_NO_DEFAULT_LINK) wwan_create_default_link(wwandev, def_link_id); return 0; } EXPORT_SYMBOL_GPL(wwan_register_ops); /* Enqueue child netdev deletion */ static int wwan_child_dellink(struct device *dev, void *data) { struct list_head *kill_list = data; if (dev->type == &wwan_type) wwan_rtnl_dellink(to_net_dev(dev), kill_list); return 0; } /** * wwan_unregister_ops - remove WWAN device ops * @parent: Device to use as parent and shared by all WWAN ports and * created netdevs */ void wwan_unregister_ops(struct device *parent) { struct wwan_device *wwandev = wwan_dev_get_by_parent(parent); LIST_HEAD(kill_list); if (WARN_ON(IS_ERR(wwandev))) return; if (WARN_ON(!wwandev->ops)) { put_device(&wwandev->dev); return; } /* put the reference obtained by wwan_dev_get_by_parent(), * we should still have one (that the owner is giving back * now) due to the ops being assigned. */ put_device(&wwandev->dev); rtnl_lock(); /* Prevent concurrent netdev(s) creation/destroying */ /* Remove all child netdev(s), using batch removing */ device_for_each_child(&wwandev->dev, &kill_list, wwan_child_dellink); unregister_netdevice_many(&kill_list); wwandev->ops = NULL; /* Finally remove ops */ rtnl_unlock(); wwandev->ops_ctxt = NULL; wwan_remove_dev(wwandev); } EXPORT_SYMBOL_GPL(wwan_unregister_ops); static int __init wwan_init(void) { int err; err = rtnl_link_register(&wwan_rtnl_link_ops); if (err) return err; err = class_register(&wwan_class); if (err) goto unregister; /* chrdev used for wwan ports */ wwan_major = __register_chrdev(0, 0, WWAN_MAX_MINORS, "wwan_port", &wwan_port_fops); if (wwan_major < 0) { err = wwan_major; goto destroy; } #ifdef CONFIG_WWAN_DEBUGFS wwan_debugfs_dir = debugfs_create_dir("wwan", NULL); #endif return 0; destroy: class_unregister(&wwan_class); unregister: rtnl_link_unregister(&wwan_rtnl_link_ops); return err; } static void __exit wwan_exit(void) { debugfs_remove_recursive(wwan_debugfs_dir); __unregister_chrdev(wwan_major, 0, WWAN_MAX_MINORS, "wwan_port"); rtnl_link_unregister(&wwan_rtnl_link_ops); class_unregister(&wwan_class); } module_init(wwan_init); module_exit(wwan_exit); MODULE_AUTHOR("Loic Poulain <loic.poulain@linaro.org>"); MODULE_DESCRIPTION("WWAN core"); MODULE_LICENSE("GPL v2"); |
10 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 | /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM ksm #if !defined(_TRACE_KSM_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_KSM_H #include <linux/tracepoint.h> /** * ksm_scan_template - called for start / stop scan * * @seq: sequence number of scan * @rmap_entries: actual number of rmap entries * * Allows to trace the start / stop of a ksm scan. */ DECLARE_EVENT_CLASS(ksm_scan_template, TP_PROTO(int seq, u32 rmap_entries), TP_ARGS(seq, rmap_entries), TP_STRUCT__entry( __field(int, seq) __field(u32, rmap_entries) ), TP_fast_assign( __entry->seq = seq; __entry->rmap_entries = rmap_entries; ), TP_printk("seq %d rmap size %d", __entry->seq, __entry->rmap_entries) ); /** * ksm_start_scan - called after a new ksm scan is started * * @seq: sequence number of scan * @rmap_entries: actual number of rmap entries * * Allows to trace the start of a ksm scan. */ DEFINE_EVENT(ksm_scan_template, ksm_start_scan, TP_PROTO(int seq, u32 rmap_entries), TP_ARGS(seq, rmap_entries) ); /** * ksm_stop_scan - called after a new ksm scan has completed * * @seq: sequence number of scan * @rmap_entries: actual number of rmap entries * * Allows to trace the completion of a ksm scan. */ DEFINE_EVENT(ksm_scan_template, ksm_stop_scan, TP_PROTO(int seq, u32 rmap_entries), TP_ARGS(seq, rmap_entries) ); /** * ksm_enter - called after a new process has been added / removed from ksm * * @mm: address of the mm object of the process * * Allows to trace the when a process has been added or removed from ksm. */ DECLARE_EVENT_CLASS(ksm_enter_exit_template, TP_PROTO(void *mm), TP_ARGS(mm), TP_STRUCT__entry( __field(void *, mm) ), TP_fast_assign( __entry->mm = mm; ), TP_printk("mm %p", __entry->mm) ); /** * ksm_enter - called after a new process has been added to ksm * * @mm: address of the mm object of the process * * Allows to trace the when a process has been added to ksm. */ DEFINE_EVENT(ksm_enter_exit_template, ksm_enter, TP_PROTO(void *mm), TP_ARGS(mm) ); /** * ksm_exit - called after a new process has been removed from ksm * * @mm: address of the mm object of the process * * Allows to trace the when a process has been removed from ksm. */ DEFINE_EVENT(ksm_enter_exit_template, ksm_exit, TP_PROTO(void *mm), TP_ARGS(mm) ); /** * ksm_merge_one_page - called after a page has been merged * * @pfn: page frame number of ksm page * @rmap_item: address of rmap_item object * @mm: address of the process mm struct * @err: success * * Allows to trace the ksm merging of individual pages. */ TRACE_EVENT(ksm_merge_one_page, TP_PROTO(unsigned long pfn, void *rmap_item, void *mm, int err), TP_ARGS(pfn, rmap_item, mm, err), TP_STRUCT__entry( __field(unsigned long, pfn) __field(void *, rmap_item) __field(void *, mm) __field(int, err) ), TP_fast_assign( __entry->pfn = pfn; __entry->rmap_item = rmap_item; __entry->mm = mm; __entry->err = err; ), TP_printk("ksm pfn %lu rmap_item %p mm %p error %d", __entry->pfn, __entry->rmap_item, __entry->mm, __entry->err) ); /** * ksm_merge_with_ksm_page - called after a page has been merged with a ksm page * * @ksm_page: address ksm page * @pfn: page frame number of ksm page * @rmap_item: address of rmap_item object * @mm: address of the mm object of the process * @err: success * * Allows to trace the merging of a page with a ksm page. */ TRACE_EVENT(ksm_merge_with_ksm_page, TP_PROTO(void *ksm_page, unsigned long pfn, void *rmap_item, void *mm, int err), TP_ARGS(ksm_page, pfn, rmap_item, mm, err), TP_STRUCT__entry( __field(void *, ksm_page) __field(unsigned long, pfn) __field(void *, rmap_item) __field(void *, mm) __field(int, err) ), TP_fast_assign( __entry->ksm_page = ksm_page; __entry->pfn = pfn; __entry->rmap_item = rmap_item; __entry->mm = mm; __entry->err = err; ), TP_printk("%spfn %lu rmap_item %p mm %p error %d", (__entry->ksm_page ? "ksm " : ""), __entry->pfn, __entry->rmap_item, __entry->mm, __entry->err) ); /** * ksm_remove_ksm_page - called after a ksm page has been removed * * @pfn: page frame number of ksm page * * Allows to trace the removing of stable ksm pages. */ TRACE_EVENT(ksm_remove_ksm_page, TP_PROTO(unsigned long pfn), TP_ARGS(pfn), TP_STRUCT__entry( __field(unsigned long, pfn) ), TP_fast_assign( __entry->pfn = pfn; ), TP_printk("pfn %lu", __entry->pfn) ); /** * ksm_remove_rmap_item - called after a rmap_item has been removed from the * stable tree * * @pfn: page frame number of ksm page * @rmap_item: address of rmap_item object * @mm: address of the process mm struct * * Allows to trace the removal of pages from the stable tree list. */ TRACE_EVENT(ksm_remove_rmap_item, TP_PROTO(unsigned long pfn, void *rmap_item, void *mm), TP_ARGS(pfn, rmap_item, mm), TP_STRUCT__entry( __field(unsigned long, pfn) __field(void *, rmap_item) __field(void *, mm) ), TP_fast_assign( __entry->pfn = pfn; __entry->rmap_item = rmap_item; __entry->mm = mm; ), TP_printk("pfn %lu rmap_item %p mm %p", __entry->pfn, __entry->rmap_item, __entry->mm) ); /** * ksm_advisor - called after the advisor has run * * @scan_time: scan time in seconds * @pages_to_scan: new pages_to_scan value * @cpu_percent: cpu usage in percent * * Allows to trace the ksm advisor. */ TRACE_EVENT(ksm_advisor, TP_PROTO(s64 scan_time, unsigned long pages_to_scan, unsigned int cpu_percent), TP_ARGS(scan_time, pages_to_scan, cpu_percent), TP_STRUCT__entry( __field(s64, scan_time) __field(unsigned long, pages_to_scan) __field(unsigned int, cpu_percent) ), TP_fast_assign( __entry->scan_time = scan_time; __entry->pages_to_scan = pages_to_scan; __entry->cpu_percent = cpu_percent; ), TP_printk("ksm scan time %lld pages_to_scan %lu cpu percent %u", __entry->scan_time, __entry->pages_to_scan, __entry->cpu_percent) ); #endif /* _TRACE_KSM_H */ /* This part must be outside protection */ #include <trace/define_trace.h> |
9 1 9 9 5 5 4 5 9 9 9 10 10 9 9 9 5 12 2 10 9 7 1 1 1 8 3 5 5 5 5 5 5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 | /* * Public API and common code for kernel->userspace relay file support. * * See Documentation/filesystems/relay.rst for an overview. * * Copyright (C) 2002-2005 - Tom Zanussi (zanussi@us.ibm.com), IBM Corp * Copyright (C) 1999-2005 - Karim Yaghmour (karim@opersys.com) * * Moved to kernel/relay.c by Paul Mundt, 2006. * November 2006 - CPU hotplug support by Mathieu Desnoyers * (mathieu.desnoyers@polymtl.ca) * * This file is released under the GPL. */ #include <linux/errno.h> #include <linux/stddef.h> #include <linux/slab.h> #include <linux/export.h> #include <linux/string.h> #include <linux/relay.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/cpu.h> #include <linux/splice.h> /* list of open channels, for cpu hotplug */ static DEFINE_MUTEX(relay_channels_mutex); static LIST_HEAD(relay_channels); /* * fault() vm_op implementation for relay file mapping. */ static vm_fault_t relay_buf_fault(struct vm_fault *vmf) { struct page *page; struct rchan_buf *buf = vmf->vma->vm_private_data; pgoff_t pgoff = vmf->pgoff; if (!buf) return VM_FAULT_OOM; page = vmalloc_to_page(buf->start + (pgoff << PAGE_SHIFT)); if (!page) return VM_FAULT_SIGBUS; get_page(page); vmf->page = page; return 0; } /* * vm_ops for relay file mappings. */ static const struct vm_operations_struct relay_file_mmap_ops = { .fault = relay_buf_fault, }; /* * allocate an array of pointers of struct page */ static struct page **relay_alloc_page_array(unsigned int n_pages) { return kvcalloc(n_pages, sizeof(struct page *), GFP_KERNEL); } /* * free an array of pointers of struct page */ static void relay_free_page_array(struct page **array) { kvfree(array); } /** * relay_mmap_buf: - mmap channel buffer to process address space * @buf: relay channel buffer * @vma: vm_area_struct describing memory to be mapped * * Returns 0 if ok, negative on error * * Caller should already have grabbed mmap_lock. */ static int relay_mmap_buf(struct rchan_buf *buf, struct vm_area_struct *vma) { unsigned long length = vma->vm_end - vma->vm_start; if (!buf) return -EBADF; if (length != (unsigned long)buf->chan->alloc_size) return -EINVAL; vma->vm_ops = &relay_file_mmap_ops; vm_flags_set(vma, VM_DONTEXPAND); vma->vm_private_data = buf; return 0; } /** * relay_alloc_buf - allocate a channel buffer * @buf: the buffer struct * @size: total size of the buffer * * Returns a pointer to the resulting buffer, %NULL if unsuccessful. The * passed in size will get page aligned, if it isn't already. */ static void *relay_alloc_buf(struct rchan_buf *buf, size_t *size) { void *mem; unsigned int i, j, n_pages; *size = PAGE_ALIGN(*size); n_pages = *size >> PAGE_SHIFT; buf->page_array = relay_alloc_page_array(n_pages); if (!buf->page_array) return NULL; for (i = 0; i < n_pages; i++) { buf->page_array[i] = alloc_page(GFP_KERNEL); if (unlikely(!buf->page_array[i])) goto depopulate; set_page_private(buf->page_array[i], (unsigned long)buf); } mem = vmap(buf->page_array, n_pages, VM_MAP, PAGE_KERNEL); if (!mem) goto depopulate; memset(mem, 0, *size); buf->page_count = n_pages; return mem; depopulate: for (j = 0; j < i; j++) __free_page(buf->page_array[j]); relay_free_page_array(buf->page_array); return NULL; } /** * relay_create_buf - allocate and initialize a channel buffer * @chan: the relay channel * * Returns channel buffer if successful, %NULL otherwise. */ static struct rchan_buf *relay_create_buf(struct rchan *chan) { struct rchan_buf *buf; if (chan->n_subbufs > KMALLOC_MAX_SIZE / sizeof(size_t)) return NULL; buf = kzalloc(sizeof(struct rchan_buf), GFP_KERNEL); if (!buf) return NULL; buf->padding = kmalloc_array(chan->n_subbufs, sizeof(size_t), GFP_KERNEL); if (!buf->padding) goto free_buf; buf->start = relay_alloc_buf(buf, &chan->alloc_size); if (!buf->start) goto free_buf; buf->chan = chan; kref_get(&buf->chan->kref); return buf; free_buf: kfree(buf->padding); kfree(buf); return NULL; } /** * relay_destroy_channel - free the channel struct * @kref: target kernel reference that contains the relay channel * * Should only be called from kref_put(). */ static void relay_destroy_channel(struct kref *kref) { struct rchan *chan = container_of(kref, struct rchan, kref); free_percpu(chan->buf); kfree(chan); } /** * relay_destroy_buf - destroy an rchan_buf struct and associated buffer * @buf: the buffer struct */ static void relay_destroy_buf(struct rchan_buf *buf) { struct rchan *chan = buf->chan; unsigned int i; if (likely(buf->start)) { vunmap(buf->start); for (i = 0; i < buf->page_count; i++) __free_page(buf->page_array[i]); relay_free_page_array(buf->page_array); } *per_cpu_ptr(chan->buf, buf->cpu) = NULL; kfree(buf->padding); kfree(buf); kref_put(&chan->kref, relay_destroy_channel); } /** * relay_remove_buf - remove a channel buffer * @kref: target kernel reference that contains the relay buffer * * Removes the file from the filesystem, which also frees the * rchan_buf_struct and the channel buffer. Should only be called from * kref_put(). */ static void relay_remove_buf(struct kref *kref) { struct rchan_buf *buf = container_of(kref, struct rchan_buf, kref); relay_destroy_buf(buf); } /** * relay_buf_empty - boolean, is the channel buffer empty? * @buf: channel buffer * * Returns 1 if the buffer is empty, 0 otherwise. */ static int relay_buf_empty(struct rchan_buf *buf) { return (buf->subbufs_produced - buf->subbufs_consumed) ? 0 : 1; } /** * relay_buf_full - boolean, is the channel buffer full? * @buf: channel buffer * * Returns 1 if the buffer is full, 0 otherwise. */ int relay_buf_full(struct rchan_buf *buf) { size_t ready = buf->subbufs_produced - buf->subbufs_consumed; return (ready >= buf->chan->n_subbufs) ? 1 : 0; } EXPORT_SYMBOL_GPL(relay_buf_full); /* * High-level relay kernel API and associated functions. */ static int relay_subbuf_start(struct rchan_buf *buf, void *subbuf, void *prev_subbuf, size_t prev_padding) { if (!buf->chan->cb->subbuf_start) return !relay_buf_full(buf); return buf->chan->cb->subbuf_start(buf, subbuf, prev_subbuf, prev_padding); } /** * wakeup_readers - wake up readers waiting on a channel * @work: contains the channel buffer * * This is the function used to defer reader waking */ static void wakeup_readers(struct irq_work *work) { struct rchan_buf *buf; buf = container_of(work, struct rchan_buf, wakeup_work); wake_up_interruptible(&buf->read_wait); } /** * __relay_reset - reset a channel buffer * @buf: the channel buffer * @init: 1 if this is a first-time initialization * * See relay_reset() for description of effect. */ static void __relay_reset(struct rchan_buf *buf, unsigned int init) { size_t i; if (init) { init_waitqueue_head(&buf->read_wait); kref_init(&buf->kref); init_irq_work(&buf->wakeup_work, wakeup_readers); } else { irq_work_sync(&buf->wakeup_work); } buf->subbufs_produced = 0; buf->subbufs_consumed = 0; buf->bytes_consumed = 0; buf->finalized = 0; buf->data = buf->start; buf->offset = 0; for (i = 0; i < buf->chan->n_subbufs; i++) buf->padding[i] = 0; relay_subbuf_start(buf, buf->data, NULL, 0); } /** * relay_reset - reset the channel * @chan: the channel * * This has the effect of erasing all data from all channel buffers * and restarting the channel in its initial state. The buffers * are not freed, so any mappings are still in effect. * * NOTE. Care should be taken that the channel isn't actually * being used by anything when this call is made. */ void relay_reset(struct rchan *chan) { struct rchan_buf *buf; unsigned int i; if (!chan) return; if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) { __relay_reset(buf, 0); return; } mutex_lock(&relay_channels_mutex); for_each_possible_cpu(i) if ((buf = *per_cpu_ptr(chan->buf, i))) __relay_reset(buf, 0); mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_reset); static inline void relay_set_buf_dentry(struct rchan_buf *buf, struct dentry *dentry) { buf->dentry = dentry; d_inode(buf->dentry)->i_size = buf->early_bytes; } static struct dentry *relay_create_buf_file(struct rchan *chan, struct rchan_buf *buf, unsigned int cpu) { struct dentry *dentry; char *tmpname; tmpname = kzalloc(NAME_MAX + 1, GFP_KERNEL); if (!tmpname) return NULL; snprintf(tmpname, NAME_MAX, "%s%d", chan->base_filename, cpu); /* Create file in fs */ dentry = chan->cb->create_buf_file(tmpname, chan->parent, S_IRUSR, buf, &chan->is_global); if (IS_ERR(dentry)) dentry = NULL; kfree(tmpname); return dentry; } /* * relay_open_buf - create a new relay channel buffer * * used by relay_open() and CPU hotplug. */ static struct rchan_buf *relay_open_buf(struct rchan *chan, unsigned int cpu) { struct rchan_buf *buf; struct dentry *dentry; if (chan->is_global) return *per_cpu_ptr(chan->buf, 0); buf = relay_create_buf(chan); if (!buf) return NULL; if (chan->has_base_filename) { dentry = relay_create_buf_file(chan, buf, cpu); if (!dentry) goto free_buf; relay_set_buf_dentry(buf, dentry); } else { /* Only retrieve global info, nothing more, nothing less */ dentry = chan->cb->create_buf_file(NULL, NULL, S_IRUSR, buf, &chan->is_global); if (IS_ERR_OR_NULL(dentry)) goto free_buf; } buf->cpu = cpu; __relay_reset(buf, 1); if(chan->is_global) { *per_cpu_ptr(chan->buf, 0) = buf; buf->cpu = 0; } return buf; free_buf: relay_destroy_buf(buf); return NULL; } /** * relay_close_buf - close a channel buffer * @buf: channel buffer * * Marks the buffer finalized and restores the default callbacks. * The channel buffer and channel buffer data structure are then freed * automatically when the last reference is given up. */ static void relay_close_buf(struct rchan_buf *buf) { buf->finalized = 1; irq_work_sync(&buf->wakeup_work); buf->chan->cb->remove_buf_file(buf->dentry); kref_put(&buf->kref, relay_remove_buf); } int relay_prepare_cpu(unsigned int cpu) { struct rchan *chan; struct rchan_buf *buf; mutex_lock(&relay_channels_mutex); list_for_each_entry(chan, &relay_channels, list) { if (*per_cpu_ptr(chan->buf, cpu)) continue; buf = relay_open_buf(chan, cpu); if (!buf) { pr_err("relay: cpu %d buffer creation failed\n", cpu); mutex_unlock(&relay_channels_mutex); return -ENOMEM; } *per_cpu_ptr(chan->buf, cpu) = buf; } mutex_unlock(&relay_channels_mutex); return 0; } /** * relay_open - create a new relay channel * @base_filename: base name of files to create, %NULL for buffering only * @parent: dentry of parent directory, %NULL for root directory or buffer * @subbuf_size: size of sub-buffers * @n_subbufs: number of sub-buffers * @cb: client callback functions * @private_data: user-defined data * * Returns channel pointer if successful, %NULL otherwise. * * Creates a channel buffer for each cpu using the sizes and * attributes specified. The created channel buffer files * will be named base_filename0...base_filenameN-1. File * permissions will be %S_IRUSR. * * If opening a buffer (@parent = NULL) that you later wish to register * in a filesystem, call relay_late_setup_files() once the @parent dentry * is available. */ struct rchan *relay_open(const char *base_filename, struct dentry *parent, size_t subbuf_size, size_t n_subbufs, const struct rchan_callbacks *cb, void *private_data) { unsigned int i; struct rchan *chan; struct rchan_buf *buf; if (!(subbuf_size && n_subbufs)) return NULL; if (subbuf_size > UINT_MAX / n_subbufs) return NULL; if (!cb || !cb->create_buf_file || !cb->remove_buf_file) return NULL; chan = kzalloc(sizeof(struct rchan), GFP_KERNEL); if (!chan) return NULL; chan->buf = alloc_percpu(struct rchan_buf *); if (!chan->buf) { kfree(chan); return NULL; } chan->version = RELAYFS_CHANNEL_VERSION; chan->n_subbufs = n_subbufs; chan->subbuf_size = subbuf_size; chan->alloc_size = PAGE_ALIGN(subbuf_size * n_subbufs); chan->parent = parent; chan->private_data = private_data; if (base_filename) { chan->has_base_filename = 1; strscpy(chan->base_filename, base_filename, NAME_MAX); } chan->cb = cb; kref_init(&chan->kref); mutex_lock(&relay_channels_mutex); for_each_online_cpu(i) { buf = relay_open_buf(chan, i); if (!buf) goto free_bufs; *per_cpu_ptr(chan->buf, i) = buf; } list_add(&chan->list, &relay_channels); mutex_unlock(&relay_channels_mutex); return chan; free_bufs: for_each_possible_cpu(i) { if ((buf = *per_cpu_ptr(chan->buf, i))) relay_close_buf(buf); } kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); return NULL; } EXPORT_SYMBOL_GPL(relay_open); struct rchan_percpu_buf_dispatcher { struct rchan_buf *buf; struct dentry *dentry; }; /* Called in atomic context. */ static void __relay_set_buf_dentry(void *info) { struct rchan_percpu_buf_dispatcher *p = info; relay_set_buf_dentry(p->buf, p->dentry); } /** * relay_late_setup_files - triggers file creation * @chan: channel to operate on * @base_filename: base name of files to create * @parent: dentry of parent directory, %NULL for root directory * * Returns 0 if successful, non-zero otherwise. * * Use to setup files for a previously buffer-only channel created * by relay_open() with a NULL parent dentry. * * For example, this is useful for perfomring early tracing in kernel, * before VFS is up and then exposing the early results once the dentry * is available. */ int relay_late_setup_files(struct rchan *chan, const char *base_filename, struct dentry *parent) { int err = 0; unsigned int i, curr_cpu; unsigned long flags; struct dentry *dentry; struct rchan_buf *buf; struct rchan_percpu_buf_dispatcher disp; if (!chan || !base_filename) return -EINVAL; strscpy(chan->base_filename, base_filename, NAME_MAX); mutex_lock(&relay_channels_mutex); /* Is chan already set up? */ if (unlikely(chan->has_base_filename)) { mutex_unlock(&relay_channels_mutex); return -EEXIST; } chan->has_base_filename = 1; chan->parent = parent; if (chan->is_global) { err = -EINVAL; buf = *per_cpu_ptr(chan->buf, 0); if (!WARN_ON_ONCE(!buf)) { dentry = relay_create_buf_file(chan, buf, 0); if (dentry && !WARN_ON_ONCE(!chan->is_global)) { relay_set_buf_dentry(buf, dentry); err = 0; } } mutex_unlock(&relay_channels_mutex); return err; } curr_cpu = get_cpu(); /* * The CPU hotplug notifier ran before us and created buffers with * no files associated. So it's safe to call relay_setup_buf_file() * on all currently online CPUs. */ for_each_online_cpu(i) { buf = *per_cpu_ptr(chan->buf, i); if (unlikely(!buf)) { WARN_ONCE(1, KERN_ERR "CPU has no buffer!\n"); err = -EINVAL; break; } dentry = relay_create_buf_file(chan, buf, i); if (unlikely(!dentry)) { err = -EINVAL; break; } if (curr_cpu == i) { local_irq_save(flags); relay_set_buf_dentry(buf, dentry); local_irq_restore(flags); } else { disp.buf = buf; disp.dentry = dentry; smp_mb(); /* relay_channels_mutex must be held, so wait. */ err = smp_call_function_single(i, __relay_set_buf_dentry, &disp, 1); } if (unlikely(err)) break; } put_cpu(); mutex_unlock(&relay_channels_mutex); return err; } EXPORT_SYMBOL_GPL(relay_late_setup_files); /** * relay_switch_subbuf - switch to a new sub-buffer * @buf: channel buffer * @length: size of current event * * Returns either the length passed in or 0 if full. * * Performs sub-buffer-switch tasks such as invoking callbacks, * updating padding counts, waking up readers, etc. */ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length) { void *old, *new; size_t old_subbuf, new_subbuf; if (unlikely(length > buf->chan->subbuf_size)) goto toobig; if (buf->offset != buf->chan->subbuf_size + 1) { buf->prev_padding = buf->chan->subbuf_size - buf->offset; old_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; buf->padding[old_subbuf] = buf->prev_padding; buf->subbufs_produced++; if (buf->dentry) d_inode(buf->dentry)->i_size += buf->chan->subbuf_size - buf->padding[old_subbuf]; else buf->early_bytes += buf->chan->subbuf_size - buf->padding[old_subbuf]; smp_mb(); if (waitqueue_active(&buf->read_wait)) { /* * Calling wake_up_interruptible() from here * will deadlock if we happen to be logging * from the scheduler (trying to re-grab * rq->lock), so defer it. */ irq_work_queue(&buf->wakeup_work); } } old = buf->data; new_subbuf = buf->subbufs_produced % buf->chan->n_subbufs; new = buf->start + new_subbuf * buf->chan->subbuf_size; buf->offset = 0; if (!relay_subbuf_start(buf, new, old, buf->prev_padding)) { buf->offset = buf->chan->subbuf_size + 1; return 0; } buf->data = new; buf->padding[new_subbuf] = 0; if (unlikely(length + buf->offset > buf->chan->subbuf_size)) goto toobig; return length; toobig: buf->chan->last_toobig = length; return 0; } EXPORT_SYMBOL_GPL(relay_switch_subbuf); /** * relay_subbufs_consumed - update the buffer's sub-buffers-consumed count * @chan: the channel * @cpu: the cpu associated with the channel buffer to update * @subbufs_consumed: number of sub-buffers to add to current buf's count * * Adds to the channel buffer's consumed sub-buffer count. * subbufs_consumed should be the number of sub-buffers newly consumed, * not the total consumed. * * NOTE. Kernel clients don't need to call this function if the channel * mode is 'overwrite'. */ void relay_subbufs_consumed(struct rchan *chan, unsigned int cpu, size_t subbufs_consumed) { struct rchan_buf *buf; if (!chan || cpu >= NR_CPUS) return; buf = *per_cpu_ptr(chan->buf, cpu); if (!buf || subbufs_consumed > chan->n_subbufs) return; if (subbufs_consumed > buf->subbufs_produced - buf->subbufs_consumed) buf->subbufs_consumed = buf->subbufs_produced; else buf->subbufs_consumed += subbufs_consumed; } EXPORT_SYMBOL_GPL(relay_subbufs_consumed); /** * relay_close - close the channel * @chan: the channel * * Closes all channel buffers and frees the channel. */ void relay_close(struct rchan *chan) { struct rchan_buf *buf; unsigned int i; if (!chan) return; mutex_lock(&relay_channels_mutex); if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) relay_close_buf(buf); else for_each_possible_cpu(i) if ((buf = *per_cpu_ptr(chan->buf, i))) relay_close_buf(buf); if (chan->last_toobig) printk(KERN_WARNING "relay: one or more items not logged " "[item size (%zd) > sub-buffer size (%zd)]\n", chan->last_toobig, chan->subbuf_size); list_del(&chan->list); kref_put(&chan->kref, relay_destroy_channel); mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_close); /** * relay_flush - close the channel * @chan: the channel * * Flushes all channel buffers, i.e. forces buffer switch. */ void relay_flush(struct rchan *chan) { struct rchan_buf *buf; unsigned int i; if (!chan) return; if (chan->is_global && (buf = *per_cpu_ptr(chan->buf, 0))) { relay_switch_subbuf(buf, 0); return; } mutex_lock(&relay_channels_mutex); for_each_possible_cpu(i) if ((buf = *per_cpu_ptr(chan->buf, i))) relay_switch_subbuf(buf, 0); mutex_unlock(&relay_channels_mutex); } EXPORT_SYMBOL_GPL(relay_flush); /** * relay_file_open - open file op for relay files * @inode: the inode * @filp: the file * * Increments the channel buffer refcount. */ static int relay_file_open(struct inode *inode, struct file *filp) { struct rchan_buf *buf = inode->i_private; kref_get(&buf->kref); filp->private_data = buf; return nonseekable_open(inode, filp); } /** * relay_file_mmap - mmap file op for relay files * @filp: the file * @vma: the vma describing what to map * * Calls upon relay_mmap_buf() to map the file into user space. */ static int relay_file_mmap(struct file *filp, struct vm_area_struct *vma) { struct rchan_buf *buf = filp->private_data; return relay_mmap_buf(buf, vma); } /** * relay_file_poll - poll file op for relay files * @filp: the file * @wait: poll table * * Poll implemention. */ static __poll_t relay_file_poll(struct file *filp, poll_table *wait) { __poll_t mask = 0; struct rchan_buf *buf = filp->private_data; if (buf->finalized) return EPOLLERR; if (filp->f_mode & FMODE_READ) { poll_wait(filp, &buf->read_wait, wait); if (!relay_buf_empty(buf)) mask |= EPOLLIN | EPOLLRDNORM; } return mask; } /** * relay_file_release - release file op for relay files * @inode: the inode * @filp: the file * * Decrements the channel refcount, as the filesystem is * no longer using it. */ static int relay_file_release(struct inode *inode, struct file *filp) { struct rchan_buf *buf = filp->private_data; kref_put(&buf->kref, relay_remove_buf); return 0; } /* * relay_file_read_consume - update the consumed count for the buffer */ static void relay_file_read_consume(struct rchan_buf *buf, size_t read_pos, size_t bytes_consumed) { size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; size_t read_subbuf; if (buf->subbufs_produced == buf->subbufs_consumed && buf->offset == buf->bytes_consumed) return; if (buf->bytes_consumed + bytes_consumed > subbuf_size) { relay_subbufs_consumed(buf->chan, buf->cpu, 1); buf->bytes_consumed = 0; } buf->bytes_consumed += bytes_consumed; if (!read_pos) read_subbuf = buf->subbufs_consumed % n_subbufs; else read_subbuf = read_pos / buf->chan->subbuf_size; if (buf->bytes_consumed + buf->padding[read_subbuf] == subbuf_size) { if ((read_subbuf == buf->subbufs_produced % n_subbufs) && (buf->offset == subbuf_size)) return; relay_subbufs_consumed(buf->chan, buf->cpu, 1); buf->bytes_consumed = 0; } } /* * relay_file_read_avail - boolean, are there unconsumed bytes available? */ static int relay_file_read_avail(struct rchan_buf *buf) { size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; size_t produced = buf->subbufs_produced; size_t consumed; relay_file_read_consume(buf, 0, 0); consumed = buf->subbufs_consumed; if (unlikely(buf->offset > subbuf_size)) { if (produced == consumed) return 0; return 1; } if (unlikely(produced - consumed >= n_subbufs)) { consumed = produced - n_subbufs + 1; buf->subbufs_consumed = consumed; buf->bytes_consumed = 0; } produced = (produced % n_subbufs) * subbuf_size + buf->offset; consumed = (consumed % n_subbufs) * subbuf_size + buf->bytes_consumed; if (consumed > produced) produced += n_subbufs * subbuf_size; if (consumed == produced) { if (buf->offset == subbuf_size && buf->subbufs_produced > buf->subbufs_consumed) return 1; return 0; } return 1; } /** * relay_file_read_subbuf_avail - return bytes available in sub-buffer * @read_pos: file read position * @buf: relay channel buffer */ static size_t relay_file_read_subbuf_avail(size_t read_pos, struct rchan_buf *buf) { size_t padding, avail = 0; size_t read_subbuf, read_offset, write_subbuf, write_offset; size_t subbuf_size = buf->chan->subbuf_size; write_subbuf = (buf->data - buf->start) / subbuf_size; write_offset = buf->offset > subbuf_size ? subbuf_size : buf->offset; read_subbuf = read_pos / subbuf_size; read_offset = read_pos % subbuf_size; padding = buf->padding[read_subbuf]; if (read_subbuf == write_subbuf) { if (read_offset + padding < write_offset) avail = write_offset - (read_offset + padding); } else avail = (subbuf_size - padding) - read_offset; return avail; } /** * relay_file_read_start_pos - find the first available byte to read * @buf: relay channel buffer * * If the read_pos is in the middle of padding, return the * position of the first actually available byte, otherwise * return the original value. */ static size_t relay_file_read_start_pos(struct rchan_buf *buf) { size_t read_subbuf, padding, padding_start, padding_end; size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; size_t consumed = buf->subbufs_consumed % n_subbufs; size_t read_pos = (consumed * subbuf_size + buf->bytes_consumed) % (n_subbufs * subbuf_size); read_subbuf = read_pos / subbuf_size; padding = buf->padding[read_subbuf]; padding_start = (read_subbuf + 1) * subbuf_size - padding; padding_end = (read_subbuf + 1) * subbuf_size; if (read_pos >= padding_start && read_pos < padding_end) { read_subbuf = (read_subbuf + 1) % n_subbufs; read_pos = read_subbuf * subbuf_size; } return read_pos; } /** * relay_file_read_end_pos - return the new read position * @read_pos: file read position * @buf: relay channel buffer * @count: number of bytes to be read */ static size_t relay_file_read_end_pos(struct rchan_buf *buf, size_t read_pos, size_t count) { size_t read_subbuf, padding, end_pos; size_t subbuf_size = buf->chan->subbuf_size; size_t n_subbufs = buf->chan->n_subbufs; read_subbuf = read_pos / subbuf_size; padding = buf->padding[read_subbuf]; if (read_pos % subbuf_size + count + padding == subbuf_size) end_pos = (read_subbuf + 1) * subbuf_size; else end_pos = read_pos + count; if (end_pos >= subbuf_size * n_subbufs) end_pos = 0; return end_pos; } static ssize_t relay_file_read(struct file *filp, char __user *buffer, size_t count, loff_t *ppos) { struct rchan_buf *buf = filp->private_data; size_t read_start, avail; size_t written = 0; int ret; if (!count) return 0; inode_lock(file_inode(filp)); do { void *from; if (!relay_file_read_avail(buf)) break; read_start = relay_file_read_start_pos(buf); avail = relay_file_read_subbuf_avail(read_start, buf); if (!avail) break; avail = min(count, avail); from = buf->start + read_start; ret = avail; if (copy_to_user(buffer, from, avail)) break; buffer += ret; written += ret; count -= ret; relay_file_read_consume(buf, read_start, ret); *ppos = relay_file_read_end_pos(buf, read_start, ret); } while (count); inode_unlock(file_inode(filp)); return written; } const struct file_operations relay_file_operations = { .open = relay_file_open, .poll = relay_file_poll, .mmap = relay_file_mmap, .read = relay_file_read, .release = relay_file_release, }; EXPORT_SYMBOL_GPL(relay_file_operations); |
3 3 2 1 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 | // SPDX-License-Identifier: GPL-2.0+ /****************************************************************************** * speedtch.c - Alcatel SpeedTouch USB xDSL modem driver * * Copyright (C) 2001, Alcatel * Copyright (C) 2003, Duncan Sands * Copyright (C) 2004, David Woodhouse * * Based on "modem_run.c", copyright (C) 2001, Benoit Papillault ******************************************************************************/ #include <asm/page.h> #include <linux/device.h> #include <linux/errno.h> #include <linux/firmware.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/timer.h> #include <linux/types.h> #include <linux/usb/ch9.h> #include <linux/workqueue.h> #include "usbatm.h" #define DRIVER_AUTHOR "Johan Verrept, Duncan Sands <duncan.sands@free.fr>" #define DRIVER_DESC "Alcatel SpeedTouch USB driver" static const char speedtch_driver_name[] = "speedtch"; #define CTRL_TIMEOUT 2000 /* milliseconds */ #define DATA_TIMEOUT 2000 /* milliseconds */ #define OFFSET_7 0 /* size 1 */ #define OFFSET_b 1 /* size 8 */ #define OFFSET_d 9 /* size 4 */ #define OFFSET_e 13 /* size 1 */ #define OFFSET_f 14 /* size 1 */ #define SIZE_7 1 #define SIZE_b 8 #define SIZE_d 4 #define SIZE_e 1 #define SIZE_f 1 #define MIN_POLL_DELAY 5000 /* milliseconds */ #define MAX_POLL_DELAY 60000 /* milliseconds */ #define RESUBMIT_DELAY 1000 /* milliseconds */ #define DEFAULT_BULK_ALTSETTING 1 #define DEFAULT_ISOC_ALTSETTING 3 #define DEFAULT_DL_512_FIRST 0 #define DEFAULT_ENABLE_ISOC 0 #define DEFAULT_SW_BUFFERING 0 static unsigned int altsetting = 0; /* zero means: use the default */ static bool dl_512_first = DEFAULT_DL_512_FIRST; static bool enable_isoc = DEFAULT_ENABLE_ISOC; static bool sw_buffering = DEFAULT_SW_BUFFERING; #define DEFAULT_B_MAX_DSL 8128 #define DEFAULT_MODEM_MODE 11 #define MODEM_OPTION_LENGTH 16 static const unsigned char DEFAULT_MODEM_OPTION[MODEM_OPTION_LENGTH] = { 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; static unsigned int BMaxDSL = DEFAULT_B_MAX_DSL; static unsigned char ModemMode = DEFAULT_MODEM_MODE; static unsigned char ModemOption[MODEM_OPTION_LENGTH]; static unsigned int num_ModemOption; module_param(altsetting, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(altsetting, "Alternative setting for data interface (bulk_default: " __MODULE_STRING(DEFAULT_BULK_ALTSETTING) "; isoc_default: " __MODULE_STRING(DEFAULT_ISOC_ALTSETTING) ")"); module_param(dl_512_first, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(dl_512_first, "Read 512 bytes before sending firmware (default: " __MODULE_STRING(DEFAULT_DL_512_FIRST) ")"); module_param(enable_isoc, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(enable_isoc, "Use isochronous transfers if available (default: " __MODULE_STRING(DEFAULT_ENABLE_ISOC) ")"); module_param(sw_buffering, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(sw_buffering, "Enable software buffering (default: " __MODULE_STRING(DEFAULT_SW_BUFFERING) ")"); module_param(BMaxDSL, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(BMaxDSL, "default: " __MODULE_STRING(DEFAULT_B_MAX_DSL)); module_param(ModemMode, byte, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(ModemMode, "default: " __MODULE_STRING(DEFAULT_MODEM_MODE)); module_param_array(ModemOption, byte, &num_ModemOption, S_IRUGO); MODULE_PARM_DESC(ModemOption, "default: 0x10,0x00,0x00,0x00,0x20"); #define INTERFACE_DATA 1 #define ENDPOINT_INT 0x81 #define ENDPOINT_BULK_DATA 0x07 #define ENDPOINT_ISOC_DATA 0x07 #define ENDPOINT_FIRMWARE 0x05 struct speedtch_params { unsigned int altsetting; unsigned int BMaxDSL; unsigned char ModemMode; unsigned char ModemOption[MODEM_OPTION_LENGTH]; }; struct speedtch_instance_data { struct usbatm_data *usbatm; struct speedtch_params params; /* set in probe, constant afterwards */ struct timer_list status_check_timer; struct work_struct status_check_work; unsigned char last_status; int poll_delay; /* milliseconds */ struct timer_list resubmit_timer; struct urb *int_urb; unsigned char int_data[16]; unsigned char scratch_buffer[16]; }; /*************** ** firmware ** ***************/ static void speedtch_set_swbuff(struct speedtch_instance_data *instance, int state) { struct usbatm_data *usbatm = instance->usbatm; struct usb_device *usb_dev = usbatm->usb_dev; int ret; ret = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), 0x32, 0x40, state ? 0x01 : 0x00, 0x00, NULL, 0, CTRL_TIMEOUT); if (ret < 0) usb_warn(usbatm, "%sabling SW buffering: usb_control_msg returned %d\n", state ? "En" : "Dis", ret); else usb_dbg(usbatm, "speedtch_set_swbuff: %sbled SW buffering\n", state ? "En" : "Dis"); } static void speedtch_test_sequence(struct speedtch_instance_data *instance) { struct usbatm_data *usbatm = instance->usbatm; struct usb_device *usb_dev = usbatm->usb_dev; unsigned char *buf = instance->scratch_buffer; int ret; /* URB 147 */ buf[0] = 0x1c; buf[1] = 0x50; ret = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), 0x01, 0x40, 0x0b, 0x00, buf, 2, CTRL_TIMEOUT); if (ret < 0) usb_warn(usbatm, "%s failed on URB147: %d\n", __func__, ret); /* URB 148 */ buf[0] = 0x32; buf[1] = 0x00; ret = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), 0x01, 0x40, 0x02, 0x00, buf, 2, CTRL_TIMEOUT); if (ret < 0) usb_warn(usbatm, "%s failed on URB148: %d\n", __func__, ret); /* URB 149 */ buf[0] = 0x01; buf[1] = 0x00; buf[2] = 0x01; ret = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), 0x01, 0x40, 0x03, 0x00, buf, 3, CTRL_TIMEOUT); if (ret < 0) usb_warn(usbatm, "%s failed on URB149: %d\n", __func__, ret); /* URB 150 */ buf[0] = 0x01; buf[1] = 0x00; buf[2] = 0x01; ret = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), 0x01, 0x40, 0x04, 0x00, buf, 3, CTRL_TIMEOUT); if (ret < 0) usb_warn(usbatm, "%s failed on URB150: %d\n", __func__, ret); /* Extra initialisation in recent drivers - gives higher speeds */ /* URBext1 */ buf[0] = instance->params.ModemMode; ret = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), 0x01, 0x40, 0x11, 0x00, buf, 1, CTRL_TIMEOUT); if (ret < 0) usb_warn(usbatm, "%s failed on URBext1: %d\n", __func__, ret); /* URBext2 */ /* This seems to be the one which actually triggers the higher sync rate -- it does require the new firmware too, although it works OK with older firmware */ ret = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), 0x01, 0x40, 0x14, 0x00, instance->params.ModemOption, MODEM_OPTION_LENGTH, CTRL_TIMEOUT); if (ret < 0) usb_warn(usbatm, "%s failed on URBext2: %d\n", __func__, ret); /* URBext3 */ buf[0] = instance->params.BMaxDSL & 0xff; buf[1] = instance->params.BMaxDSL >> 8; ret = usb_control_msg(usb_dev, usb_sndctrlpipe(usb_dev, 0), 0x01, 0x40, 0x12, 0x00, buf, 2, CTRL_TIMEOUT); if (ret < 0) usb_warn(usbatm, "%s failed on URBext3: %d\n", __func__, ret); } static int speedtch_upload_firmware(struct speedtch_instance_data *instance, const struct firmware *fw1, const struct firmware *fw2) { unsigned char *buffer; struct usbatm_data *usbatm = instance->usbatm; struct usb_device *usb_dev = usbatm->usb_dev; int actual_length; int ret = 0; int offset; usb_dbg(usbatm, "%s entered\n", __func__); buffer = (unsigned char *)__get_free_page(GFP_KERNEL); if (!buffer) { ret = -ENOMEM; usb_dbg(usbatm, "%s: no memory for buffer!\n", __func__); goto out; } if (!usb_ifnum_to_if(usb_dev, 2)) { ret = -ENODEV; usb_dbg(usbatm, "%s: interface not found!\n", __func__); goto out_free; } /* URB 7 */ if (dl_512_first) { /* some modems need a read before writing the firmware */ ret = usb_bulk_msg(usb_dev, usb_rcvbulkpipe(usb_dev, ENDPOINT_FIRMWARE), buffer, 0x200, &actual_length, 2000); if (ret < 0 && ret != -ETIMEDOUT) usb_warn(usbatm, "%s: read BLOCK0 from modem failed (%d)!\n", __func__, ret); else usb_dbg(usbatm, "%s: BLOCK0 downloaded (%d bytes)\n", __func__, ret); } /* URB 8 : both leds are static green */ for (offset = 0; offset < fw1->size; offset += PAGE_SIZE) { int thislen = min_t(int, PAGE_SIZE, fw1->size - offset); memcpy(buffer, fw1->data + offset, thislen); ret = usb_bulk_msg(usb_dev, usb_sndbulkpipe(usb_dev, ENDPOINT_FIRMWARE), buffer, thislen, &actual_length, DATA_TIMEOUT); if (ret < 0) { usb_err(usbatm, "%s: write BLOCK1 to modem failed (%d)!\n", __func__, ret); goto out_free; } usb_dbg(usbatm, "%s: BLOCK1 uploaded (%zu bytes)\n", __func__, fw1->size); } /* USB led blinking green, ADSL led off */ /* URB 11 */ ret = usb_bulk_msg(usb_dev, usb_rcvbulkpipe(usb_dev, ENDPOINT_FIRMWARE), buffer, 0x200, &actual_length, DATA_TIMEOUT); if (ret < 0) { usb_err(usbatm, "%s: read BLOCK2 from modem failed (%d)!\n", __func__, ret); goto out_free; } usb_dbg(usbatm, "%s: BLOCK2 downloaded (%d bytes)\n", __func__, actual_length); /* URBs 12 to 139 - USB led blinking green, ADSL led off */ for (offset = 0; offset < fw2->size; offset += PAGE_SIZE) { int thislen = min_t(int, PAGE_SIZE, fw2->size - offset); memcpy(buffer, fw2->data + offset, thislen); ret = usb_bulk_msg(usb_dev, usb_sndbulkpipe(usb_dev, ENDPOINT_FIRMWARE), buffer, thislen, &actual_length, DATA_TIMEOUT); if (ret < 0) { usb_err(usbatm, "%s: write BLOCK3 to modem failed (%d)!\n", __func__, ret); goto out_free; } } usb_dbg(usbatm, "%s: BLOCK3 uploaded (%zu bytes)\n", __func__, fw2->size); /* USB led static green, ADSL led static red */ /* URB 142 */ ret = usb_bulk_msg(usb_dev, usb_rcvbulkpipe(usb_dev, ENDPOINT_FIRMWARE), buffer, 0x200, &actual_length, DATA_TIMEOUT); if (ret < 0) { usb_err(usbatm, "%s: read BLOCK4 from modem failed (%d)!\n", __func__, ret); goto out_free; } /* success */ usb_dbg(usbatm, "%s: BLOCK4 downloaded (%d bytes)\n", __func__, actual_length); /* Delay to allow firmware to start up. We can do this here because we're in our own kernel thread anyway. */ msleep_interruptible(1000); if ((ret = usb_set_interface(usb_dev, INTERFACE_DATA, instance->params.altsetting)) < 0) { usb_err(usbatm, "%s: setting interface to %d failed (%d)!\n", __func__, instance->params.altsetting, ret); goto out_free; } /* Enable software buffering, if requested */ if (sw_buffering) speedtch_set_swbuff(instance, 1); /* Magic spell; don't ask us what this does */ speedtch_test_sequence(instance); ret = 0; out_free: free_page((unsigned long)buffer); out: return ret; } static int speedtch_find_firmware(struct usbatm_data *usbatm, struct usb_interface *intf, int phase, const struct firmware **fw_p) { struct device *dev = &intf->dev; const u16 bcdDevice = le16_to_cpu(interface_to_usbdev(intf)->descriptor.bcdDevice); const u8 major_revision = bcdDevice >> 8; const u8 minor_revision = bcdDevice & 0xff; char buf[24]; sprintf(buf, "speedtch-%d.bin.%x.%02x", phase, major_revision, minor_revision); usb_dbg(usbatm, "%s: looking for %s\n", __func__, buf); if (request_firmware(fw_p, buf, dev)) { sprintf(buf, "speedtch-%d.bin.%x", phase, major_revision); usb_dbg(usbatm, "%s: looking for %s\n", __func__, buf); if (request_firmware(fw_p, buf, dev)) { sprintf(buf, "speedtch-%d.bin", phase); usb_dbg(usbatm, "%s: looking for %s\n", __func__, buf); if (request_firmware(fw_p, buf, dev)) { usb_err(usbatm, "%s: no stage %d firmware found!\n", __func__, phase); return -ENOENT; } } } usb_info(usbatm, "found stage %d firmware %s\n", phase, buf); return 0; } static int speedtch_heavy_init(struct usbatm_data *usbatm, struct usb_interface *intf) { const struct firmware *fw1, *fw2; struct speedtch_instance_data *instance = usbatm->driver_data; int ret; if ((ret = speedtch_find_firmware(usbatm, intf, 1, &fw1)) < 0) return ret; if ((ret = speedtch_find_firmware(usbatm, intf, 2, &fw2)) < 0) { release_firmware(fw1); return ret; } if ((ret = speedtch_upload_firmware(instance, fw1, fw2)) < 0) usb_err(usbatm, "%s: firmware upload failed (%d)!\n", __func__, ret); release_firmware(fw2); release_firmware(fw1); return ret; } /********** ** ATM ** **********/ static int speedtch_read_status(struct speedtch_instance_data *instance) { struct usbatm_data *usbatm = instance->usbatm; struct usb_device *usb_dev = usbatm->usb_dev; unsigned char *buf = instance->scratch_buffer; int ret; memset(buf, 0, 16); ret = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), 0x12, 0xc0, 0x07, 0x00, buf + OFFSET_7, SIZE_7, CTRL_TIMEOUT); if (ret < 0) { atm_dbg(usbatm, "%s: MSG 7 failed\n", __func__); return ret; } ret = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), 0x12, 0xc0, 0x0b, 0x00, buf + OFFSET_b, SIZE_b, CTRL_TIMEOUT); if (ret < 0) { atm_dbg(usbatm, "%s: MSG B failed\n", __func__); return ret; } ret = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), 0x12, 0xc0, 0x0d, 0x00, buf + OFFSET_d, SIZE_d, CTRL_TIMEOUT); if (ret < 0) { atm_dbg(usbatm, "%s: MSG D failed\n", __func__); return ret; } ret = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), 0x01, 0xc0, 0x0e, 0x00, buf + OFFSET_e, SIZE_e, CTRL_TIMEOUT); if (ret < 0) { atm_dbg(usbatm, "%s: MSG E failed\n", __func__); return ret; } ret = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), 0x01, 0xc0, 0x0f, 0x00, buf + OFFSET_f, SIZE_f, CTRL_TIMEOUT); if (ret < 0) { atm_dbg(usbatm, "%s: MSG F failed\n", __func__); return ret; } return 0; } static int speedtch_start_synchro(struct speedtch_instance_data *instance) { struct usbatm_data *usbatm = instance->usbatm; struct usb_device *usb_dev = usbatm->usb_dev; unsigned char *buf = instance->scratch_buffer; int ret; atm_dbg(usbatm, "%s entered\n", __func__); memset(buf, 0, 2); ret = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), 0x12, 0xc0, 0x04, 0x00, buf, 2, CTRL_TIMEOUT); if (ret < 0) atm_warn(usbatm, "failed to start ADSL synchronisation: %d\n", ret); else atm_dbg(usbatm, "%s: modem prodded. %d bytes returned: %02x %02x\n", __func__, ret, buf[0], buf[1]); return ret; } static void speedtch_check_status(struct work_struct *work) { struct speedtch_instance_data *instance = container_of(work, struct speedtch_instance_data, status_check_work); struct usbatm_data *usbatm = instance->usbatm; struct atm_dev *atm_dev = usbatm->atm_dev; unsigned char *buf = instance->scratch_buffer; int down_speed, up_speed, ret; unsigned char status; #ifdef VERBOSE_DEBUG atm_dbg(usbatm, "%s entered\n", __func__); #endif ret = speedtch_read_status(instance); if (ret < 0) { atm_warn(usbatm, "error %d fetching device status\n", ret); instance->poll_delay = min(2 * instance->poll_delay, MAX_POLL_DELAY); return; } instance->poll_delay = max(instance->poll_delay / 2, MIN_POLL_DELAY); status = buf[OFFSET_7]; if ((status != instance->last_status) || !status) { atm_dbg(usbatm, "%s: line state 0x%02x\n", __func__, status); switch (status) { case 0: atm_dev_signal_change(atm_dev, ATM_PHY_SIG_LOST); if (instance->last_status) atm_info(usbatm, "ADSL line is down\n"); /* It may never resync again unless we ask it to... */ ret = speedtch_start_synchro(instance); break; case 0x08: atm_dev_signal_change(atm_dev, ATM_PHY_SIG_UNKNOWN); atm_info(usbatm, "ADSL line is blocked?\n"); break; case 0x10: atm_dev_signal_change(atm_dev, ATM_PHY_SIG_LOST); atm_info(usbatm, "ADSL line is synchronising\n"); break; case 0x20: down_speed = buf[OFFSET_b] | (buf[OFFSET_b + 1] << 8) | (buf[OFFSET_b + 2] << 16) | (buf[OFFSET_b + 3] << 24); up_speed = buf[OFFSET_b + 4] | (buf[OFFSET_b + 5] << 8) | (buf[OFFSET_b + 6] << 16) | (buf[OFFSET_b + 7] << 24); if (!(down_speed & 0x0000ffff) && !(up_speed & 0x0000ffff)) { down_speed >>= 16; up_speed >>= 16; } atm_dev->link_rate = down_speed * 1000 / 424; atm_dev_signal_change(atm_dev, ATM_PHY_SIG_FOUND); atm_info(usbatm, "ADSL line is up (%d kb/s down | %d kb/s up)\n", down_speed, up_speed); break; default: atm_dev_signal_change(atm_dev, ATM_PHY_SIG_UNKNOWN); atm_info(usbatm, "unknown line state %02x\n", status); break; } instance->last_status = status; } } static void speedtch_status_poll(struct timer_list *t) { struct speedtch_instance_data *instance = from_timer(instance, t, status_check_timer); schedule_work(&instance->status_check_work); /* The following check is racy, but the race is harmless */ if (instance->poll_delay < MAX_POLL_DELAY) mod_timer(&instance->status_check_timer, jiffies + msecs_to_jiffies(instance->poll_delay)); else atm_warn(instance->usbatm, "Too many failures - disabling line status polling\n"); } static void speedtch_resubmit_int(struct timer_list *t) { struct speedtch_instance_data *instance = from_timer(instance, t, resubmit_timer); struct urb *int_urb = instance->int_urb; int ret; atm_dbg(instance->usbatm, "%s entered\n", __func__); if (int_urb) { ret = usb_submit_urb(int_urb, GFP_ATOMIC); if (!ret) schedule_work(&instance->status_check_work); else { atm_dbg(instance->usbatm, "%s: usb_submit_urb failed with result %d\n", __func__, ret); mod_timer(&instance->resubmit_timer, jiffies + msecs_to_jiffies(RESUBMIT_DELAY)); } } } static void speedtch_handle_int(struct urb *int_urb) { struct speedtch_instance_data *instance = int_urb->context; struct usbatm_data *usbatm = instance->usbatm; unsigned int count = int_urb->actual_length; int status = int_urb->status; int ret; /* The magic interrupt for "up state" */ static const unsigned char up_int[6] = { 0xa1, 0x00, 0x01, 0x00, 0x00, 0x00 }; /* The magic interrupt for "down state" */ static const unsigned char down_int[6] = { 0xa1, 0x00, 0x00, 0x00, 0x00, 0x00 }; atm_dbg(usbatm, "%s entered\n", __func__); if (status < 0) { atm_dbg(usbatm, "%s: nonzero urb status %d!\n", __func__, status); goto fail; } if ((count == 6) && !memcmp(up_int, instance->int_data, 6)) { del_timer(&instance->status_check_timer); atm_info(usbatm, "DSL line goes up\n"); } else if ((count == 6) && !memcmp(down_int, instance->int_data, 6)) { atm_info(usbatm, "DSL line goes down\n"); } else { int i; atm_dbg(usbatm, "%s: unknown interrupt packet of length %d:", __func__, count); for (i = 0; i < count; i++) printk(" %02x", instance->int_data[i]); printk("\n"); goto fail; } int_urb = instance->int_urb; if (int_urb) { ret = usb_submit_urb(int_urb, GFP_ATOMIC); schedule_work(&instance->status_check_work); if (ret < 0) { atm_dbg(usbatm, "%s: usb_submit_urb failed with result %d\n", __func__, ret); goto fail; } } return; fail: int_urb = instance->int_urb; if (int_urb) mod_timer(&instance->resubmit_timer, jiffies + msecs_to_jiffies(RESUBMIT_DELAY)); } static int speedtch_atm_start(struct usbatm_data *usbatm, struct atm_dev *atm_dev) { struct usb_device *usb_dev = usbatm->usb_dev; struct speedtch_instance_data *instance = usbatm->driver_data; int i, ret; unsigned char mac_str[13]; atm_dbg(usbatm, "%s entered\n", __func__); /* Set MAC address, it is stored in the serial number */ memset(atm_dev->esi, 0, sizeof(atm_dev->esi)); if (usb_string(usb_dev, usb_dev->descriptor.iSerialNumber, mac_str, sizeof(mac_str)) == 12) { for (i = 0; i < 6; i++) atm_dev->esi[i] = (hex_to_bin(mac_str[i * 2]) << 4) + hex_to_bin(mac_str[i * 2 + 1]); } /* Start modem synchronisation */ ret = speedtch_start_synchro(instance); /* Set up interrupt endpoint */ if (instance->int_urb) { ret = usb_submit_urb(instance->int_urb, GFP_KERNEL); if (ret < 0) { /* Doesn't matter; we'll poll anyway */ atm_dbg(usbatm, "%s: submission of interrupt URB failed (%d)!\n", __func__, ret); usb_free_urb(instance->int_urb); instance->int_urb = NULL; } } /* Start status polling */ mod_timer(&instance->status_check_timer, jiffies + msecs_to_jiffies(1000)); return 0; } static void speedtch_atm_stop(struct usbatm_data *usbatm, struct atm_dev *atm_dev) { struct speedtch_instance_data *instance = usbatm->driver_data; struct urb *int_urb = instance->int_urb; atm_dbg(usbatm, "%s entered\n", __func__); del_timer_sync(&instance->status_check_timer); /* * Since resubmit_timer and int_urb can schedule themselves and * each other, shutting them down correctly takes some care */ instance->int_urb = NULL; /* signal shutdown */ mb(); usb_kill_urb(int_urb); del_timer_sync(&instance->resubmit_timer); /* * At this point, speedtch_handle_int and speedtch_resubmit_int * can run or be running, but instance->int_urb == NULL means that * they will not reschedule */ usb_kill_urb(int_urb); del_timer_sync(&instance->resubmit_timer); usb_free_urb(int_urb); flush_work(&instance->status_check_work); } static int speedtch_pre_reset(struct usb_interface *intf) { return 0; } static int speedtch_post_reset(struct usb_interface *intf) { return 0; } /********** ** USB ** **********/ static const struct usb_device_id speedtch_usb_ids[] = { {USB_DEVICE(0x06b9, 0x4061)}, {} }; MODULE_DEVICE_TABLE(usb, speedtch_usb_ids); static int speedtch_usb_probe(struct usb_interface *, const struct usb_device_id *); static struct usb_driver speedtch_usb_driver = { .name = speedtch_driver_name, .probe = speedtch_usb_probe, .disconnect = usbatm_usb_disconnect, .pre_reset = speedtch_pre_reset, .post_reset = speedtch_post_reset, .id_table = speedtch_usb_ids }; static void speedtch_release_interfaces(struct usb_device *usb_dev, int num_interfaces) { struct usb_interface *cur_intf; int i; for (i = 0; i < num_interfaces; i++) { cur_intf = usb_ifnum_to_if(usb_dev, i); if (cur_intf) { usb_set_intfdata(cur_intf, NULL); usb_driver_release_interface(&speedtch_usb_driver, cur_intf); } } } static int speedtch_bind(struct usbatm_data *usbatm, struct usb_interface *intf, const struct usb_device_id *id) { struct usb_device *usb_dev = interface_to_usbdev(intf); struct usb_interface *cur_intf, *data_intf; struct speedtch_instance_data *instance; int ifnum = intf->altsetting->desc.bInterfaceNumber; int num_interfaces = usb_dev->actconfig->desc.bNumInterfaces; int i, ret; int use_isoc; usb_dbg(usbatm, "%s entered\n", __func__); /* sanity checks */ if (usb_dev->descriptor.bDeviceClass != USB_CLASS_VENDOR_SPEC) { usb_err(usbatm, "%s: wrong device class %d\n", __func__, usb_dev->descriptor.bDeviceClass); return -ENODEV; } data_intf = usb_ifnum_to_if(usb_dev, INTERFACE_DATA); if (!data_intf) { usb_err(usbatm, "%s: data interface not found!\n", __func__); return -ENODEV; } /* claim all interfaces */ for (i = 0; i < num_interfaces; i++) { cur_intf = usb_ifnum_to_if(usb_dev, i); if ((i != ifnum) && cur_intf) { ret = usb_driver_claim_interface(&speedtch_usb_driver, cur_intf, usbatm); if (ret < 0) { usb_err(usbatm, "%s: failed to claim interface %2d (%d)!\n", __func__, i, ret); speedtch_release_interfaces(usb_dev, i); return ret; } } } instance = kzalloc(sizeof(*instance), GFP_KERNEL); if (!instance) { ret = -ENOMEM; goto fail_release; } instance->usbatm = usbatm; /* module parameters may change at any moment, so take a snapshot */ instance->params.altsetting = altsetting; instance->params.BMaxDSL = BMaxDSL; instance->params.ModemMode = ModemMode; memcpy(instance->params.ModemOption, DEFAULT_MODEM_OPTION, MODEM_OPTION_LENGTH); memcpy(instance->params.ModemOption, ModemOption, num_ModemOption); use_isoc = enable_isoc; if (instance->params.altsetting) if ((ret = usb_set_interface(usb_dev, INTERFACE_DATA, instance->params.altsetting)) < 0) { usb_err(usbatm, "%s: setting interface to %2d failed (%d)!\n", __func__, instance->params.altsetting, ret); instance->params.altsetting = 0; /* fall back to default */ } if (!instance->params.altsetting && use_isoc) if ((ret = usb_set_interface(usb_dev, INTERFACE_DATA, DEFAULT_ISOC_ALTSETTING)) < 0) { usb_dbg(usbatm, "%s: setting interface to %2d failed (%d)!\n", __func__, DEFAULT_ISOC_ALTSETTING, ret); use_isoc = 0; /* fall back to bulk */ } if (use_isoc) { const struct usb_host_interface *desc = data_intf->cur_altsetting; const __u8 target_address = USB_DIR_IN | usbatm->driver->isoc_in; use_isoc = 0; /* fall back to bulk if endpoint not found */ for (i = 0; i < desc->desc.bNumEndpoints; i++) { const struct usb_endpoint_descriptor *endpoint_desc = &desc->endpoint[i].desc; if ((endpoint_desc->bEndpointAddress == target_address)) { use_isoc = usb_endpoint_xfer_isoc(endpoint_desc); break; } } if (!use_isoc) usb_info(usbatm, "isochronous transfer not supported - using bulk\n"); } if (!use_isoc && !instance->params.altsetting) if ((ret = usb_set_interface(usb_dev, INTERFACE_DATA, DEFAULT_BULK_ALTSETTING)) < 0) { usb_err(usbatm, "%s: setting interface to %2d failed (%d)!\n", __func__, DEFAULT_BULK_ALTSETTING, ret); goto fail_free; } if (!instance->params.altsetting) instance->params.altsetting = use_isoc ? DEFAULT_ISOC_ALTSETTING : DEFAULT_BULK_ALTSETTING; usbatm->flags |= (use_isoc ? UDSL_USE_ISOC : 0); INIT_WORK(&instance->status_check_work, speedtch_check_status); timer_setup(&instance->status_check_timer, speedtch_status_poll, 0); instance->last_status = 0xff; instance->poll_delay = MIN_POLL_DELAY; timer_setup(&instance->resubmit_timer, speedtch_resubmit_int, 0); instance->int_urb = usb_alloc_urb(0, GFP_KERNEL); if (instance->int_urb) usb_fill_int_urb(instance->int_urb, usb_dev, usb_rcvintpipe(usb_dev, ENDPOINT_INT), instance->int_data, sizeof(instance->int_data), speedtch_handle_int, instance, 16); else usb_dbg(usbatm, "%s: no memory for interrupt urb!\n", __func__); /* check whether the modem already seems to be alive */ ret = usb_control_msg(usb_dev, usb_rcvctrlpipe(usb_dev, 0), 0x12, 0xc0, 0x07, 0x00, instance->scratch_buffer + OFFSET_7, SIZE_7, 500); usbatm->flags |= (ret == SIZE_7 ? UDSL_SKIP_HEAVY_INIT : 0); usb_dbg(usbatm, "%s: firmware %s loaded\n", __func__, usbatm->flags & UDSL_SKIP_HEAVY_INIT ? "already" : "not"); if (!(usbatm->flags & UDSL_SKIP_HEAVY_INIT)) if ((ret = usb_reset_device(usb_dev)) < 0) { usb_err(usbatm, "%s: device reset failed (%d)!\n", __func__, ret); goto fail_free; } usbatm->driver_data = instance; return 0; fail_free: usb_free_urb(instance->int_urb); kfree(instance); fail_release: speedtch_release_interfaces(usb_dev, num_interfaces); return ret; } static void speedtch_unbind(struct usbatm_data *usbatm, struct usb_interface *intf) { struct usb_device *usb_dev = interface_to_usbdev(intf); struct speedtch_instance_data *instance = usbatm->driver_data; usb_dbg(usbatm, "%s entered\n", __func__); speedtch_release_interfaces(usb_dev, usb_dev->actconfig->desc.bNumInterfaces); usb_free_urb(instance->int_urb); kfree(instance); } /*********** ** init ** ***********/ static struct usbatm_driver speedtch_usbatm_driver = { .driver_name = speedtch_driver_name, .bind = speedtch_bind, .heavy_init = speedtch_heavy_init, .unbind = speedtch_unbind, .atm_start = speedtch_atm_start, .atm_stop = speedtch_atm_stop, .bulk_in = ENDPOINT_BULK_DATA, .bulk_out = ENDPOINT_BULK_DATA, .isoc_in = ENDPOINT_ISOC_DATA }; static int speedtch_usb_probe(struct usb_interface *intf, const struct usb_device_id *id) { return usbatm_usb_probe(intf, id, &speedtch_usbatm_driver); } module_usb_driver(speedtch_usb_driver); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); MODULE_LICENSE("GPL"); |
334 12 313 14 6 19 4 4 32 45 45 44 45 66 11 69 3 27 42 57 57 57 57 57 57 57 27 10 19 14 15 10 19 27 27 14 7 9 4 2 3 1 3 15 1 248 2 28 2 9 5 56 6 245 248 249 4 3 245 5 245 246 32 202 37 192 202 161 62 179 38 192 2 201 204 249 251 202 204 147 62 3 3 6 59 98 3 93 2 25 2 26 6 4 66 11 259 39 3 82 82 76 55 1 78 3 5 1 28 69 18 61 63 27 29 13 16 5 24 2 1 2 2 5 2 1 1 1 6 3 3 6 7 2 5 4 1 1 2 1 1 2 89 1 89 89 48 46 6 83 85 3 62 1 1 60 51 2 2 48 2 1 1 74 9 57 48 59 8 84 2 2 2 2 78 8 76 2 78 33 2 2 2 1 27 1 23 1 27 57 56 1 24 2 2 7 2 4 1 2 2 3 1 2 4 3 1 3 2 1 32 4 33 8 1 7 4 5 1 1 13 3 10 3 4 5 9 2 7 6 51 2 49 18 31 47 1 37 15 2 17 38 4 5 13 34 63 44 8 11 5 58 9 2 1 6 9 2 1 1 3 2 3 7 1 4 4 5 2 2 1 182 179 2 4 175 79 108 58 42 42 41 43 43 27 26 26 218 219 212 144 142 14 14 4 4 4 4 3 4 1 4 4 4 4 4 4 4 4 4 4 4 4 4 1 4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 | // SPDX-License-Identifier: GPL-2.0-or-later /* * ALSA sequencer Client Manager * Copyright (c) 1998-2001 by Frank van de Pol <fvdpol@coil.demon.nl> * Jaroslav Kysela <perex@perex.cz> * Takashi Iwai <tiwai@suse.de> */ #include <linux/init.h> #include <linux/export.h> #include <linux/slab.h> #include <sound/core.h> #include <sound/minors.h> #include <linux/kmod.h> #include <sound/seq_kernel.h> #include <sound/ump.h> #include "seq_clientmgr.h" #include "seq_memory.h" #include "seq_queue.h" #include "seq_timer.h" #include "seq_info.h" #include "seq_system.h" #include "seq_ump_convert.h" #include <sound/seq_device.h> #ifdef CONFIG_COMPAT #include <linux/compat.h> #endif /* Client Manager * this module handles the connections of userland and kernel clients * */ /* * There are four ranges of client numbers (last two shared): * 0..15: global clients * 16..127: statically allocated client numbers for cards 0..27 * 128..191: dynamically allocated client numbers for cards 28..31 * 128..191: dynamically allocated client numbers for applications */ /* number of kernel non-card clients */ #define SNDRV_SEQ_GLOBAL_CLIENTS 16 /* clients per cards, for static clients */ #define SNDRV_SEQ_CLIENTS_PER_CARD 4 /* dynamically allocated client numbers (both kernel drivers and user space) */ #define SNDRV_SEQ_DYNAMIC_CLIENTS_BEGIN 128 #define SNDRV_SEQ_LFLG_INPUT 0x0001 #define SNDRV_SEQ_LFLG_OUTPUT 0x0002 #define SNDRV_SEQ_LFLG_OPEN (SNDRV_SEQ_LFLG_INPUT|SNDRV_SEQ_LFLG_OUTPUT) static DEFINE_SPINLOCK(clients_lock); static DEFINE_MUTEX(register_mutex); /* * client table */ static char clienttablock[SNDRV_SEQ_MAX_CLIENTS]; static struct snd_seq_client *clienttab[SNDRV_SEQ_MAX_CLIENTS]; static struct snd_seq_usage client_usage; /* * prototypes */ static int bounce_error_event(struct snd_seq_client *client, struct snd_seq_event *event, int err, int atomic, int hop); static int snd_seq_deliver_single_event(struct snd_seq_client *client, struct snd_seq_event *event, int atomic, int hop); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) static void free_ump_info(struct snd_seq_client *client); #endif /* */ static inline unsigned short snd_seq_file_flags(struct file *file) { switch (file->f_mode & (FMODE_READ | FMODE_WRITE)) { case FMODE_WRITE: return SNDRV_SEQ_LFLG_OUTPUT; case FMODE_READ: return SNDRV_SEQ_LFLG_INPUT; default: return SNDRV_SEQ_LFLG_OPEN; } } static inline int snd_seq_write_pool_allocated(struct snd_seq_client *client) { return snd_seq_total_cells(client->pool) > 0; } /* return pointer to client structure for specified id */ static struct snd_seq_client *clientptr(int clientid) { if (clientid < 0 || clientid >= SNDRV_SEQ_MAX_CLIENTS) { pr_debug("ALSA: seq: oops. Trying to get pointer to client %d\n", clientid); return NULL; } return clienttab[clientid]; } struct snd_seq_client *snd_seq_client_use_ptr(int clientid) { unsigned long flags; struct snd_seq_client *client; if (clientid < 0 || clientid >= SNDRV_SEQ_MAX_CLIENTS) { pr_debug("ALSA: seq: oops. Trying to get pointer to client %d\n", clientid); return NULL; } spin_lock_irqsave(&clients_lock, flags); client = clientptr(clientid); if (client) goto __lock; if (clienttablock[clientid]) { spin_unlock_irqrestore(&clients_lock, flags); return NULL; } spin_unlock_irqrestore(&clients_lock, flags); #ifdef CONFIG_MODULES if (!in_interrupt()) { static DECLARE_BITMAP(client_requested, SNDRV_SEQ_GLOBAL_CLIENTS); static DECLARE_BITMAP(card_requested, SNDRV_CARDS); if (clientid < SNDRV_SEQ_GLOBAL_CLIENTS) { int idx; if (!test_and_set_bit(clientid, client_requested)) { for (idx = 0; idx < 15; idx++) { if (seq_client_load[idx] < 0) break; if (seq_client_load[idx] == clientid) { request_module("snd-seq-client-%i", clientid); break; } } } } else if (clientid < SNDRV_SEQ_DYNAMIC_CLIENTS_BEGIN) { int card = (clientid - SNDRV_SEQ_GLOBAL_CLIENTS) / SNDRV_SEQ_CLIENTS_PER_CARD; if (card < snd_ecards_limit) { if (!test_and_set_bit(card, card_requested)) snd_request_card(card); snd_seq_device_load_drivers(); } } spin_lock_irqsave(&clients_lock, flags); client = clientptr(clientid); if (client) goto __lock; spin_unlock_irqrestore(&clients_lock, flags); } #endif return NULL; __lock: snd_use_lock_use(&client->use_lock); spin_unlock_irqrestore(&clients_lock, flags); return client; } /* Take refcount and perform ioctl_mutex lock on the given client; * used only for OSS sequencer * Unlock via snd_seq_client_ioctl_unlock() below */ bool snd_seq_client_ioctl_lock(int clientid) { struct snd_seq_client *client; client = snd_seq_client_use_ptr(clientid); if (!client) return false; mutex_lock(&client->ioctl_mutex); /* The client isn't unrefed here; see snd_seq_client_ioctl_unlock() */ return true; } EXPORT_SYMBOL_GPL(snd_seq_client_ioctl_lock); /* Unlock and unref the given client; for OSS sequencer use only */ void snd_seq_client_ioctl_unlock(int clientid) { struct snd_seq_client *client; client = snd_seq_client_use_ptr(clientid); if (WARN_ON(!client)) return; mutex_unlock(&client->ioctl_mutex); /* The doubly unrefs below are intentional; the first one releases the * leftover from snd_seq_client_ioctl_lock() above, and the second one * is for releasing snd_seq_client_use_ptr() in this function */ snd_seq_client_unlock(client); snd_seq_client_unlock(client); } EXPORT_SYMBOL_GPL(snd_seq_client_ioctl_unlock); static void usage_alloc(struct snd_seq_usage *res, int num) { res->cur += num; if (res->cur > res->peak) res->peak = res->cur; } static void usage_free(struct snd_seq_usage *res, int num) { res->cur -= num; } /* initialise data structures */ int __init client_init_data(void) { /* zap out the client table */ memset(&clienttablock, 0, sizeof(clienttablock)); memset(&clienttab, 0, sizeof(clienttab)); return 0; } static struct snd_seq_client *seq_create_client1(int client_index, int poolsize) { int c; struct snd_seq_client *client; /* init client data */ client = kzalloc(sizeof(*client), GFP_KERNEL); if (client == NULL) return NULL; client->pool = snd_seq_pool_new(poolsize); if (client->pool == NULL) { kfree(client); return NULL; } client->type = NO_CLIENT; snd_use_lock_init(&client->use_lock); rwlock_init(&client->ports_lock); mutex_init(&client->ports_mutex); INIT_LIST_HEAD(&client->ports_list_head); mutex_init(&client->ioctl_mutex); client->ump_endpoint_port = -1; /* find free slot in the client table */ spin_lock_irq(&clients_lock); if (client_index < 0) { for (c = SNDRV_SEQ_DYNAMIC_CLIENTS_BEGIN; c < SNDRV_SEQ_MAX_CLIENTS; c++) { if (clienttab[c] || clienttablock[c]) continue; clienttab[client->number = c] = client; spin_unlock_irq(&clients_lock); return client; } } else { if (clienttab[client_index] == NULL && !clienttablock[client_index]) { clienttab[client->number = client_index] = client; spin_unlock_irq(&clients_lock); return client; } } spin_unlock_irq(&clients_lock); snd_seq_pool_delete(&client->pool); kfree(client); return NULL; /* no free slot found or busy, return failure code */ } static int seq_free_client1(struct snd_seq_client *client) { if (!client) return 0; spin_lock_irq(&clients_lock); clienttablock[client->number] = 1; clienttab[client->number] = NULL; spin_unlock_irq(&clients_lock); snd_seq_delete_all_ports(client); snd_seq_queue_client_leave(client->number); snd_use_lock_sync(&client->use_lock); if (client->pool) snd_seq_pool_delete(&client->pool); spin_lock_irq(&clients_lock); clienttablock[client->number] = 0; spin_unlock_irq(&clients_lock); return 0; } static void seq_free_client(struct snd_seq_client * client) { mutex_lock(®ister_mutex); switch (client->type) { case NO_CLIENT: pr_warn("ALSA: seq: Trying to free unused client %d\n", client->number); break; case USER_CLIENT: case KERNEL_CLIENT: seq_free_client1(client); usage_free(&client_usage, 1); break; default: pr_err("ALSA: seq: Trying to free client %d with undefined type = %d\n", client->number, client->type); } mutex_unlock(®ister_mutex); snd_seq_system_client_ev_client_exit(client->number); } /* -------------------------------------------------------- */ /* create a user client */ static int snd_seq_open(struct inode *inode, struct file *file) { int c, mode; /* client id */ struct snd_seq_client *client; struct snd_seq_user_client *user; int err; err = stream_open(inode, file); if (err < 0) return err; mutex_lock(®ister_mutex); client = seq_create_client1(-1, SNDRV_SEQ_DEFAULT_EVENTS); if (!client) { mutex_unlock(®ister_mutex); return -ENOMEM; /* failure code */ } mode = snd_seq_file_flags(file); if (mode & SNDRV_SEQ_LFLG_INPUT) client->accept_input = 1; if (mode & SNDRV_SEQ_LFLG_OUTPUT) client->accept_output = 1; user = &client->data.user; user->fifo = NULL; user->fifo_pool_size = 0; if (mode & SNDRV_SEQ_LFLG_INPUT) { user->fifo_pool_size = SNDRV_SEQ_DEFAULT_CLIENT_EVENTS; user->fifo = snd_seq_fifo_new(user->fifo_pool_size); if (user->fifo == NULL) { seq_free_client1(client); kfree(client); mutex_unlock(®ister_mutex); return -ENOMEM; } } usage_alloc(&client_usage, 1); client->type = USER_CLIENT; mutex_unlock(®ister_mutex); c = client->number; file->private_data = client; /* fill client data */ user->file = file; sprintf(client->name, "Client-%d", c); client->data.user.owner = get_pid(task_pid(current)); /* make others aware this new client */ snd_seq_system_client_ev_client_start(c); return 0; } /* delete a user client */ static int snd_seq_release(struct inode *inode, struct file *file) { struct snd_seq_client *client = file->private_data; if (client) { seq_free_client(client); if (client->data.user.fifo) snd_seq_fifo_delete(&client->data.user.fifo); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) free_ump_info(client); #endif put_pid(client->data.user.owner); kfree(client); } return 0; } static bool event_is_compatible(const struct snd_seq_client *client, const struct snd_seq_event *ev) { if (snd_seq_ev_is_ump(ev) && !client->midi_version) return false; if (snd_seq_ev_is_ump(ev) && snd_seq_ev_is_variable(ev)) return false; return true; } /* handle client read() */ /* possible error values: * -ENXIO invalid client or file open mode * -ENOSPC FIFO overflow (the flag is cleared after this error report) * -EINVAL no enough user-space buffer to write the whole event * -EFAULT seg. fault during copy to user space */ static ssize_t snd_seq_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { struct snd_seq_client *client = file->private_data; struct snd_seq_fifo *fifo; size_t aligned_size; int err; long result = 0; struct snd_seq_event_cell *cell; if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_INPUT)) return -ENXIO; if (!access_ok(buf, count)) return -EFAULT; /* check client structures are in place */ if (snd_BUG_ON(!client)) return -ENXIO; if (!client->accept_input) return -ENXIO; fifo = client->data.user.fifo; if (!fifo) return -ENXIO; if (atomic_read(&fifo->overflow) > 0) { /* buffer overflow is detected */ snd_seq_fifo_clear(fifo); /* return error code */ return -ENOSPC; } cell = NULL; err = 0; snd_seq_fifo_lock(fifo); if (IS_ENABLED(CONFIG_SND_SEQ_UMP) && client->midi_version > 0) aligned_size = sizeof(struct snd_seq_ump_event); else aligned_size = sizeof(struct snd_seq_event); /* while data available in queue */ while (count >= aligned_size) { int nonblock; nonblock = (file->f_flags & O_NONBLOCK) || result > 0; err = snd_seq_fifo_cell_out(fifo, &cell, nonblock); if (err < 0) break; if (!event_is_compatible(client, &cell->event)) { snd_seq_cell_free(cell); cell = NULL; continue; } if (snd_seq_ev_is_variable(&cell->event)) { struct snd_seq_ump_event tmpev; memcpy(&tmpev, &cell->event, aligned_size); tmpev.data.ext.len &= ~SNDRV_SEQ_EXT_MASK; if (copy_to_user(buf, &tmpev, aligned_size)) { err = -EFAULT; break; } count -= aligned_size; buf += aligned_size; err = snd_seq_expand_var_event(&cell->event, count, (char __force *)buf, 0, aligned_size); if (err < 0) break; result += err; count -= err; buf += err; } else { if (copy_to_user(buf, &cell->event, aligned_size)) { err = -EFAULT; break; } count -= aligned_size; buf += aligned_size; } snd_seq_cell_free(cell); cell = NULL; /* to be sure */ result += aligned_size; } if (err < 0) { if (cell) snd_seq_fifo_cell_putback(fifo, cell); if (err == -EAGAIN && result > 0) err = 0; } snd_seq_fifo_unlock(fifo); return (err < 0) ? err : result; } /* * check access permission to the port */ static int check_port_perm(struct snd_seq_client_port *port, unsigned int flags) { if ((port->capability & flags) != flags) return 0; return flags; } /* * check if the destination client is available, and return the pointer */ static struct snd_seq_client *get_event_dest_client(struct snd_seq_event *event) { struct snd_seq_client *dest; dest = snd_seq_client_use_ptr(event->dest.client); if (dest == NULL) return NULL; if (! dest->accept_input) goto __not_avail; if (snd_seq_ev_is_ump(event)) return dest; /* ok - no filter checks */ if ((dest->filter & SNDRV_SEQ_FILTER_USE_EVENT) && ! test_bit(event->type, dest->event_filter)) goto __not_avail; return dest; /* ok - accessible */ __not_avail: snd_seq_client_unlock(dest); return NULL; } /* * Return the error event. * * If the receiver client is a user client, the original event is * encapsulated in SNDRV_SEQ_EVENT_BOUNCE as variable length event. If * the original event is also variable length, the external data is * copied after the event record. * If the receiver client is a kernel client, the original event is * quoted in SNDRV_SEQ_EVENT_KERNEL_ERROR, since this requires no extra * kmalloc. */ static int bounce_error_event(struct snd_seq_client *client, struct snd_seq_event *event, int err, int atomic, int hop) { struct snd_seq_event bounce_ev; int result; if (client == NULL || ! (client->filter & SNDRV_SEQ_FILTER_BOUNCE) || ! client->accept_input) return 0; /* ignored */ /* set up quoted error */ memset(&bounce_ev, 0, sizeof(bounce_ev)); bounce_ev.type = SNDRV_SEQ_EVENT_KERNEL_ERROR; bounce_ev.flags = SNDRV_SEQ_EVENT_LENGTH_FIXED; bounce_ev.queue = SNDRV_SEQ_QUEUE_DIRECT; bounce_ev.source.client = SNDRV_SEQ_CLIENT_SYSTEM; bounce_ev.source.port = SNDRV_SEQ_PORT_SYSTEM_ANNOUNCE; bounce_ev.dest.client = client->number; bounce_ev.dest.port = event->source.port; bounce_ev.data.quote.origin = event->dest; bounce_ev.data.quote.event = event; bounce_ev.data.quote.value = -err; /* use positive value */ result = snd_seq_deliver_single_event(NULL, &bounce_ev, atomic, hop + 1); if (result < 0) { client->event_lost++; return result; } return result; } /* * rewrite the time-stamp of the event record with the curren time * of the given queue. * return non-zero if updated. */ static int update_timestamp_of_queue(struct snd_seq_event *event, int queue, int real_time) { struct snd_seq_queue *q; q = queueptr(queue); if (! q) return 0; event->queue = queue; event->flags &= ~SNDRV_SEQ_TIME_STAMP_MASK; if (real_time) { event->time.time = snd_seq_timer_get_cur_time(q->timer, true); event->flags |= SNDRV_SEQ_TIME_STAMP_REAL; } else { event->time.tick = snd_seq_timer_get_cur_tick(q->timer); event->flags |= SNDRV_SEQ_TIME_STAMP_TICK; } queuefree(q); return 1; } /* deliver a single event; called from below and UMP converter */ int __snd_seq_deliver_single_event(struct snd_seq_client *dest, struct snd_seq_client_port *dest_port, struct snd_seq_event *event, int atomic, int hop) { switch (dest->type) { case USER_CLIENT: if (!dest->data.user.fifo) return 0; return snd_seq_fifo_event_in(dest->data.user.fifo, event); case KERNEL_CLIENT: if (!dest_port->event_input) return 0; return dest_port->event_input(event, snd_seq_ev_is_direct(event), dest_port->private_data, atomic, hop); } return 0; } /* * deliver an event to the specified destination. * if filter is non-zero, client filter bitmap is tested. * * RETURN VALUE: 0 : if succeeded * <0 : error */ static int snd_seq_deliver_single_event(struct snd_seq_client *client, struct snd_seq_event *event, int atomic, int hop) { struct snd_seq_client *dest = NULL; struct snd_seq_client_port *dest_port = NULL; int result = -ENOENT; int direct; direct = snd_seq_ev_is_direct(event); dest = get_event_dest_client(event); if (dest == NULL) goto __skip; dest_port = snd_seq_port_use_ptr(dest, event->dest.port); if (dest_port == NULL) goto __skip; /* check permission */ if (! check_port_perm(dest_port, SNDRV_SEQ_PORT_CAP_WRITE)) { result = -EPERM; goto __skip; } if (dest_port->timestamping) update_timestamp_of_queue(event, dest_port->time_queue, dest_port->time_real); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) { if (snd_seq_ev_is_ump(event)) { result = snd_seq_deliver_from_ump(client, dest, dest_port, event, atomic, hop); goto __skip; } else if (snd_seq_client_is_ump(dest)) { result = snd_seq_deliver_to_ump(client, dest, dest_port, event, atomic, hop); goto __skip; } } #endif /* CONFIG_SND_SEQ_UMP */ result = __snd_seq_deliver_single_event(dest, dest_port, event, atomic, hop); __skip: if (dest_port) snd_seq_port_unlock(dest_port); if (dest) snd_seq_client_unlock(dest); if (result < 0 && !direct) { result = bounce_error_event(client, event, result, atomic, hop); } return result; } /* * send the event to all subscribers: */ static int __deliver_to_subscribers(struct snd_seq_client *client, struct snd_seq_event *event, struct snd_seq_client_port *src_port, int atomic, int hop) { struct snd_seq_subscribers *subs; int err, result = 0, num_ev = 0; union __snd_seq_event event_saved; size_t saved_size; struct snd_seq_port_subs_info *grp; /* save original event record */ saved_size = snd_seq_event_packet_size(event); memcpy(&event_saved, event, saved_size); grp = &src_port->c_src; /* lock list */ if (atomic) read_lock(&grp->list_lock); else down_read_nested(&grp->list_mutex, hop); list_for_each_entry(subs, &grp->list_head, src_list) { /* both ports ready? */ if (atomic_read(&subs->ref_count) != 2) continue; event->dest = subs->info.dest; if (subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIMESTAMP) /* convert time according to flag with subscription */ update_timestamp_of_queue(event, subs->info.queue, subs->info.flags & SNDRV_SEQ_PORT_SUBS_TIME_REAL); err = snd_seq_deliver_single_event(client, event, atomic, hop); if (err < 0) { /* save first error that occurs and continue */ if (!result) result = err; continue; } num_ev++; /* restore original event record */ memcpy(event, &event_saved, saved_size); } if (atomic) read_unlock(&grp->list_lock); else up_read(&grp->list_mutex); memcpy(event, &event_saved, saved_size); return (result < 0) ? result : num_ev; } static int deliver_to_subscribers(struct snd_seq_client *client, struct snd_seq_event *event, int atomic, int hop) { struct snd_seq_client_port *src_port; int ret = 0, ret2; src_port = snd_seq_port_use_ptr(client, event->source.port); if (src_port) { ret = __deliver_to_subscribers(client, event, src_port, atomic, hop); snd_seq_port_unlock(src_port); } if (client->ump_endpoint_port < 0 || event->source.port == client->ump_endpoint_port) return ret; src_port = snd_seq_port_use_ptr(client, client->ump_endpoint_port); if (!src_port) return ret; ret2 = __deliver_to_subscribers(client, event, src_port, atomic, hop); snd_seq_port_unlock(src_port); return ret2 < 0 ? ret2 : ret; } /* deliver an event to the destination port(s). * if the event is to subscribers or broadcast, the event is dispatched * to multiple targets. * * RETURN VALUE: n > 0 : the number of delivered events. * n == 0 : the event was not passed to any client. * n < 0 : error - event was not processed. */ static int snd_seq_deliver_event(struct snd_seq_client *client, struct snd_seq_event *event, int atomic, int hop) { int result; hop++; if (hop >= SNDRV_SEQ_MAX_HOPS) { pr_debug("ALSA: seq: too long delivery path (%d:%d->%d:%d)\n", event->source.client, event->source.port, event->dest.client, event->dest.port); return -EMLINK; } if (snd_seq_ev_is_variable(event) && snd_BUG_ON(atomic && (event->data.ext.len & SNDRV_SEQ_EXT_USRPTR))) return -EINVAL; if (event->queue == SNDRV_SEQ_ADDRESS_SUBSCRIBERS || event->dest.client == SNDRV_SEQ_ADDRESS_SUBSCRIBERS) result = deliver_to_subscribers(client, event, atomic, hop); else result = snd_seq_deliver_single_event(client, event, atomic, hop); return result; } /* * dispatch an event cell: * This function is called only from queue check routines in timer * interrupts or after enqueued. * The event cell shall be released or re-queued in this function. * * RETURN VALUE: n > 0 : the number of delivered events. * n == 0 : the event was not passed to any client. * n < 0 : error - event was not processed. */ int snd_seq_dispatch_event(struct snd_seq_event_cell *cell, int atomic, int hop) { struct snd_seq_client *client; int result; if (snd_BUG_ON(!cell)) return -EINVAL; client = snd_seq_client_use_ptr(cell->event.source.client); if (client == NULL) { snd_seq_cell_free(cell); /* release this cell */ return -EINVAL; } if (!snd_seq_ev_is_ump(&cell->event) && cell->event.type == SNDRV_SEQ_EVENT_NOTE) { /* NOTE event: * the event cell is re-used as a NOTE-OFF event and * enqueued again. */ struct snd_seq_event tmpev, *ev; /* reserve this event to enqueue note-off later */ tmpev = cell->event; tmpev.type = SNDRV_SEQ_EVENT_NOTEON; result = snd_seq_deliver_event(client, &tmpev, atomic, hop); /* * This was originally a note event. We now re-use the * cell for the note-off event. */ ev = &cell->event; ev->type = SNDRV_SEQ_EVENT_NOTEOFF; ev->flags |= SNDRV_SEQ_PRIORITY_HIGH; /* add the duration time */ switch (ev->flags & SNDRV_SEQ_TIME_STAMP_MASK) { case SNDRV_SEQ_TIME_STAMP_TICK: cell->event.time.tick += ev->data.note.duration; break; case SNDRV_SEQ_TIME_STAMP_REAL: /* unit for duration is ms */ ev->time.time.tv_nsec += 1000000 * (ev->data.note.duration % 1000); ev->time.time.tv_sec += ev->data.note.duration / 1000 + ev->time.time.tv_nsec / 1000000000; ev->time.time.tv_nsec %= 1000000000; break; } ev->data.note.velocity = ev->data.note.off_velocity; /* Now queue this cell as the note off event */ if (snd_seq_enqueue_event(cell, atomic, hop) < 0) snd_seq_cell_free(cell); /* release this cell */ } else { /* Normal events: * event cell is freed after processing the event */ result = snd_seq_deliver_event(client, &cell->event, atomic, hop); snd_seq_cell_free(cell); } snd_seq_client_unlock(client); return result; } /* Allocate a cell from client pool and enqueue it to queue: * if pool is empty and blocking is TRUE, sleep until a new cell is * available. */ static int snd_seq_client_enqueue_event(struct snd_seq_client *client, struct snd_seq_event *event, struct file *file, int blocking, int atomic, int hop, struct mutex *mutexp) { struct snd_seq_event_cell *cell; int err; /* special queue values - force direct passing */ if (event->queue == SNDRV_SEQ_ADDRESS_SUBSCRIBERS) { event->dest.client = SNDRV_SEQ_ADDRESS_SUBSCRIBERS; event->queue = SNDRV_SEQ_QUEUE_DIRECT; } else if (event->dest.client == SNDRV_SEQ_ADDRESS_SUBSCRIBERS) { /* check presence of source port */ struct snd_seq_client_port *src_port = snd_seq_port_use_ptr(client, event->source.port); if (src_port == NULL) return -EINVAL; snd_seq_port_unlock(src_port); } /* direct event processing without enqueued */ if (snd_seq_ev_is_direct(event)) { if (!snd_seq_ev_is_ump(event) && event->type == SNDRV_SEQ_EVENT_NOTE) return -EINVAL; /* this event must be enqueued! */ return snd_seq_deliver_event(client, event, atomic, hop); } /* Not direct, normal queuing */ if (snd_seq_queue_is_used(event->queue, client->number) <= 0) return -EINVAL; /* invalid queue */ if (! snd_seq_write_pool_allocated(client)) return -ENXIO; /* queue is not allocated */ /* allocate an event cell */ err = snd_seq_event_dup(client->pool, event, &cell, !blocking || atomic, file, mutexp); if (err < 0) return err; /* we got a cell. enqueue it. */ err = snd_seq_enqueue_event(cell, atomic, hop); if (err < 0) { snd_seq_cell_free(cell); return err; } return 0; } /* * check validity of event type and data length. * return non-zero if invalid. */ static int check_event_type_and_length(struct snd_seq_event *ev) { switch (snd_seq_ev_length_type(ev)) { case SNDRV_SEQ_EVENT_LENGTH_FIXED: if (snd_seq_ev_is_variable_type(ev)) return -EINVAL; break; case SNDRV_SEQ_EVENT_LENGTH_VARIABLE: if (! snd_seq_ev_is_variable_type(ev) || (ev->data.ext.len & ~SNDRV_SEQ_EXT_MASK) >= SNDRV_SEQ_MAX_EVENT_LEN) return -EINVAL; break; case SNDRV_SEQ_EVENT_LENGTH_VARUSR: if (! snd_seq_ev_is_direct(ev)) return -EINVAL; break; } return 0; } /* handle write() */ /* possible error values: * -ENXIO invalid client or file open mode * -ENOMEM malloc failed * -EFAULT seg. fault during copy from user space * -EINVAL invalid event * -EAGAIN no space in output pool * -EINTR interrupts while sleep * -EMLINK too many hops * others depends on return value from driver callback */ static ssize_t snd_seq_write(struct file *file, const char __user *buf, size_t count, loff_t *offset) { struct snd_seq_client *client = file->private_data; int written = 0, len; int err, handled; union __snd_seq_event __event; struct snd_seq_event *ev = &__event.legacy; if (!(snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT)) return -ENXIO; /* check client structures are in place */ if (snd_BUG_ON(!client)) return -ENXIO; if (!client->accept_output || client->pool == NULL) return -ENXIO; repeat: handled = 0; /* allocate the pool now if the pool is not allocated yet */ mutex_lock(&client->ioctl_mutex); if (client->pool->size > 0 && !snd_seq_write_pool_allocated(client)) { err = snd_seq_pool_init(client->pool); if (err < 0) goto out; } /* only process whole events */ err = -EINVAL; while (count >= sizeof(struct snd_seq_event)) { /* Read in the event header from the user */ len = sizeof(struct snd_seq_event); if (copy_from_user(ev, buf, len)) { err = -EFAULT; break; } /* read in the rest bytes for UMP events */ if (snd_seq_ev_is_ump(ev)) { if (count < sizeof(struct snd_seq_ump_event)) break; if (copy_from_user((char *)ev + len, buf + len, sizeof(struct snd_seq_ump_event) - len)) { err = -EFAULT; break; } len = sizeof(struct snd_seq_ump_event); } ev->source.client = client->number; /* fill in client number */ /* Check for extension data length */ if (check_event_type_and_length(ev)) { err = -EINVAL; break; } if (!event_is_compatible(client, ev)) { err = -EINVAL; break; } /* check for special events */ if (!snd_seq_ev_is_ump(ev)) { if (ev->type == SNDRV_SEQ_EVENT_NONE) goto __skip_event; else if (snd_seq_ev_is_reserved(ev)) { err = -EINVAL; break; } } if (snd_seq_ev_is_variable(ev)) { int extlen = ev->data.ext.len & ~SNDRV_SEQ_EXT_MASK; if ((size_t)(extlen + len) > count) { /* back out, will get an error this time or next */ err = -EINVAL; break; } /* set user space pointer */ ev->data.ext.len = extlen | SNDRV_SEQ_EXT_USRPTR; ev->data.ext.ptr = (char __force *)buf + len; len += extlen; /* increment data length */ } else { #ifdef CONFIG_COMPAT if (client->convert32 && snd_seq_ev_is_varusr(ev)) ev->data.ext.ptr = (void __force *)compat_ptr(ev->data.raw32.d[1]); #endif } /* ok, enqueue it */ err = snd_seq_client_enqueue_event(client, ev, file, !(file->f_flags & O_NONBLOCK), 0, 0, &client->ioctl_mutex); if (err < 0) break; handled++; __skip_event: /* Update pointers and counts */ count -= len; buf += len; written += len; /* let's have a coffee break if too many events are queued */ if (++handled >= 200) { mutex_unlock(&client->ioctl_mutex); goto repeat; } } out: mutex_unlock(&client->ioctl_mutex); return written ? written : err; } /* * handle polling */ static __poll_t snd_seq_poll(struct file *file, poll_table * wait) { struct snd_seq_client *client = file->private_data; __poll_t mask = 0; /* check client structures are in place */ if (snd_BUG_ON(!client)) return EPOLLERR; if ((snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_INPUT) && client->data.user.fifo) { /* check if data is available in the outqueue */ if (snd_seq_fifo_poll_wait(client->data.user.fifo, file, wait)) mask |= EPOLLIN | EPOLLRDNORM; } if (snd_seq_file_flags(file) & SNDRV_SEQ_LFLG_OUTPUT) { /* check if data is available in the pool */ if (!snd_seq_write_pool_allocated(client) || snd_seq_pool_poll_wait(client->pool, file, wait)) mask |= EPOLLOUT | EPOLLWRNORM; } return mask; } /*-----------------------------------------------------*/ static int snd_seq_ioctl_pversion(struct snd_seq_client *client, void *arg) { int *pversion = arg; *pversion = SNDRV_SEQ_VERSION; return 0; } static int snd_seq_ioctl_user_pversion(struct snd_seq_client *client, void *arg) { client->user_pversion = *(unsigned int *)arg; return 0; } static int snd_seq_ioctl_client_id(struct snd_seq_client *client, void *arg) { int *client_id = arg; *client_id = client->number; return 0; } /* SYSTEM_INFO ioctl() */ static int snd_seq_ioctl_system_info(struct snd_seq_client *client, void *arg) { struct snd_seq_system_info *info = arg; memset(info, 0, sizeof(*info)); /* fill the info fields */ info->queues = SNDRV_SEQ_MAX_QUEUES; info->clients = SNDRV_SEQ_MAX_CLIENTS; info->ports = SNDRV_SEQ_MAX_PORTS; info->channels = 256; /* fixed limit */ info->cur_clients = client_usage.cur; info->cur_queues = snd_seq_queue_get_cur_queues(); return 0; } /* RUNNING_MODE ioctl() */ static int snd_seq_ioctl_running_mode(struct snd_seq_client *client, void *arg) { struct snd_seq_running_info *info = arg; struct snd_seq_client *cptr; int err = 0; /* requested client number */ cptr = snd_seq_client_use_ptr(info->client); if (cptr == NULL) return -ENOENT; /* don't change !!! */ #ifdef SNDRV_BIG_ENDIAN if (!info->big_endian) { err = -EINVAL; goto __err; } #else if (info->big_endian) { err = -EINVAL; goto __err; } #endif if (info->cpu_mode > sizeof(long)) { err = -EINVAL; goto __err; } cptr->convert32 = (info->cpu_mode < sizeof(long)); __err: snd_seq_client_unlock(cptr); return err; } /* CLIENT_INFO ioctl() */ static void get_client_info(struct snd_seq_client *cptr, struct snd_seq_client_info *info) { info->client = cptr->number; /* fill the info fields */ info->type = cptr->type; strcpy(info->name, cptr->name); info->filter = cptr->filter; info->event_lost = cptr->event_lost; memcpy(info->event_filter, cptr->event_filter, 32); info->group_filter = cptr->group_filter; info->num_ports = cptr->num_ports; if (cptr->type == USER_CLIENT) info->pid = pid_vnr(cptr->data.user.owner); else info->pid = -1; if (cptr->type == KERNEL_CLIENT) info->card = cptr->data.kernel.card ? cptr->data.kernel.card->number : -1; else info->card = -1; info->midi_version = cptr->midi_version; memset(info->reserved, 0, sizeof(info->reserved)); } static int snd_seq_ioctl_get_client_info(struct snd_seq_client *client, void *arg) { struct snd_seq_client_info *client_info = arg; struct snd_seq_client *cptr; /* requested client number */ cptr = snd_seq_client_use_ptr(client_info->client); if (cptr == NULL) return -ENOENT; /* don't change !!! */ get_client_info(cptr, client_info); snd_seq_client_unlock(cptr); return 0; } /* CLIENT_INFO ioctl() */ static int snd_seq_ioctl_set_client_info(struct snd_seq_client *client, void *arg) { struct snd_seq_client_info *client_info = arg; /* it is not allowed to set the info fields for an another client */ if (client->number != client_info->client) return -EPERM; /* also client type must be set now */ if (client->type != client_info->type) return -EINVAL; /* check validity of midi_version field */ if (client->user_pversion >= SNDRV_PROTOCOL_VERSION(1, 0, 3) && client_info->midi_version > SNDRV_SEQ_CLIENT_UMP_MIDI_2_0) return -EINVAL; /* fill the info fields */ if (client_info->name[0]) strscpy(client->name, client_info->name, sizeof(client->name)); client->filter = client_info->filter; client->event_lost = client_info->event_lost; if (client->user_pversion >= SNDRV_PROTOCOL_VERSION(1, 0, 3)) client->midi_version = client_info->midi_version; memcpy(client->event_filter, client_info->event_filter, 32); client->group_filter = client_info->group_filter; return 0; } /* * CREATE PORT ioctl() */ static int snd_seq_ioctl_create_port(struct snd_seq_client *client, void *arg) { struct snd_seq_port_info *info = arg; struct snd_seq_client_port *port; struct snd_seq_port_callback *callback; int port_idx, err; /* it is not allowed to create the port for an another client */ if (info->addr.client != client->number) return -EPERM; if (client->type == USER_CLIENT && info->kernel) return -EINVAL; if ((info->capability & SNDRV_SEQ_PORT_CAP_UMP_ENDPOINT) && client->ump_endpoint_port >= 0) return -EBUSY; if (info->flags & SNDRV_SEQ_PORT_FLG_GIVEN_PORT) port_idx = info->addr.port; else port_idx = -1; if (port_idx >= SNDRV_SEQ_ADDRESS_UNKNOWN) return -EINVAL; err = snd_seq_create_port(client, port_idx, &port); if (err < 0) return err; if (client->type == KERNEL_CLIENT) { callback = info->kernel; if (callback) { if (callback->owner) port->owner = callback->owner; port->private_data = callback->private_data; port->private_free = callback->private_free; port->event_input = callback->event_input; port->c_src.open = callback->subscribe; port->c_src.close = callback->unsubscribe; port->c_dest.open = callback->use; port->c_dest.close = callback->unuse; } } info->addr = port->addr; snd_seq_set_port_info(port, info); if (info->capability & SNDRV_SEQ_PORT_CAP_UMP_ENDPOINT) client->ump_endpoint_port = port->addr.port; snd_seq_system_client_ev_port_start(port->addr.client, port->addr.port); snd_seq_port_unlock(port); return 0; } /* * DELETE PORT ioctl() */ static int snd_seq_ioctl_delete_port(struct snd_seq_client *client, void *arg) { struct snd_seq_port_info *info = arg; int err; /* it is not allowed to remove the port for an another client */ if (info->addr.client != client->number) return -EPERM; err = snd_seq_delete_port(client, info->addr.port); if (err >= 0) { if (client->ump_endpoint_port == info->addr.port) client->ump_endpoint_port = -1; snd_seq_system_client_ev_port_exit(client->number, info->addr.port); } return err; } /* * GET_PORT_INFO ioctl() (on any client) */ static int snd_seq_ioctl_get_port_info(struct snd_seq_client *client, void *arg) { struct snd_seq_port_info *info = arg; struct snd_seq_client *cptr; struct snd_seq_client_port *port; cptr = snd_seq_client_use_ptr(info->addr.client); if (cptr == NULL) return -ENXIO; port = snd_seq_port_use_ptr(cptr, info->addr.port); if (port == NULL) { snd_seq_client_unlock(cptr); return -ENOENT; /* don't change */ } /* get port info */ snd_seq_get_port_info(port, info); snd_seq_port_unlock(port); snd_seq_client_unlock(cptr); return 0; } /* * SET_PORT_INFO ioctl() (only ports on this/own client) */ static int snd_seq_ioctl_set_port_info(struct snd_seq_client *client, void *arg) { struct snd_seq_port_info *info = arg; struct snd_seq_client_port *port; if (info->addr.client != client->number) /* only set our own ports ! */ return -EPERM; port = snd_seq_port_use_ptr(client, info->addr.port); if (port) { snd_seq_set_port_info(port, info); snd_seq_port_unlock(port); } return 0; } /* * port subscription (connection) */ #define PERM_RD (SNDRV_SEQ_PORT_CAP_READ|SNDRV_SEQ_PORT_CAP_SUBS_READ) #define PERM_WR (SNDRV_SEQ_PORT_CAP_WRITE|SNDRV_SEQ_PORT_CAP_SUBS_WRITE) static int check_subscription_permission(struct snd_seq_client *client, struct snd_seq_client_port *sport, struct snd_seq_client_port *dport, struct snd_seq_port_subscribe *subs) { if (client->number != subs->sender.client && client->number != subs->dest.client) { /* connection by third client - check export permission */ if (check_port_perm(sport, SNDRV_SEQ_PORT_CAP_NO_EXPORT)) return -EPERM; if (check_port_perm(dport, SNDRV_SEQ_PORT_CAP_NO_EXPORT)) return -EPERM; } /* check read permission */ /* if sender or receiver is the subscribing client itself, * no permission check is necessary */ if (client->number != subs->sender.client) { if (! check_port_perm(sport, PERM_RD)) return -EPERM; } /* check write permission */ if (client->number != subs->dest.client) { if (! check_port_perm(dport, PERM_WR)) return -EPERM; } return 0; } /* * send an subscription notify event to user client: * client must be user client. */ int snd_seq_client_notify_subscription(int client, int port, struct snd_seq_port_subscribe *info, int evtype) { struct snd_seq_event event; memset(&event, 0, sizeof(event)); event.type = evtype; event.data.connect.dest = info->dest; event.data.connect.sender = info->sender; return snd_seq_system_notify(client, port, &event); /* non-atomic */ } /* * add to port's subscription list IOCTL interface */ static int snd_seq_ioctl_subscribe_port(struct snd_seq_client *client, void *arg) { struct snd_seq_port_subscribe *subs = arg; int result = -EINVAL; struct snd_seq_client *receiver = NULL, *sender = NULL; struct snd_seq_client_port *sport = NULL, *dport = NULL; receiver = snd_seq_client_use_ptr(subs->dest.client); if (!receiver) goto __end; sender = snd_seq_client_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); if (!sport) goto __end; dport = snd_seq_port_use_ptr(receiver, subs->dest.port); if (!dport) goto __end; result = check_subscription_permission(client, sport, dport, subs); if (result < 0) goto __end; /* connect them */ result = snd_seq_port_connect(client, sender, sport, receiver, dport, subs); if (! result) /* broadcast announce */ snd_seq_client_notify_subscription(SNDRV_SEQ_ADDRESS_SUBSCRIBERS, 0, subs, SNDRV_SEQ_EVENT_PORT_SUBSCRIBED); __end: if (sport) snd_seq_port_unlock(sport); if (dport) snd_seq_port_unlock(dport); if (sender) snd_seq_client_unlock(sender); if (receiver) snd_seq_client_unlock(receiver); return result; } /* * remove from port's subscription list */ static int snd_seq_ioctl_unsubscribe_port(struct snd_seq_client *client, void *arg) { struct snd_seq_port_subscribe *subs = arg; int result = -ENXIO; struct snd_seq_client *receiver = NULL, *sender = NULL; struct snd_seq_client_port *sport = NULL, *dport = NULL; receiver = snd_seq_client_use_ptr(subs->dest.client); if (!receiver) goto __end; sender = snd_seq_client_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); if (!sport) goto __end; dport = snd_seq_port_use_ptr(receiver, subs->dest.port); if (!dport) goto __end; result = check_subscription_permission(client, sport, dport, subs); if (result < 0) goto __end; result = snd_seq_port_disconnect(client, sender, sport, receiver, dport, subs); if (! result) /* broadcast announce */ snd_seq_client_notify_subscription(SNDRV_SEQ_ADDRESS_SUBSCRIBERS, 0, subs, SNDRV_SEQ_EVENT_PORT_UNSUBSCRIBED); __end: if (sport) snd_seq_port_unlock(sport); if (dport) snd_seq_port_unlock(dport); if (sender) snd_seq_client_unlock(sender); if (receiver) snd_seq_client_unlock(receiver); return result; } /* CREATE_QUEUE ioctl() */ static int snd_seq_ioctl_create_queue(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; struct snd_seq_queue *q; q = snd_seq_queue_alloc(client->number, info->locked, info->flags); if (IS_ERR(q)) return PTR_ERR(q); info->queue = q->queue; info->locked = q->locked; info->owner = q->owner; /* set queue name */ if (!info->name[0]) snprintf(info->name, sizeof(info->name), "Queue-%d", q->queue); strscpy(q->name, info->name, sizeof(q->name)); snd_use_lock_free(&q->use_lock); return 0; } /* DELETE_QUEUE ioctl() */ static int snd_seq_ioctl_delete_queue(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; return snd_seq_queue_delete(client->number, info->queue); } /* GET_QUEUE_INFO ioctl() */ static int snd_seq_ioctl_get_queue_info(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; struct snd_seq_queue *q; q = queueptr(info->queue); if (q == NULL) return -EINVAL; memset(info, 0, sizeof(*info)); info->queue = q->queue; info->owner = q->owner; info->locked = q->locked; strscpy(info->name, q->name, sizeof(info->name)); queuefree(q); return 0; } /* SET_QUEUE_INFO ioctl() */ static int snd_seq_ioctl_set_queue_info(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; struct snd_seq_queue *q; if (info->owner != client->number) return -EINVAL; /* change owner/locked permission */ if (snd_seq_queue_check_access(info->queue, client->number)) { if (snd_seq_queue_set_owner(info->queue, client->number, info->locked) < 0) return -EPERM; if (info->locked) snd_seq_queue_use(info->queue, client->number, 1); } else { return -EPERM; } q = queueptr(info->queue); if (! q) return -EINVAL; if (q->owner != client->number) { queuefree(q); return -EPERM; } strscpy(q->name, info->name, sizeof(q->name)); queuefree(q); return 0; } /* GET_NAMED_QUEUE ioctl() */ static int snd_seq_ioctl_get_named_queue(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_info *info = arg; struct snd_seq_queue *q; q = snd_seq_queue_find_name(info->name); if (q == NULL) return -EINVAL; info->queue = q->queue; info->owner = q->owner; info->locked = q->locked; queuefree(q); return 0; } /* GET_QUEUE_STATUS ioctl() */ static int snd_seq_ioctl_get_queue_status(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_status *status = arg; struct snd_seq_queue *queue; struct snd_seq_timer *tmr; queue = queueptr(status->queue); if (queue == NULL) return -EINVAL; memset(status, 0, sizeof(*status)); status->queue = queue->queue; tmr = queue->timer; status->events = queue->tickq->cells + queue->timeq->cells; status->time = snd_seq_timer_get_cur_time(tmr, true); status->tick = snd_seq_timer_get_cur_tick(tmr); status->running = tmr->running; status->flags = queue->flags; queuefree(queue); return 0; } /* GET_QUEUE_TEMPO ioctl() */ static int snd_seq_ioctl_get_queue_tempo(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_tempo *tempo = arg; struct snd_seq_queue *queue; struct snd_seq_timer *tmr; queue = queueptr(tempo->queue); if (queue == NULL) return -EINVAL; memset(tempo, 0, sizeof(*tempo)); tempo->queue = queue->queue; tmr = queue->timer; tempo->tempo = tmr->tempo; tempo->ppq = tmr->ppq; tempo->skew_value = tmr->skew; tempo->skew_base = tmr->skew_base; if (client->user_pversion >= SNDRV_PROTOCOL_VERSION(1, 0, 4)) tempo->tempo_base = tmr->tempo_base; queuefree(queue); return 0; } /* SET_QUEUE_TEMPO ioctl() */ int snd_seq_set_queue_tempo(int client, struct snd_seq_queue_tempo *tempo) { if (!snd_seq_queue_check_access(tempo->queue, client)) return -EPERM; return snd_seq_queue_timer_set_tempo(tempo->queue, client, tempo); } EXPORT_SYMBOL(snd_seq_set_queue_tempo); static int snd_seq_ioctl_set_queue_tempo(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_tempo *tempo = arg; int result; if (client->user_pversion < SNDRV_PROTOCOL_VERSION(1, 0, 4)) tempo->tempo_base = 0; result = snd_seq_set_queue_tempo(client->number, tempo); return result < 0 ? result : 0; } /* GET_QUEUE_TIMER ioctl() */ static int snd_seq_ioctl_get_queue_timer(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_timer *timer = arg; struct snd_seq_queue *queue; struct snd_seq_timer *tmr; queue = queueptr(timer->queue); if (queue == NULL) return -EINVAL; mutex_lock(&queue->timer_mutex); tmr = queue->timer; memset(timer, 0, sizeof(*timer)); timer->queue = queue->queue; timer->type = tmr->type; if (tmr->type == SNDRV_SEQ_TIMER_ALSA) { timer->u.alsa.id = tmr->alsa_id; timer->u.alsa.resolution = tmr->preferred_resolution; } mutex_unlock(&queue->timer_mutex); queuefree(queue); return 0; } /* SET_QUEUE_TIMER ioctl() */ static int snd_seq_ioctl_set_queue_timer(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_timer *timer = arg; int result = 0; if (timer->type != SNDRV_SEQ_TIMER_ALSA) return -EINVAL; if (snd_seq_queue_check_access(timer->queue, client->number)) { struct snd_seq_queue *q; struct snd_seq_timer *tmr; q = queueptr(timer->queue); if (q == NULL) return -ENXIO; mutex_lock(&q->timer_mutex); tmr = q->timer; snd_seq_queue_timer_close(timer->queue); tmr->type = timer->type; if (tmr->type == SNDRV_SEQ_TIMER_ALSA) { tmr->alsa_id = timer->u.alsa.id; tmr->preferred_resolution = timer->u.alsa.resolution; } result = snd_seq_queue_timer_open(timer->queue); mutex_unlock(&q->timer_mutex); queuefree(q); } else { return -EPERM; } return result; } /* GET_QUEUE_CLIENT ioctl() */ static int snd_seq_ioctl_get_queue_client(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_client *info = arg; int used; used = snd_seq_queue_is_used(info->queue, client->number); if (used < 0) return -EINVAL; info->used = used; info->client = client->number; return 0; } /* SET_QUEUE_CLIENT ioctl() */ static int snd_seq_ioctl_set_queue_client(struct snd_seq_client *client, void *arg) { struct snd_seq_queue_client *info = arg; int err; if (info->used >= 0) { err = snd_seq_queue_use(info->queue, client->number, info->used); if (err < 0) return err; } return snd_seq_ioctl_get_queue_client(client, arg); } /* GET_CLIENT_POOL ioctl() */ static int snd_seq_ioctl_get_client_pool(struct snd_seq_client *client, void *arg) { struct snd_seq_client_pool *info = arg; struct snd_seq_client *cptr; cptr = snd_seq_client_use_ptr(info->client); if (cptr == NULL) return -ENOENT; memset(info, 0, sizeof(*info)); info->client = cptr->number; info->output_pool = cptr->pool->size; info->output_room = cptr->pool->room; info->output_free = info->output_pool; info->output_free = snd_seq_unused_cells(cptr->pool); if (cptr->type == USER_CLIENT) { info->input_pool = cptr->data.user.fifo_pool_size; info->input_free = info->input_pool; info->input_free = snd_seq_fifo_unused_cells(cptr->data.user.fifo); } else { info->input_pool = 0; info->input_free = 0; } snd_seq_client_unlock(cptr); return 0; } /* SET_CLIENT_POOL ioctl() */ static int snd_seq_ioctl_set_client_pool(struct snd_seq_client *client, void *arg) { struct snd_seq_client_pool *info = arg; int rc; if (client->number != info->client) return -EINVAL; /* can't change other clients */ if (info->output_pool >= 1 && info->output_pool <= SNDRV_SEQ_MAX_EVENTS && (! snd_seq_write_pool_allocated(client) || info->output_pool != client->pool->size)) { if (snd_seq_write_pool_allocated(client)) { /* is the pool in use? */ if (atomic_read(&client->pool->counter)) return -EBUSY; /* remove all existing cells */ snd_seq_pool_mark_closing(client->pool); snd_seq_pool_done(client->pool); } client->pool->size = info->output_pool; rc = snd_seq_pool_init(client->pool); if (rc < 0) return rc; } if (client->type == USER_CLIENT && client->data.user.fifo != NULL && info->input_pool >= 1 && info->input_pool <= SNDRV_SEQ_MAX_CLIENT_EVENTS && info->input_pool != client->data.user.fifo_pool_size) { /* change pool size */ rc = snd_seq_fifo_resize(client->data.user.fifo, info->input_pool); if (rc < 0) return rc; client->data.user.fifo_pool_size = info->input_pool; } if (info->output_room >= 1 && info->output_room <= client->pool->size) { client->pool->room = info->output_room; } return snd_seq_ioctl_get_client_pool(client, arg); } /* REMOVE_EVENTS ioctl() */ static int snd_seq_ioctl_remove_events(struct snd_seq_client *client, void *arg) { struct snd_seq_remove_events *info = arg; /* * Input mostly not implemented XXX. */ if (info->remove_mode & SNDRV_SEQ_REMOVE_INPUT) { /* * No restrictions so for a user client we can clear * the whole fifo */ if (client->type == USER_CLIENT && client->data.user.fifo) snd_seq_fifo_clear(client->data.user.fifo); } if (info->remove_mode & SNDRV_SEQ_REMOVE_OUTPUT) snd_seq_queue_remove_cells(client->number, info); return 0; } /* * get subscription info */ static int snd_seq_ioctl_get_subscription(struct snd_seq_client *client, void *arg) { struct snd_seq_port_subscribe *subs = arg; int result; struct snd_seq_client *sender = NULL; struct snd_seq_client_port *sport = NULL; result = -EINVAL; sender = snd_seq_client_use_ptr(subs->sender.client); if (!sender) goto __end; sport = snd_seq_port_use_ptr(sender, subs->sender.port); if (!sport) goto __end; result = snd_seq_port_get_subscription(&sport->c_src, &subs->dest, subs); __end: if (sport) snd_seq_port_unlock(sport); if (sender) snd_seq_client_unlock(sender); return result; } /* * get subscription info - check only its presence */ static int snd_seq_ioctl_query_subs(struct snd_seq_client *client, void *arg) { struct snd_seq_query_subs *subs = arg; int result = -ENXIO; struct snd_seq_client *cptr = NULL; struct snd_seq_client_port *port = NULL; struct snd_seq_port_subs_info *group; struct list_head *p; int i; cptr = snd_seq_client_use_ptr(subs->root.client); if (!cptr) goto __end; port = snd_seq_port_use_ptr(cptr, subs->root.port); if (!port) goto __end; switch (subs->type) { case SNDRV_SEQ_QUERY_SUBS_READ: group = &port->c_src; break; case SNDRV_SEQ_QUERY_SUBS_WRITE: group = &port->c_dest; break; default: goto __end; } down_read(&group->list_mutex); /* search for the subscriber */ subs->num_subs = group->count; i = 0; result = -ENOENT; list_for_each(p, &group->list_head) { if (i++ == subs->index) { /* found! */ struct snd_seq_subscribers *s; if (subs->type == SNDRV_SEQ_QUERY_SUBS_READ) { s = list_entry(p, struct snd_seq_subscribers, src_list); subs->addr = s->info.dest; } else { s = list_entry(p, struct snd_seq_subscribers, dest_list); subs->addr = s->info.sender; } subs->flags = s->info.flags; subs->queue = s->info.queue; result = 0; break; } } up_read(&group->list_mutex); __end: if (port) snd_seq_port_unlock(port); if (cptr) snd_seq_client_unlock(cptr); return result; } /* * query next client */ static int snd_seq_ioctl_query_next_client(struct snd_seq_client *client, void *arg) { struct snd_seq_client_info *info = arg; struct snd_seq_client *cptr = NULL; /* search for next client */ if (info->client < INT_MAX) info->client++; if (info->client < 0) info->client = 0; for (; info->client < SNDRV_SEQ_MAX_CLIENTS; info->client++) { cptr = snd_seq_client_use_ptr(info->client); if (cptr) break; /* found */ } if (cptr == NULL) return -ENOENT; get_client_info(cptr, info); snd_seq_client_unlock(cptr); return 0; } /* * query next port */ static int snd_seq_ioctl_query_next_port(struct snd_seq_client *client, void *arg) { struct snd_seq_port_info *info = arg; struct snd_seq_client *cptr; struct snd_seq_client_port *port = NULL; cptr = snd_seq_client_use_ptr(info->addr.client); if (cptr == NULL) return -ENXIO; /* search for next port */ info->addr.port++; port = snd_seq_port_query_nearest(cptr, info); if (port == NULL) { snd_seq_client_unlock(cptr); return -ENOENT; } /* get port info */ info->addr = port->addr; snd_seq_get_port_info(port, info); snd_seq_port_unlock(port); snd_seq_client_unlock(cptr); return 0; } #if IS_ENABLED(CONFIG_SND_SEQ_UMP) #define NUM_UMP_INFOS (SNDRV_UMP_MAX_BLOCKS + 1) static void free_ump_info(struct snd_seq_client *client) { int i; if (!client->ump_info) return; for (i = 0; i < NUM_UMP_INFOS; i++) kfree(client->ump_info[i]); kfree(client->ump_info); client->ump_info = NULL; } static void terminate_ump_info_strings(void *p, int type) { if (type == SNDRV_SEQ_CLIENT_UMP_INFO_ENDPOINT) { struct snd_ump_endpoint_info *ep = p; ep->name[sizeof(ep->name) - 1] = 0; } else { struct snd_ump_block_info *bp = p; bp->name[sizeof(bp->name) - 1] = 0; } } #ifdef CONFIG_SND_PROC_FS static void dump_ump_info(struct snd_info_buffer *buffer, struct snd_seq_client *client) { struct snd_ump_endpoint_info *ep; struct snd_ump_block_info *bp; int i; if (!client->ump_info) return; ep = client->ump_info[SNDRV_SEQ_CLIENT_UMP_INFO_ENDPOINT]; if (ep && *ep->name) snd_iprintf(buffer, " UMP Endpoint: \"%s\"\n", ep->name); for (i = 0; i < SNDRV_UMP_MAX_BLOCKS; i++) { bp = client->ump_info[i + 1]; if (bp && *bp->name) { snd_iprintf(buffer, " UMP Block %d: \"%s\" [%s]\n", i, bp->name, bp->active ? "Active" : "Inactive"); snd_iprintf(buffer, " Groups: %d-%d\n", bp->first_group + 1, bp->first_group + bp->num_groups); } } } #endif /* UMP-specific ioctls -- called directly without data copy */ static int snd_seq_ioctl_client_ump_info(struct snd_seq_client *caller, unsigned int cmd, unsigned long arg) { struct snd_seq_client_ump_info __user *argp = (struct snd_seq_client_ump_info __user *)arg; struct snd_seq_client *cptr; int client, type, err = 0; size_t size; void *p; if (get_user(client, &argp->client) || get_user(type, &argp->type)) return -EFAULT; if (cmd == SNDRV_SEQ_IOCTL_SET_CLIENT_UMP_INFO && caller->number != client) return -EPERM; if (type < 0 || type >= NUM_UMP_INFOS) return -EINVAL; if (type == SNDRV_SEQ_CLIENT_UMP_INFO_ENDPOINT) size = sizeof(struct snd_ump_endpoint_info); else size = sizeof(struct snd_ump_block_info); cptr = snd_seq_client_use_ptr(client); if (!cptr) return -ENOENT; mutex_lock(&cptr->ioctl_mutex); if (!cptr->midi_version) { err = -EBADFD; goto error; } if (cmd == SNDRV_SEQ_IOCTL_GET_CLIENT_UMP_INFO) { if (!cptr->ump_info) p = NULL; else p = cptr->ump_info[type]; if (!p) { err = -ENODEV; goto error; } if (copy_to_user(argp->info, p, size)) { err = -EFAULT; goto error; } } else { if (cptr->type != USER_CLIENT) { err = -EBADFD; goto error; } if (!cptr->ump_info) { cptr->ump_info = kcalloc(NUM_UMP_INFOS, sizeof(void *), GFP_KERNEL); if (!cptr->ump_info) { err = -ENOMEM; goto error; } } p = memdup_user(argp->info, size); if (IS_ERR(p)) { err = PTR_ERR(p); goto error; } kfree(cptr->ump_info[type]); terminate_ump_info_strings(p, type); cptr->ump_info[type] = p; } error: mutex_unlock(&cptr->ioctl_mutex); snd_seq_client_unlock(cptr); return err; } #endif /* -------------------------------------------------------- */ static const struct ioctl_handler { unsigned int cmd; int (*func)(struct snd_seq_client *client, void *arg); } ioctl_handlers[] = { { SNDRV_SEQ_IOCTL_PVERSION, snd_seq_ioctl_pversion }, { SNDRV_SEQ_IOCTL_USER_PVERSION, snd_seq_ioctl_user_pversion }, { SNDRV_SEQ_IOCTL_CLIENT_ID, snd_seq_ioctl_client_id }, { SNDRV_SEQ_IOCTL_SYSTEM_INFO, snd_seq_ioctl_system_info }, { SNDRV_SEQ_IOCTL_RUNNING_MODE, snd_seq_ioctl_running_mode }, { SNDRV_SEQ_IOCTL_GET_CLIENT_INFO, snd_seq_ioctl_get_client_info }, { SNDRV_SEQ_IOCTL_SET_CLIENT_INFO, snd_seq_ioctl_set_client_info }, { SNDRV_SEQ_IOCTL_CREATE_PORT, snd_seq_ioctl_create_port }, { SNDRV_SEQ_IOCTL_DELETE_PORT, snd_seq_ioctl_delete_port }, { SNDRV_SEQ_IOCTL_GET_PORT_INFO, snd_seq_ioctl_get_port_info }, { SNDRV_SEQ_IOCTL_SET_PORT_INFO, snd_seq_ioctl_set_port_info }, { SNDRV_SEQ_IOCTL_SUBSCRIBE_PORT, snd_seq_ioctl_subscribe_port }, { SNDRV_SEQ_IOCTL_UNSUBSCRIBE_PORT, snd_seq_ioctl_unsubscribe_port }, { SNDRV_SEQ_IOCTL_CREATE_QUEUE, snd_seq_ioctl_create_queue }, { SNDRV_SEQ_IOCTL_DELETE_QUEUE, snd_seq_ioctl_delete_queue }, { SNDRV_SEQ_IOCTL_GET_QUEUE_INFO, snd_seq_ioctl_get_queue_info }, { SNDRV_SEQ_IOCTL_SET_QUEUE_INFO, snd_seq_ioctl_set_queue_info }, { SNDRV_SEQ_IOCTL_GET_NAMED_QUEUE, snd_seq_ioctl_get_named_queue }, { SNDRV_SEQ_IOCTL_GET_QUEUE_STATUS, snd_seq_ioctl_get_queue_status }, { SNDRV_SEQ_IOCTL_GET_QUEUE_TEMPO, snd_seq_ioctl_get_queue_tempo }, { SNDRV_SEQ_IOCTL_SET_QUEUE_TEMPO, snd_seq_ioctl_set_queue_tempo }, { SNDRV_SEQ_IOCTL_GET_QUEUE_TIMER, snd_seq_ioctl_get_queue_timer }, { SNDRV_SEQ_IOCTL_SET_QUEUE_TIMER, snd_seq_ioctl_set_queue_timer }, { SNDRV_SEQ_IOCTL_GET_QUEUE_CLIENT, snd_seq_ioctl_get_queue_client }, { SNDRV_SEQ_IOCTL_SET_QUEUE_CLIENT, snd_seq_ioctl_set_queue_client }, { SNDRV_SEQ_IOCTL_GET_CLIENT_POOL, snd_seq_ioctl_get_client_pool }, { SNDRV_SEQ_IOCTL_SET_CLIENT_POOL, snd_seq_ioctl_set_client_pool }, { SNDRV_SEQ_IOCTL_GET_SUBSCRIPTION, snd_seq_ioctl_get_subscription }, { SNDRV_SEQ_IOCTL_QUERY_NEXT_CLIENT, snd_seq_ioctl_query_next_client }, { SNDRV_SEQ_IOCTL_QUERY_NEXT_PORT, snd_seq_ioctl_query_next_port }, { SNDRV_SEQ_IOCTL_REMOVE_EVENTS, snd_seq_ioctl_remove_events }, { SNDRV_SEQ_IOCTL_QUERY_SUBS, snd_seq_ioctl_query_subs }, { 0, NULL }, }; static long snd_seq_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct snd_seq_client *client = file->private_data; /* To use kernel stack for ioctl data. */ union { int pversion; int client_id; struct snd_seq_system_info system_info; struct snd_seq_running_info running_info; struct snd_seq_client_info client_info; struct snd_seq_port_info port_info; struct snd_seq_port_subscribe port_subscribe; struct snd_seq_queue_info queue_info; struct snd_seq_queue_status queue_status; struct snd_seq_queue_tempo tempo; struct snd_seq_queue_timer queue_timer; struct snd_seq_queue_client queue_client; struct snd_seq_client_pool client_pool; struct snd_seq_remove_events remove_events; struct snd_seq_query_subs query_subs; } buf; const struct ioctl_handler *handler; unsigned long size; int err; if (snd_BUG_ON(!client)) return -ENXIO; #if IS_ENABLED(CONFIG_SND_SEQ_UMP) /* exception - handling large data */ switch (cmd) { case SNDRV_SEQ_IOCTL_GET_CLIENT_UMP_INFO: case SNDRV_SEQ_IOCTL_SET_CLIENT_UMP_INFO: return snd_seq_ioctl_client_ump_info(client, cmd, arg); } #endif for (handler = ioctl_handlers; handler->cmd > 0; ++handler) { if (handler->cmd == cmd) break; } if (handler->cmd == 0) return -ENOTTY; memset(&buf, 0, sizeof(buf)); /* * All of ioctl commands for ALSA sequencer get an argument of size * within 13 bits. We can safely pick up the size from the command. */ size = _IOC_SIZE(handler->cmd); if (handler->cmd & IOC_IN) { if (copy_from_user(&buf, (const void __user *)arg, size)) return -EFAULT; } mutex_lock(&client->ioctl_mutex); err = handler->func(client, &buf); mutex_unlock(&client->ioctl_mutex); if (err >= 0) { /* Some commands includes a bug in 'dir' field. */ if (handler->cmd == SNDRV_SEQ_IOCTL_SET_QUEUE_CLIENT || handler->cmd == SNDRV_SEQ_IOCTL_SET_CLIENT_POOL || (handler->cmd & IOC_OUT)) if (copy_to_user((void __user *)arg, &buf, size)) return -EFAULT; } return err; } #ifdef CONFIG_COMPAT #include "seq_compat.c" #else #define snd_seq_ioctl_compat NULL #endif /* -------------------------------------------------------- */ /* exported to kernel modules */ int snd_seq_create_kernel_client(struct snd_card *card, int client_index, const char *name_fmt, ...) { struct snd_seq_client *client; va_list args; if (snd_BUG_ON(in_interrupt())) return -EBUSY; if (card && client_index >= SNDRV_SEQ_CLIENTS_PER_CARD) return -EINVAL; if (card == NULL && client_index >= SNDRV_SEQ_GLOBAL_CLIENTS) return -EINVAL; mutex_lock(®ister_mutex); if (card) { client_index += SNDRV_SEQ_GLOBAL_CLIENTS + card->number * SNDRV_SEQ_CLIENTS_PER_CARD; if (client_index >= SNDRV_SEQ_DYNAMIC_CLIENTS_BEGIN) client_index = -1; } /* empty write queue as default */ client = seq_create_client1(client_index, 0); if (client == NULL) { mutex_unlock(®ister_mutex); return -EBUSY; /* failure code */ } usage_alloc(&client_usage, 1); client->accept_input = 1; client->accept_output = 1; client->data.kernel.card = card; client->user_pversion = SNDRV_SEQ_VERSION; va_start(args, name_fmt); vsnprintf(client->name, sizeof(client->name), name_fmt, args); va_end(args); client->type = KERNEL_CLIENT; mutex_unlock(®ister_mutex); /* make others aware this new client */ snd_seq_system_client_ev_client_start(client->number); /* return client number to caller */ return client->number; } EXPORT_SYMBOL(snd_seq_create_kernel_client); /* exported to kernel modules */ int snd_seq_delete_kernel_client(int client) { struct snd_seq_client *ptr; if (snd_BUG_ON(in_interrupt())) return -EBUSY; ptr = clientptr(client); if (ptr == NULL) return -EINVAL; seq_free_client(ptr); kfree(ptr); return 0; } EXPORT_SYMBOL(snd_seq_delete_kernel_client); /* * exported, called by kernel clients to enqueue events (w/o blocking) * * RETURN VALUE: zero if succeed, negative if error */ int snd_seq_kernel_client_enqueue(int client, struct snd_seq_event *ev, struct file *file, bool blocking) { struct snd_seq_client *cptr; int result; if (snd_BUG_ON(!ev)) return -EINVAL; if (!snd_seq_ev_is_ump(ev)) { if (ev->type == SNDRV_SEQ_EVENT_NONE) return 0; /* ignore this */ if (ev->type == SNDRV_SEQ_EVENT_KERNEL_ERROR) return -EINVAL; /* quoted events can't be enqueued */ } /* fill in client number */ ev->source.client = client; if (check_event_type_and_length(ev)) return -EINVAL; cptr = snd_seq_client_use_ptr(client); if (cptr == NULL) return -EINVAL; if (!cptr->accept_output) { result = -EPERM; } else { /* send it */ mutex_lock(&cptr->ioctl_mutex); result = snd_seq_client_enqueue_event(cptr, ev, file, blocking, false, 0, &cptr->ioctl_mutex); mutex_unlock(&cptr->ioctl_mutex); } snd_seq_client_unlock(cptr); return result; } EXPORT_SYMBOL(snd_seq_kernel_client_enqueue); /* * exported, called by kernel clients to dispatch events directly to other * clients, bypassing the queues. Event time-stamp will be updated. * * RETURN VALUE: negative = delivery failed, * zero, or positive: the number of delivered events */ int snd_seq_kernel_client_dispatch(int client, struct snd_seq_event * ev, int atomic, int hop) { struct snd_seq_client *cptr; int result; if (snd_BUG_ON(!ev)) return -EINVAL; /* fill in client number */ ev->queue = SNDRV_SEQ_QUEUE_DIRECT; ev->source.client = client; if (check_event_type_and_length(ev)) return -EINVAL; cptr = snd_seq_client_use_ptr(client); if (cptr == NULL) return -EINVAL; if (!cptr->accept_output) result = -EPERM; else result = snd_seq_deliver_event(cptr, ev, atomic, hop); snd_seq_client_unlock(cptr); return result; } EXPORT_SYMBOL(snd_seq_kernel_client_dispatch); /** * snd_seq_kernel_client_ctl - operate a command for a client with data in * kernel space. * @clientid: A numerical ID for a client. * @cmd: An ioctl(2) command for ALSA sequencer operation. * @arg: A pointer to data in kernel space. * * Against its name, both kernel/application client can be handled by this * kernel API. A pointer of 'arg' argument should be in kernel space. * * Return: 0 at success. Negative error code at failure. */ int snd_seq_kernel_client_ctl(int clientid, unsigned int cmd, void *arg) { const struct ioctl_handler *handler; struct snd_seq_client *client; client = clientptr(clientid); if (client == NULL) return -ENXIO; for (handler = ioctl_handlers; handler->cmd > 0; ++handler) { if (handler->cmd == cmd) return handler->func(client, arg); } pr_debug("ALSA: seq unknown ioctl() 0x%x (type='%c', number=0x%02x)\n", cmd, _IOC_TYPE(cmd), _IOC_NR(cmd)); return -ENOTTY; } EXPORT_SYMBOL(snd_seq_kernel_client_ctl); /* exported (for OSS emulator) */ int snd_seq_kernel_client_write_poll(int clientid, struct file *file, poll_table *wait) { struct snd_seq_client *client; client = clientptr(clientid); if (client == NULL) return -ENXIO; if (! snd_seq_write_pool_allocated(client)) return 1; if (snd_seq_pool_poll_wait(client->pool, file, wait)) return 1; return 0; } EXPORT_SYMBOL(snd_seq_kernel_client_write_poll); /* get a sequencer client object; for internal use from a kernel client */ struct snd_seq_client *snd_seq_kernel_client_get(int id) { return snd_seq_client_use_ptr(id); } EXPORT_SYMBOL_GPL(snd_seq_kernel_client_get); /* put a sequencer client object; for internal use from a kernel client */ void snd_seq_kernel_client_put(struct snd_seq_client *cptr) { if (cptr) snd_seq_client_unlock(cptr); } EXPORT_SYMBOL_GPL(snd_seq_kernel_client_put); /*---------------------------------------------------------------------------*/ #ifdef CONFIG_SND_PROC_FS /* * /proc interface */ static void snd_seq_info_dump_subscribers(struct snd_info_buffer *buffer, struct snd_seq_port_subs_info *group, int is_src, char *msg) { struct list_head *p; struct snd_seq_subscribers *s; int count = 0; down_read(&group->list_mutex); if (list_empty(&group->list_head)) { up_read(&group->list_mutex); return; } snd_iprintf(buffer, msg); list_for_each(p, &group->list_head) { if (is_src) s = list_entry(p, struct snd_seq_subscribers, src_list); else s = list_entry(p, struct snd_seq_subscribers, dest_list); if (count++) snd_iprintf(buffer, ", "); snd_iprintf(buffer, "%d:%d", is_src ? s->info.dest.client : s->info.sender.client, is_src ? s->info.dest.port : s->info.sender.port); if (s->info.flags & SNDRV_SEQ_PORT_SUBS_TIMESTAMP) snd_iprintf(buffer, "[%c:%d]", ((s->info.flags & SNDRV_SEQ_PORT_SUBS_TIME_REAL) ? 'r' : 't'), s->info.queue); if (group->exclusive) snd_iprintf(buffer, "[ex]"); } up_read(&group->list_mutex); snd_iprintf(buffer, "\n"); } #define FLAG_PERM_RD(perm) ((perm) & SNDRV_SEQ_PORT_CAP_READ ? ((perm) & SNDRV_SEQ_PORT_CAP_SUBS_READ ? 'R' : 'r') : '-') #define FLAG_PERM_WR(perm) ((perm) & SNDRV_SEQ_PORT_CAP_WRITE ? ((perm) & SNDRV_SEQ_PORT_CAP_SUBS_WRITE ? 'W' : 'w') : '-') #define FLAG_PERM_EX(perm) ((perm) & SNDRV_SEQ_PORT_CAP_NO_EXPORT ? '-' : 'e') #define FLAG_PERM_DUPLEX(perm) ((perm) & SNDRV_SEQ_PORT_CAP_DUPLEX ? 'X' : '-') static const char *port_direction_name(unsigned char dir) { static const char *names[4] = { "-", "In", "Out", "In/Out" }; if (dir > SNDRV_SEQ_PORT_DIR_BIDIRECTION) return "Invalid"; return names[dir]; } static void snd_seq_info_dump_ports(struct snd_info_buffer *buffer, struct snd_seq_client *client) { struct snd_seq_client_port *p; mutex_lock(&client->ports_mutex); list_for_each_entry(p, &client->ports_list_head, list) { if (p->capability & SNDRV_SEQ_PORT_CAP_INACTIVE) continue; snd_iprintf(buffer, " Port %3d : \"%s\" (%c%c%c%c) [%s]", p->addr.port, p->name, FLAG_PERM_RD(p->capability), FLAG_PERM_WR(p->capability), FLAG_PERM_EX(p->capability), FLAG_PERM_DUPLEX(p->capability), port_direction_name(p->direction)); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) if (snd_seq_client_is_midi2(client) && p->is_midi1) snd_iprintf(buffer, " [MIDI1]"); #endif snd_iprintf(buffer, "\n"); snd_seq_info_dump_subscribers(buffer, &p->c_src, 1, " Connecting To: "); snd_seq_info_dump_subscribers(buffer, &p->c_dest, 0, " Connected From: "); } mutex_unlock(&client->ports_mutex); } static const char *midi_version_string(unsigned int version) { switch (version) { case SNDRV_SEQ_CLIENT_LEGACY_MIDI: return "Legacy"; case SNDRV_SEQ_CLIENT_UMP_MIDI_1_0: return "UMP MIDI1"; case SNDRV_SEQ_CLIENT_UMP_MIDI_2_0: return "UMP MIDI2"; default: return "Unknown"; } } /* exported to seq_info.c */ void snd_seq_info_clients_read(struct snd_info_entry *entry, struct snd_info_buffer *buffer) { int c; struct snd_seq_client *client; snd_iprintf(buffer, "Client info\n"); snd_iprintf(buffer, " cur clients : %d\n", client_usage.cur); snd_iprintf(buffer, " peak clients : %d\n", client_usage.peak); snd_iprintf(buffer, " max clients : %d\n", SNDRV_SEQ_MAX_CLIENTS); snd_iprintf(buffer, "\n"); /* list the client table */ for (c = 0; c < SNDRV_SEQ_MAX_CLIENTS; c++) { client = snd_seq_client_use_ptr(c); if (client == NULL) continue; if (client->type == NO_CLIENT) { snd_seq_client_unlock(client); continue; } snd_iprintf(buffer, "Client %3d : \"%s\" [%s %s]\n", c, client->name, client->type == USER_CLIENT ? "User" : "Kernel", midi_version_string(client->midi_version)); #if IS_ENABLED(CONFIG_SND_SEQ_UMP) dump_ump_info(buffer, client); #endif snd_seq_info_dump_ports(buffer, client); if (snd_seq_write_pool_allocated(client)) { snd_iprintf(buffer, " Output pool :\n"); snd_seq_info_pool(buffer, client->pool, " "); } if (client->type == USER_CLIENT && client->data.user.fifo && client->data.user.fifo->pool) { snd_iprintf(buffer, " Input pool :\n"); snd_seq_info_pool(buffer, client->data.user.fifo->pool, " "); } snd_seq_client_unlock(client); } } #endif /* CONFIG_SND_PROC_FS */ /*---------------------------------------------------------------------------*/ /* * REGISTRATION PART */ static const struct file_operations snd_seq_f_ops = { .owner = THIS_MODULE, .read = snd_seq_read, .write = snd_seq_write, .open = snd_seq_open, .release = snd_seq_release, .poll = snd_seq_poll, .unlocked_ioctl = snd_seq_ioctl, .compat_ioctl = snd_seq_ioctl_compat, }; static struct device *seq_dev; /* * register sequencer device */ int __init snd_sequencer_device_init(void) { int err; err = snd_device_alloc(&seq_dev, NULL); if (err < 0) return err; dev_set_name(seq_dev, "seq"); mutex_lock(®ister_mutex); err = snd_register_device(SNDRV_DEVICE_TYPE_SEQUENCER, NULL, 0, &snd_seq_f_ops, NULL, seq_dev); mutex_unlock(®ister_mutex); if (err < 0) { put_device(seq_dev); return err; } return 0; } /* * unregister sequencer device */ void snd_sequencer_device_done(void) { snd_unregister_device(seq_dev); put_device(seq_dev); } |
1 1 1 2 1 2 1 1 1 1 3 1 2 3 2 1 28 1 27 1 8 16 3 14 2 13 1 9 4 9 6 2 1 3 3 3 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 | // SPDX-License-Identifier: GPL-2.0-or-later #include <net/genetlink.h> #include "br_private.h" #include "br_private_cfm.h" static const struct nla_policy br_cfm_mep_create_policy[IFLA_BRIDGE_CFM_MEP_CREATE_MAX + 1] = { [IFLA_BRIDGE_CFM_MEP_CREATE_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX] = { .type = NLA_U32 }, }; static const struct nla_policy br_cfm_mep_delete_policy[IFLA_BRIDGE_CFM_MEP_DELETE_MAX + 1] = { [IFLA_BRIDGE_CFM_MEP_DELETE_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE] = { .type = NLA_U32 }, }; static const struct nla_policy br_cfm_mep_config_policy[IFLA_BRIDGE_CFM_MEP_CONFIG_MAX + 1] = { [IFLA_BRIDGE_CFM_MEP_CONFIG_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC] = NLA_POLICY_ETH_ADDR, [IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL] = NLA_POLICY_MAX(NLA_U32, 7), [IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID] = NLA_POLICY_MAX(NLA_U32, 0x1FFF), }; static const struct nla_policy br_cfm_cc_config_policy[IFLA_BRIDGE_CFM_CC_CONFIG_MAX + 1] = { [IFLA_BRIDGE_CFM_CC_CONFIG_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID] = { .type = NLA_BINARY, .len = CFM_MAID_LENGTH }, }; static const struct nla_policy br_cfm_cc_peer_mep_policy[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1] = { [IFLA_BRIDGE_CFM_CC_PEER_MEP_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_PEER_MEPID] = NLA_POLICY_MAX(NLA_U32, 0x1FFF), }; static const struct nla_policy br_cfm_cc_rdi_policy[IFLA_BRIDGE_CFM_CC_RDI_MAX + 1] = { [IFLA_BRIDGE_CFM_CC_RDI_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_CC_RDI_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_RDI_RDI] = { .type = NLA_U32 }, }; static const struct nla_policy br_cfm_cc_ccm_tx_policy[IFLA_BRIDGE_CFM_CC_CCM_TX_MAX + 1] = { [IFLA_BRIDGE_CFM_CC_CCM_TX_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC] = NLA_POLICY_ETH_ADDR, [IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE] = { .type = NLA_U8 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV] = { .type = NLA_U32 }, [IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE] = { .type = NLA_U8 }, }; static const struct nla_policy br_cfm_policy[IFLA_BRIDGE_CFM_MAX + 1] = { [IFLA_BRIDGE_CFM_UNSPEC] = { .type = NLA_REJECT }, [IFLA_BRIDGE_CFM_MEP_CREATE] = NLA_POLICY_NESTED(br_cfm_mep_create_policy), [IFLA_BRIDGE_CFM_MEP_DELETE] = NLA_POLICY_NESTED(br_cfm_mep_delete_policy), [IFLA_BRIDGE_CFM_MEP_CONFIG] = NLA_POLICY_NESTED(br_cfm_mep_config_policy), [IFLA_BRIDGE_CFM_CC_CONFIG] = NLA_POLICY_NESTED(br_cfm_cc_config_policy), [IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD] = NLA_POLICY_NESTED(br_cfm_cc_peer_mep_policy), [IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE] = NLA_POLICY_NESTED(br_cfm_cc_peer_mep_policy), [IFLA_BRIDGE_CFM_CC_RDI] = NLA_POLICY_NESTED(br_cfm_cc_rdi_policy), [IFLA_BRIDGE_CFM_CC_CCM_TX] = NLA_POLICY_NESTED(br_cfm_cc_ccm_tx_policy), }; static int br_mep_create_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_CREATE_MAX + 1]; struct br_cfm_mep_create create; u32 instance; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_CREATE_MAX, attr, br_cfm_mep_create_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN]) { NL_SET_ERR_MSG_MOD(extack, "Missing DOMAIN attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION]) { NL_SET_ERR_MSG_MOD(extack, "Missing DIRECTION attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX]) { NL_SET_ERR_MSG_MOD(extack, "Missing IFINDEX attribute"); return -EINVAL; } memset(&create, 0, sizeof(create)); instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE]); create.domain = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN]); create.direction = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION]); create.ifindex = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX]); return br_cfm_mep_create(br, instance, &create, extack); } static int br_mep_delete_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_DELETE_MAX + 1]; u32 instance; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_DELETE_MAX, attr, br_cfm_mep_delete_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_DELETE_INSTANCE]); return br_cfm_mep_delete(br, instance, extack); } static int br_mep_config_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MAX + 1]; struct br_cfm_mep_config config; u32 instance; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MEP_CONFIG_MAX, attr, br_cfm_mep_config_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC]) { NL_SET_ERR_MSG_MOD(extack, "Missing UNICAST_MAC attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL]) { NL_SET_ERR_MSG_MOD(extack, "Missing MDLEVEL attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID]) { NL_SET_ERR_MSG_MOD(extack, "Missing MEPID attribute"); return -EINVAL; } memset(&config, 0, sizeof(config)); instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE]); nla_memcpy(&config.unicast_mac.addr, tb[IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC], sizeof(config.unicast_mac.addr)); config.mdlevel = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL]); config.mepid = nla_get_u32(tb[IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID]); return br_cfm_mep_config_set(br, instance, &config, extack); } static int br_cc_config_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_CC_CONFIG_MAX + 1]; struct br_cfm_cc_config config; u32 instance; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_CONFIG_MAX, attr, br_cfm_cc_config_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE]) { NL_SET_ERR_MSG_MOD(extack, "Missing ENABLE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL]) { NL_SET_ERR_MSG_MOD(extack, "Missing INTERVAL attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID]) { NL_SET_ERR_MSG_MOD(extack, "Missing MAID attribute"); return -EINVAL; } memset(&config, 0, sizeof(config)); instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE]); config.enable = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE]); config.exp_interval = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL]); nla_memcpy(&config.exp_maid.data, tb[IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID], sizeof(config.exp_maid.data)); return br_cfm_cc_config_set(br, instance, &config, extack); } static int br_cc_peer_mep_add_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1]; u32 instance, peer_mep_id; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, attr, br_cfm_cc_peer_mep_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]) { NL_SET_ERR_MSG_MOD(extack, "Missing PEER_MEP_ID attribute"); return -EINVAL; } instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]); peer_mep_id = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]); return br_cfm_cc_peer_mep_add(br, instance, peer_mep_id, extack); } static int br_cc_peer_mep_remove_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX + 1]; u32 instance, peer_mep_id; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_PEER_MEP_MAX, attr, br_cfm_cc_peer_mep_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]) { NL_SET_ERR_MSG_MOD(extack, "Missing PEER_MEP_ID attribute"); return -EINVAL; } instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE]); peer_mep_id = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_PEER_MEPID]); return br_cfm_cc_peer_mep_remove(br, instance, peer_mep_id, extack); } static int br_cc_rdi_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_CC_RDI_MAX + 1]; u32 instance, rdi; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_RDI_MAX, attr, br_cfm_cc_rdi_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_RDI_RDI]) { NL_SET_ERR_MSG_MOD(extack, "Missing RDI attribute"); return -EINVAL; } instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_INSTANCE]); rdi = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_RDI_RDI]); return br_cfm_cc_rdi_set(br, instance, rdi, extack); } static int br_cc_ccm_tx_parse(struct net_bridge *br, struct nlattr *attr, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_CC_CCM_TX_MAX + 1]; struct br_cfm_cc_ccm_tx_info tx_info; u32 instance; int err; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_CC_CCM_TX_MAX, attr, br_cfm_cc_ccm_tx_policy, extack); if (err) return err; if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]) { NL_SET_ERR_MSG_MOD(extack, "Missing INSTANCE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC]) { NL_SET_ERR_MSG_MOD(extack, "Missing DMAC attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE]) { NL_SET_ERR_MSG_MOD(extack, "Missing SEQ_NO_UPDATE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD]) { NL_SET_ERR_MSG_MOD(extack, "Missing PERIOD attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV]) { NL_SET_ERR_MSG_MOD(extack, "Missing IF_TLV attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE]) { NL_SET_ERR_MSG_MOD(extack, "Missing IF_TLV_VALUE attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV]) { NL_SET_ERR_MSG_MOD(extack, "Missing PORT_TLV attribute"); return -EINVAL; } if (!tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE]) { NL_SET_ERR_MSG_MOD(extack, "Missing PORT_TLV_VALUE attribute"); return -EINVAL; } memset(&tx_info, 0, sizeof(tx_info)); instance = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE]); nla_memcpy(&tx_info.dmac.addr, tb[IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC], sizeof(tx_info.dmac.addr)); tx_info.seq_no_update = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE]); tx_info.period = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD]); tx_info.if_tlv = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV]); tx_info.if_tlv_value = nla_get_u8(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE]); tx_info.port_tlv = nla_get_u32(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV]); tx_info.port_tlv_value = nla_get_u8(tb[IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE]); return br_cfm_cc_ccm_tx(br, instance, &tx_info, extack); } int br_cfm_parse(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *attr, int cmd, struct netlink_ext_ack *extack) { struct nlattr *tb[IFLA_BRIDGE_CFM_MAX + 1]; int err; /* When this function is called for a port then the br pointer is * invalid, therefor set the br to point correctly */ if (p) br = p->br; err = nla_parse_nested(tb, IFLA_BRIDGE_CFM_MAX, attr, br_cfm_policy, extack); if (err) return err; if (tb[IFLA_BRIDGE_CFM_MEP_CREATE]) { err = br_mep_create_parse(br, tb[IFLA_BRIDGE_CFM_MEP_CREATE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_MEP_DELETE]) { err = br_mep_delete_parse(br, tb[IFLA_BRIDGE_CFM_MEP_DELETE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_MEP_CONFIG]) { err = br_mep_config_parse(br, tb[IFLA_BRIDGE_CFM_MEP_CONFIG], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_CC_CONFIG]) { err = br_cc_config_parse(br, tb[IFLA_BRIDGE_CFM_CC_CONFIG], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD]) { err = br_cc_peer_mep_add_parse(br, tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_ADD], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE]) { err = br_cc_peer_mep_remove_parse(br, tb[IFLA_BRIDGE_CFM_CC_PEER_MEP_REMOVE], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_CC_RDI]) { err = br_cc_rdi_parse(br, tb[IFLA_BRIDGE_CFM_CC_RDI], extack); if (err) return err; } if (tb[IFLA_BRIDGE_CFM_CC_CCM_TX]) { err = br_cc_ccm_tx_parse(br, tb[IFLA_BRIDGE_CFM_CC_CCM_TX], extack); if (err) return err; } return 0; } int br_cfm_config_fill_info(struct sk_buff *skb, struct net_bridge *br) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; struct nlattr *tb; hlist_for_each_entry_rcu(mep, &br->mep_list, head) { tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_CREATE_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_DOMAIN, mep->create.domain)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_DIRECTION, mep->create.direction)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CREATE_IFINDEX, mep->create.ifindex)) goto nla_put_failure; nla_nest_end(skb, tb); tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_UNICAST_MAC, sizeof(mep->config.unicast_mac.addr), mep->config.unicast_mac.addr)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_MDLEVEL, mep->config.mdlevel)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_CONFIG_MEPID, mep->config.mepid)) goto nla_put_failure; nla_nest_end(skb, tb); tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_CONFIG_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_ENABLE, mep->cc_config.enable)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CONFIG_EXP_INTERVAL, mep->cc_config.exp_interval)) goto nla_put_failure; if (nla_put(skb, IFLA_BRIDGE_CFM_CC_CONFIG_EXP_MAID, sizeof(mep->cc_config.exp_maid.data), mep->cc_config.exp_maid.data)) goto nla_put_failure; nla_nest_end(skb, tb); tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_RDI_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_RDI_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_RDI_RDI, mep->rdi)) goto nla_put_failure; nla_nest_end(skb, tb); tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_DMAC, sizeof(mep->cc_ccm_tx_info.dmac), mep->cc_ccm_tx_info.dmac.addr)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_SEQ_NO_UPDATE, mep->cc_ccm_tx_info.seq_no_update)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PERIOD, mep->cc_ccm_tx_info.period)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV, mep->cc_ccm_tx_info.if_tlv)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_IF_TLV_VALUE, mep->cc_ccm_tx_info.if_tlv_value)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV, mep->cc_ccm_tx_info.port_tlv)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_CCM_TX_PORT_TLV_VALUE, mep->cc_ccm_tx_info.port_tlv_value)) goto nla_put_failure; nla_nest_end(skb, tb); hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) { tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_PEER_MEP_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_MEP_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_MEPID, peer_mep->mepid)) goto nla_put_failure; nla_nest_end(skb, tb); } } return 0; nla_put_failure: nla_nest_cancel(skb, tb); nla_info_failure: return -EMSGSIZE; } int br_cfm_status_fill_info(struct sk_buff *skb, struct net_bridge *br, bool getlink) { struct br_cfm_peer_mep *peer_mep; struct br_cfm_mep *mep; struct nlattr *tb; hlist_for_each_entry_rcu(mep, &br->mep_list, head) { tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_MEP_STATUS_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_STATUS_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_STATUS_OPCODE_UNEXP_SEEN, mep->status.opcode_unexp_seen)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_STATUS_VERSION_UNEXP_SEEN, mep->status.version_unexp_seen)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_MEP_STATUS_RX_LEVEL_LOW_SEEN, mep->status.rx_level_low_seen)) goto nla_put_failure; /* Only clear if this is a GETLINK */ if (getlink) { /* Clear all 'seen' indications */ mep->status.opcode_unexp_seen = false; mep->status.version_unexp_seen = false; mep->status.rx_level_low_seen = false; } nla_nest_end(skb, tb); hlist_for_each_entry_rcu(peer_mep, &mep->peer_mep_list, head) { tb = nla_nest_start(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_INFO); if (!tb) goto nla_info_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_INSTANCE, mep->instance)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_PEER_MEPID, peer_mep->mepid)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_CCM_DEFECT, peer_mep->cc_status.ccm_defect)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_RDI, peer_mep->cc_status.rdi)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_PORT_TLV_VALUE, peer_mep->cc_status.port_tlv_value)) goto nla_put_failure; if (nla_put_u8(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_IF_TLV_VALUE, peer_mep->cc_status.if_tlv_value)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEEN, peer_mep->cc_status.seen)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_TLV_SEEN, peer_mep->cc_status.tlv_seen)) goto nla_put_failure; if (nla_put_u32(skb, IFLA_BRIDGE_CFM_CC_PEER_STATUS_SEQ_UNEXP_SEEN, peer_mep->cc_status.seq_unexp_seen)) goto nla_put_failure; if (getlink) { /* Only clear if this is a GETLINK */ /* Clear all 'seen' indications */ peer_mep->cc_status.seen = false; peer_mep->cc_status.tlv_seen = false; peer_mep->cc_status.seq_unexp_seen = false; } nla_nest_end(skb, tb); } } return 0; nla_put_failure: nla_nest_cancel(skb, tb); nla_info_failure: return -EMSGSIZE; } |
46 46 2 2 2 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 | // SPDX-License-Identifier: GPL-2.0-only /* * kernel/freezer.c - Function to freeze a process * * Originally from kernel/power/process.c */ #include <linux/interrupt.h> #include <linux/suspend.h> #include <linux/export.h> #include <linux/syscalls.h> #include <linux/freezer.h> #include <linux/kthread.h> /* total number of freezing conditions in effect */ DEFINE_STATIC_KEY_FALSE(freezer_active); EXPORT_SYMBOL(freezer_active); /* * indicate whether PM freezing is in effect, protected by * system_transition_mutex */ bool pm_freezing; bool pm_nosig_freezing; /* protects freezing and frozen transitions */ static DEFINE_SPINLOCK(freezer_lock); /** * freezing_slow_path - slow path for testing whether a task needs to be frozen * @p: task to be tested * * This function is called by freezing() if freezer_active isn't zero * and tests whether @p needs to enter and stay in frozen state. Can be * called under any context. The freezers are responsible for ensuring the * target tasks see the updated state. */ bool freezing_slow_path(struct task_struct *p) { if (p->flags & (PF_NOFREEZE | PF_SUSPEND_TASK)) return false; if (test_tsk_thread_flag(p, TIF_MEMDIE)) return false; if (pm_nosig_freezing || cgroup_freezing(p)) return true; if (pm_freezing && !(p->flags & PF_KTHREAD)) return true; return false; } EXPORT_SYMBOL(freezing_slow_path); bool frozen(struct task_struct *p) { return READ_ONCE(p->__state) & TASK_FROZEN; } /* Refrigerator is place where frozen processes are stored :-). */ bool __refrigerator(bool check_kthr_stop) { unsigned int state = get_current_state(); bool was_frozen = false; pr_debug("%s entered refrigerator\n", current->comm); WARN_ON_ONCE(state && !(state & TASK_NORMAL)); for (;;) { bool freeze; raw_spin_lock_irq(¤t->pi_lock); WRITE_ONCE(current->__state, TASK_FROZEN); /* unstale saved_state so that __thaw_task() will wake us up */ current->saved_state = TASK_RUNNING; raw_spin_unlock_irq(¤t->pi_lock); spin_lock_irq(&freezer_lock); freeze = freezing(current) && !(check_kthr_stop && kthread_should_stop()); spin_unlock_irq(&freezer_lock); if (!freeze) break; was_frozen = true; schedule(); } __set_current_state(TASK_RUNNING); pr_debug("%s left refrigerator\n", current->comm); return was_frozen; } EXPORT_SYMBOL(__refrigerator); static void fake_signal_wake_up(struct task_struct *p) { unsigned long flags; if (lock_task_sighand(p, &flags)) { signal_wake_up(p, 0); unlock_task_sighand(p, &flags); } } static int __set_task_frozen(struct task_struct *p, void *arg) { unsigned int state = READ_ONCE(p->__state); /* * Allow freezing the sched_delayed tasks; they will not execute until * ttwu() fixes them up, so it is safe to swap their state now, instead * of waiting for them to get fully dequeued. */ if (task_is_runnable(p)) return 0; if (p != current && task_curr(p)) return 0; if (!(state & (TASK_FREEZABLE | __TASK_STOPPED | __TASK_TRACED))) return 0; /* * Only TASK_NORMAL can be augmented with TASK_FREEZABLE, since they * can suffer spurious wakeups. */ if (state & TASK_FREEZABLE) WARN_ON_ONCE(!(state & TASK_NORMAL)); #ifdef CONFIG_LOCKDEP /* * It's dangerous to freeze with locks held; there be dragons there. */ if (!(state & __TASK_FREEZABLE_UNSAFE)) WARN_ON_ONCE(debug_locks && p->lockdep_depth); #endif p->saved_state = p->__state; WRITE_ONCE(p->__state, TASK_FROZEN); return TASK_FROZEN; } static bool __freeze_task(struct task_struct *p) { /* TASK_FREEZABLE|TASK_STOPPED|TASK_TRACED -> TASK_FROZEN */ return task_call_func(p, __set_task_frozen, NULL); } /** * freeze_task - send a freeze request to given task * @p: task to send the request to * * If @p is freezing, the freeze request is sent either by sending a fake * signal (if it's not a kernel thread) or waking it up (if it's a kernel * thread). * * RETURNS: * %false, if @p is not freezing or already frozen; %true, otherwise */ bool freeze_task(struct task_struct *p) { unsigned long flags; spin_lock_irqsave(&freezer_lock, flags); if (!freezing(p) || frozen(p) || __freeze_task(p)) { spin_unlock_irqrestore(&freezer_lock, flags); return false; } if (!(p->flags & PF_KTHREAD)) fake_signal_wake_up(p); else wake_up_state(p, TASK_NORMAL); spin_unlock_irqrestore(&freezer_lock, flags); return true; } /* * Restore the saved_state before the task entered freezer. For typical task * in the __refrigerator(), saved_state == TASK_RUNNING so nothing happens * here. For tasks which were TASK_NORMAL | TASK_FREEZABLE, their initial state * is restored unless they got an expected wakeup (see ttwu_state_match()). * Returns 1 if the task state was restored. */ static int __restore_freezer_state(struct task_struct *p, void *arg) { unsigned int state = p->saved_state; if (state != TASK_RUNNING) { WRITE_ONCE(p->__state, state); p->saved_state = TASK_RUNNING; return 1; } return 0; } void __thaw_task(struct task_struct *p) { unsigned long flags; spin_lock_irqsave(&freezer_lock, flags); if (WARN_ON_ONCE(freezing(p))) goto unlock; if (!frozen(p) || task_call_func(p, __restore_freezer_state, NULL)) goto unlock; wake_up_state(p, TASK_FROZEN); unlock: spin_unlock_irqrestore(&freezer_lock, flags); } /** * set_freezable - make %current freezable * * Mark %current freezable and enter refrigerator if necessary. */ bool set_freezable(void) { might_sleep(); /* * Modify flags while holding freezer_lock. This ensures the * freezer notices that we aren't frozen yet or the freezing * condition is visible to try_to_freeze() below. */ spin_lock_irq(&freezer_lock); current->flags &= ~PF_NOFREEZE; spin_unlock_irq(&freezer_lock); return try_to_freeze(); } EXPORT_SYMBOL(set_freezable); |
11 1 1 11 7 1 4 6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 | // SPDX-License-Identifier: GPL-2.0-or-later /* * Cryptographic API. * * CRC32C chksum * *@Article{castagnoli-crc, * author = { Guy Castagnoli and Stefan Braeuer and Martin Herrman}, * title = {{Optimization of Cyclic Redundancy-Check Codes with 24 * and 32 Parity Bits}}, * journal = IEEE Transactions on Communication, * year = {1993}, * volume = {41}, * number = {6}, * pages = {}, * month = {June}, *} * Used by the iSCSI driver, possibly others, and derived from * the iscsi-crc.c module of the linux-iscsi driver at * http://linux-iscsi.sourceforge.net. * * Following the example of lib/crc32, this function is intended to be * flexible and useful for all users. Modules that currently have their * own crc32c, but hopefully may be able to use this one are: * net/sctp (please add all your doco to here if you change to * use this one!) * <endoflist> * * Copyright (c) 2004 Cisco Systems, Inc. * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> */ #include <linux/unaligned.h> #include <crypto/internal/hash.h> #include <linux/init.h> #include <linux/module.h> #include <linux/string.h> #include <linux/kernel.h> #include <linux/crc32.h> #define CHKSUM_BLOCK_SIZE 1 #define CHKSUM_DIGEST_SIZE 4 struct chksum_ctx { u32 key; }; struct chksum_desc_ctx { u32 crc; }; /* * Steps through buffer one byte at a time, calculates reflected * crc using table. */ static int chksum_init(struct shash_desc *desc) { struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); ctx->crc = mctx->key; return 0; } /* * Setting the seed allows arbitrary accumulators and flexible XOR policy * If your algorithm starts with ~0, then XOR with ~0 before you set * the seed. */ static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct chksum_ctx *mctx = crypto_shash_ctx(tfm); if (keylen != sizeof(mctx->key)) return -EINVAL; mctx->key = get_unaligned_le32(key); return 0; } static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); ctx->crc = __crc32c_le_base(ctx->crc, data, length); return 0; } static int chksum_update_arch(struct shash_desc *desc, const u8 *data, unsigned int length) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); ctx->crc = __crc32c_le(ctx->crc, data, length); return 0; } static int chksum_final(struct shash_desc *desc, u8 *out) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); put_unaligned_le32(~ctx->crc, out); return 0; } static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out) { put_unaligned_le32(~__crc32c_le_base(*crcp, data, len), out); return 0; } static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len, u8 *out) { put_unaligned_le32(~__crc32c_le(*crcp, data, len), out); return 0; } static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); return __chksum_finup(&ctx->crc, data, len, out); } static int chksum_finup_arch(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); return __chksum_finup_arch(&ctx->crc, data, len, out); } static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); return __chksum_finup(&mctx->key, data, length, out); } static int chksum_digest_arch(struct shash_desc *desc, const u8 *data, unsigned int length, u8 *out) { struct chksum_ctx *mctx = crypto_shash_ctx(desc->tfm); return __chksum_finup_arch(&mctx->key, data, length, out); } static int crc32c_cra_init(struct crypto_tfm *tfm) { struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); mctx->key = ~0; return 0; } static struct shash_alg algs[] = {{ .digestsize = CHKSUM_DIGEST_SIZE, .setkey = chksum_setkey, .init = chksum_init, .update = chksum_update, .final = chksum_final, .finup = chksum_finup, .digest = chksum_digest, .descsize = sizeof(struct chksum_desc_ctx), .base.cra_name = "crc32c", .base.cra_driver_name = "crc32c-generic", .base.cra_priority = 100, .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .base.cra_blocksize = CHKSUM_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct chksum_ctx), .base.cra_module = THIS_MODULE, .base.cra_init = crc32c_cra_init, }, { .digestsize = CHKSUM_DIGEST_SIZE, .setkey = chksum_setkey, .init = chksum_init, .update = chksum_update_arch, .final = chksum_final, .finup = chksum_finup_arch, .digest = chksum_digest_arch, .descsize = sizeof(struct chksum_desc_ctx), .base.cra_name = "crc32c", .base.cra_driver_name = "crc32c-" __stringify(ARCH), .base.cra_priority = 150, .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .base.cra_blocksize = CHKSUM_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct chksum_ctx), .base.cra_module = THIS_MODULE, .base.cra_init = crc32c_cra_init, }}; static int __init crc32c_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ return crypto_register_shashes(algs, 1 + (&__crc32c_le != &__crc32c_le_base)); } static void __exit crc32c_mod_fini(void) { crypto_unregister_shashes(algs, 1 + (&__crc32c_le != &__crc32c_le_base)); } subsys_initcall(crc32c_mod_init); module_exit(crc32c_mod_fini); MODULE_AUTHOR("Clay Haapala <chaapala@cisco.com>"); MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c"); MODULE_LICENSE("GPL"); MODULE_ALIAS_CRYPTO("crc32c"); MODULE_ALIAS_CRYPTO("crc32c-generic"); |
1 1 1 1 1 1 1 1 1 1 1 1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 | // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2003-2013 Jozsef Kadlecsik <kadlec@netfilter.org> */ /* Kernel module implementing an IP set type: the hash:net type */ #include <linux/jhash.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/ipset/pfxlen.h> #include <linux/netfilter/ipset/ip_set.h> #include <linux/netfilter/ipset/ip_set_hash.h> #define IPSET_TYPE_REV_MIN 0 /* 1 Range as input support for IPv4 added */ /* 2 nomatch flag support added */ /* 3 Counters support added */ /* 4 Comments support added */ /* 5 Forceadd support added */ /* 6 skbinfo support added */ #define IPSET_TYPE_REV_MAX 7 /* bucketsize, initval support added */ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>"); IP_SET_MODULE_DESC("hash:net", IPSET_TYPE_REV_MIN, IPSET_TYPE_REV_MAX); MODULE_ALIAS("ip_set_hash:net"); /* Type specific function prefix */ #define HTYPE hash_net #define IP_SET_HASH_WITH_NETS /* IPv4 variant */ /* Member elements */ struct hash_net4_elem { __be32 ip; u16 padding0; u8 nomatch; u8 cidr; }; /* Common functions */ static bool hash_net4_data_equal(const struct hash_net4_elem *ip1, const struct hash_net4_elem *ip2, u32 *multi) { return ip1->ip == ip2->ip && ip1->cidr == ip2->cidr; } static int hash_net4_do_data_match(const struct hash_net4_elem *elem) { return elem->nomatch ? -ENOTEMPTY : 1; } static void hash_net4_data_set_flags(struct hash_net4_elem *elem, u32 flags) { elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static void hash_net4_data_reset_flags(struct hash_net4_elem *elem, u8 *flags) { swap(*flags, elem->nomatch); } static void hash_net4_data_netmask(struct hash_net4_elem *elem, u8 cidr) { elem->ip &= ip_set_netmask(cidr); elem->cidr = cidr; } static bool hash_net4_data_list(struct sk_buff *skb, const struct hash_net4_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) || nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_net4_data_next(struct hash_net4_elem *next, const struct hash_net4_elem *d) { next->ip = d->ip; } #define MTYPE hash_net4 #define HOST_MASK 32 #include "ip_set_hash_gen.h" static int hash_net4_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_net4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) e.cidr = HOST_MASK; ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip); e.ip &= ip_set_netmask(e.cidr); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { struct hash_net4 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net4_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); u32 ip = 0, ip_to = 0, i = 0; int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP], &ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) { e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!e.cidr || e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) { e.ip = htonl(ip & ip_set_hostmask(e.cidr)); ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP_TO], &ip_to); if (ret) return ret; if (ip_to < ip) swap(ip, ip_to); if (ip + UINT_MAX == ip_to) return -IPSET_ERR_HASH_RANGE; } if (retried) ip = ntohl(h->next.ip); do { i++; e.ip = htonl(ip); if (i > IPSET_MAX_RANGE) { hash_net4_data_next(&h->next, &e); return -ERANGE; } ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); if (ret && !ip_set_eexist(ret, flags)) return ret; ret = 0; } while (ip++ < ip_to); return ret; } /* IPv6 variant */ struct hash_net6_elem { union nf_inet_addr ip; u16 padding0; u8 nomatch; u8 cidr; }; /* Common functions */ static bool hash_net6_data_equal(const struct hash_net6_elem *ip1, const struct hash_net6_elem *ip2, u32 *multi) { return ipv6_addr_equal(&ip1->ip.in6, &ip2->ip.in6) && ip1->cidr == ip2->cidr; } static int hash_net6_do_data_match(const struct hash_net6_elem *elem) { return elem->nomatch ? -ENOTEMPTY : 1; } static void hash_net6_data_set_flags(struct hash_net6_elem *elem, u32 flags) { elem->nomatch = (flags >> 16) & IPSET_FLAG_NOMATCH; } static void hash_net6_data_reset_flags(struct hash_net6_elem *elem, u8 *flags) { swap(*flags, elem->nomatch); } static void hash_net6_data_netmask(struct hash_net6_elem *elem, u8 cidr) { ip6_netmask(&elem->ip, cidr); elem->cidr = cidr; } static bool hash_net6_data_list(struct sk_buff *skb, const struct hash_net6_elem *data) { u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0; if (nla_put_ipaddr6(skb, IPSET_ATTR_IP, &data->ip.in6) || nla_put_u8(skb, IPSET_ATTR_CIDR, data->cidr) || (flags && nla_put_net32(skb, IPSET_ATTR_CADT_FLAGS, htonl(flags)))) goto nla_put_failure; return false; nla_put_failure: return true; } static void hash_net6_data_next(struct hash_net6_elem *next, const struct hash_net6_elem *d) { } #undef MTYPE #undef HOST_MASK #define MTYPE hash_net6 #define HOST_MASK 128 #define IP_SET_EMIT_CREATE #include "ip_set_hash_gen.h" static int hash_net6_kadt(struct ip_set *set, const struct sk_buff *skb, const struct xt_action_param *par, enum ipset_adt adt, struct ip_set_adt_opt *opt) { const struct hash_net6 *h = set->data; ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { .cidr = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK), }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); if (e.cidr == 0) return -EINVAL; if (adt == IPSET_TEST) e.cidr = HOST_MASK; ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6); ip6_netmask(&e.ip, e.cidr); return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); } static int hash_net6_uadt(struct ip_set *set, struct nlattr *tb[], enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) { ipset_adtfn adtfn = set->variant->adt[adt]; struct hash_net6_elem e = { .cidr = HOST_MASK }; struct ip_set_ext ext = IP_SET_INIT_UEXT(set); int ret; if (tb[IPSET_ATTR_LINENO]) *lineno = nla_get_u32(tb[IPSET_ATTR_LINENO]); if (unlikely(!tb[IPSET_ATTR_IP] || !ip_set_optattr_netorder(tb, IPSET_ATTR_CADT_FLAGS))) return -IPSET_ERR_PROTOCOL; if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; ret = ip_set_get_ipaddr6(tb[IPSET_ATTR_IP], &e.ip); if (ret) return ret; ret = ip_set_get_extensions(set, tb, &ext); if (ret) return ret; if (tb[IPSET_ATTR_CIDR]) { e.cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (!e.cidr || e.cidr > HOST_MASK) return -IPSET_ERR_INVALID_CIDR; } ip6_netmask(&e.ip, e.cidr); if (tb[IPSET_ATTR_CADT_FLAGS]) { u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]); if (cadt_flags & IPSET_FLAG_NOMATCH) flags |= (IPSET_FLAG_NOMATCH << 16); } ret = adtfn(set, &e, &ext, &ext, flags); return ip_set_enomatch(ret, flags, adt, set) ? -ret : ip_set_eexist(ret, flags) ? 0 : ret; } static struct ip_set_type hash_net_type __read_mostly = { .name = "hash:net", .protocol = IPSET_PROTOCOL, .features = IPSET_TYPE_IP | IPSET_TYPE_NOMATCH, .dimension = IPSET_DIM_ONE, .family = NFPROTO_UNSPEC, .revision_min = IPSET_TYPE_REV_MIN, .revision_max = IPSET_TYPE_REV_MAX, .create_flags[IPSET_TYPE_REV_MAX] = IPSET_CREATE_FLAG_BUCKETSIZE, .create = hash_net_create, .create_policy = { [IPSET_ATTR_HASHSIZE] = { .type = NLA_U32 }, [IPSET_ATTR_MAXELEM] = { .type = NLA_U32 }, [IPSET_ATTR_INITVAL] = { .type = NLA_U32 }, [IPSET_ATTR_BUCKETSIZE] = { .type = NLA_U8 }, [IPSET_ATTR_RESIZE] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, }, .adt_policy = { [IPSET_ATTR_IP] = { .type = NLA_NESTED }, [IPSET_ATTR_IP_TO] = { .type = NLA_NESTED }, [IPSET_ATTR_CIDR] = { .type = NLA_U8 }, [IPSET_ATTR_TIMEOUT] = { .type = NLA_U32 }, [IPSET_ATTR_LINENO] = { .type = NLA_U32 }, [IPSET_ATTR_CADT_FLAGS] = { .type = NLA_U32 }, [IPSET_ATTR_BYTES] = { .type = NLA_U64 }, [IPSET_ATTR_PACKETS] = { .type = NLA_U64 }, [IPSET_ATTR_COMMENT] = { .type = NLA_NUL_STRING, .len = IPSET_MAX_COMMENT_SIZE }, [IPSET_ATTR_SKBMARK] = { .type = NLA_U64 }, [IPSET_ATTR_SKBPRIO] = { .type = NLA_U32 }, [IPSET_ATTR_SKBQUEUE] = { .type = NLA_U16 }, }, .me = THIS_MODULE, }; static int __init hash_net_init(void) { return ip_set_type_register(&hash_net_type); } static void __exit hash_net_fini(void) { rcu_barrier(); ip_set_type_unregister(&hash_net_type); } module_init(hash_net_init); module_exit(hash_net_fini); |
304 2 3 204 24 223 2 5 1 1 1 1364 1156 281 7 1578 1080 87 527 72 504 53 17 528 504 53 53 53 53 53 53 47 17 47 46 45 45 44 44 44 44 50 524 10 525 751 251 428 92 427 93 86 87 72 72 1486 8 8 24 24 15 15 15 15 7 7 376 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 | // SPDX-License-Identifier: GPL-2.0-only /* * Integrity Measurement Architecture * * Copyright (C) 2005,2006,2007,2008 IBM Corporation * * Authors: * Reiner Sailer <sailer@watson.ibm.com> * Serge Hallyn <serue@us.ibm.com> * Kylene Hall <kylene@us.ibm.com> * Mimi Zohar <zohar@us.ibm.com> * * File: ima_main.c * implements the IMA hooks: ima_bprm_check, ima_file_mmap, * and ima_file_check. */ #include <linux/module.h> #include <linux/file.h> #include <linux/binfmts.h> #include <linux/kernel_read_file.h> #include <linux/mount.h> #include <linux/mman.h> #include <linux/slab.h> #include <linux/xattr.h> #include <linux/ima.h> #include <linux/fs.h> #include <linux/iversion.h> #include <linux/evm.h> #include "ima.h" #ifdef CONFIG_IMA_APPRAISE int ima_appraise = IMA_APPRAISE_ENFORCE; #else int ima_appraise; #endif int __ro_after_init ima_hash_algo = HASH_ALGO_SHA1; static int hash_setup_done; static struct notifier_block ima_lsm_policy_notifier = { .notifier_call = ima_lsm_policy_change, }; static int __init hash_setup(char *str) { struct ima_template_desc *template_desc = ima_template_desc_current(); int i; if (hash_setup_done) return 1; if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) { if (strncmp(str, "sha1", 4) == 0) { ima_hash_algo = HASH_ALGO_SHA1; } else if (strncmp(str, "md5", 3) == 0) { ima_hash_algo = HASH_ALGO_MD5; } else { pr_err("invalid hash algorithm \"%s\" for template \"%s\"", str, IMA_TEMPLATE_IMA_NAME); return 1; } goto out; } i = match_string(hash_algo_name, HASH_ALGO__LAST, str); if (i < 0) { pr_err("invalid hash algorithm \"%s\"", str); return 1; } ima_hash_algo = i; out: hash_setup_done = 1; return 1; } __setup("ima_hash=", hash_setup); enum hash_algo ima_get_current_hash_algo(void) { return ima_hash_algo; } /* Prevent mmap'ing a file execute that is already mmap'ed write */ static int mmap_violation_check(enum ima_hooks func, struct file *file, char **pathbuf, const char **pathname, char *filename) { struct inode *inode; int rc = 0; if ((func == MMAP_CHECK || func == MMAP_CHECK_REQPROT) && mapping_writably_mapped(file->f_mapping)) { rc = -ETXTBSY; inode = file_inode(file); if (!*pathbuf) /* ima_rdwr_violation possibly pre-fetched */ *pathname = ima_d_path(&file->f_path, pathbuf, filename); integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, *pathname, "mmap_file", "mmapped_writers", rc, 0); } return rc; } /* * ima_rdwr_violation_check * * Only invalidate the PCR for measured files: * - Opening a file for write when already open for read, * results in a time of measure, time of use (ToMToU) error. * - Opening a file for read when already open for write, * could result in a file measurement error. * */ static void ima_rdwr_violation_check(struct file *file, struct ima_iint_cache *iint, int must_measure, char **pathbuf, const char **pathname, char *filename) { struct inode *inode = file_inode(file); fmode_t mode = file->f_mode; bool send_tomtou = false, send_writers = false; if (mode & FMODE_WRITE) { if (atomic_read(&inode->i_readcount) && IS_IMA(inode)) { if (!iint) iint = ima_iint_find(inode); /* IMA_MEASURE is set from reader side */ if (iint && test_bit(IMA_MUST_MEASURE, &iint->atomic_flags)) send_tomtou = true; } } else { if (must_measure) set_bit(IMA_MUST_MEASURE, &iint->atomic_flags); if (inode_is_open_for_write(inode) && must_measure) send_writers = true; } if (!send_tomtou && !send_writers) return; *pathname = ima_d_path(&file->f_path, pathbuf, filename); if (send_tomtou) ima_add_violation(file, *pathname, iint, "invalid_pcr", "ToMToU"); if (send_writers) ima_add_violation(file, *pathname, iint, "invalid_pcr", "open_writers"); } static void ima_check_last_writer(struct ima_iint_cache *iint, struct inode *inode, struct file *file) { fmode_t mode = file->f_mode; bool update; if (!(mode & FMODE_WRITE)) return; mutex_lock(&iint->mutex); if (atomic_read(&inode->i_writecount) == 1) { struct kstat stat; update = test_and_clear_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); if ((iint->flags & IMA_NEW_FILE) || vfs_getattr_nosec(&file->f_path, &stat, STATX_CHANGE_COOKIE, AT_STATX_SYNC_AS_STAT) || !(stat.result_mask & STATX_CHANGE_COOKIE) || stat.change_cookie != iint->real_inode.version) { iint->flags &= ~(IMA_DONE_MASK | IMA_NEW_FILE); iint->measured_pcrs = 0; if (update) ima_update_xattr(iint, file); } } mutex_unlock(&iint->mutex); } /** * ima_file_free - called on __fput() * @file: pointer to file structure being freed * * Flag files that changed, based on i_version */ static void ima_file_free(struct file *file) { struct inode *inode = file_inode(file); struct ima_iint_cache *iint; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return; iint = ima_iint_find(inode); if (!iint) return; ima_check_last_writer(iint, inode, file); } static int process_measurement(struct file *file, const struct cred *cred, struct lsm_prop *prop, char *buf, loff_t size, int mask, enum ima_hooks func) { struct inode *real_inode, *inode = file_inode(file); struct ima_iint_cache *iint = NULL; struct ima_template_desc *template_desc = NULL; struct inode *metadata_inode; char *pathbuf = NULL; char filename[NAME_MAX]; const char *pathname = NULL; int rc = 0, action, must_appraise = 0; int pcr = CONFIG_IMA_MEASURE_PCR_IDX; struct evm_ima_xattr_data *xattr_value = NULL; struct modsig *modsig = NULL; int xattr_len = 0; bool violation_check; enum hash_algo hash_algo; unsigned int allowed_algos = 0; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return 0; /* Return an IMA_MEASURE, IMA_APPRAISE, IMA_AUDIT action * bitmask based on the appraise/audit/measurement policy. * Included is the appraise submask. */ action = ima_get_action(file_mnt_idmap(file), inode, cred, prop, mask, func, &pcr, &template_desc, NULL, &allowed_algos); violation_check = ((func == FILE_CHECK || func == MMAP_CHECK || func == MMAP_CHECK_REQPROT) && (ima_policy_flag & IMA_MEASURE)); if (!action && !violation_check) return 0; must_appraise = action & IMA_APPRAISE; /* Is the appraise rule hook specific? */ if (action & IMA_FILE_APPRAISE) func = FILE_CHECK; inode_lock(inode); if (action) { iint = ima_inode_get(inode); if (!iint) rc = -ENOMEM; } if (!rc && violation_check) ima_rdwr_violation_check(file, iint, action & IMA_MEASURE, &pathbuf, &pathname, filename); inode_unlock(inode); if (rc) goto out; if (!action) goto out; mutex_lock(&iint->mutex); if (test_and_clear_bit(IMA_CHANGE_ATTR, &iint->atomic_flags)) /* reset appraisal flags if ima_inode_post_setattr was called */ iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED | IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK | IMA_NONACTION_FLAGS); /* * Re-evaulate the file if either the xattr has changed or the * kernel has no way of detecting file change on the filesystem. * (Limited to privileged mounted filesystems.) */ if (test_and_clear_bit(IMA_CHANGE_XATTR, &iint->atomic_flags) || ((inode->i_sb->s_iflags & SB_I_IMA_UNVERIFIABLE_SIGNATURE) && !(inode->i_sb->s_iflags & SB_I_UNTRUSTED_MOUNTER) && !(action & IMA_FAIL_UNVERIFIABLE_SIGS))) { iint->flags &= ~IMA_DONE_MASK; iint->measured_pcrs = 0; } /* * On stacked filesystems, detect and re-evaluate file data and * metadata changes. */ real_inode = d_real_inode(file_dentry(file)); if (real_inode != inode && (action & IMA_DO_MASK) && (iint->flags & IMA_DONE_MASK)) { if (!IS_I_VERSION(real_inode) || integrity_inode_attrs_changed(&iint->real_inode, real_inode)) { iint->flags &= ~IMA_DONE_MASK; iint->measured_pcrs = 0; } /* * Reset the EVM status when metadata changed. */ metadata_inode = d_inode(d_real(file_dentry(file), D_REAL_METADATA)); if (evm_metadata_changed(inode, metadata_inode)) iint->flags &= ~(IMA_APPRAISED | IMA_APPRAISED_SUBMASK); } /* Determine if already appraised/measured based on bitmask * (IMA_MEASURE, IMA_MEASURED, IMA_XXXX_APPRAISE, IMA_XXXX_APPRAISED, * IMA_AUDIT, IMA_AUDITED) */ iint->flags |= action; action &= IMA_DO_MASK; action &= ~((iint->flags & (IMA_DONE_MASK ^ IMA_MEASURED)) >> 1); /* If target pcr is already measured, unset IMA_MEASURE action */ if ((action & IMA_MEASURE) && (iint->measured_pcrs & (0x1 << pcr))) action ^= IMA_MEASURE; /* HASH sets the digital signature and update flags, nothing else */ if ((action & IMA_HASH) && !(test_bit(IMA_DIGSIG, &iint->atomic_flags))) { xattr_len = ima_read_xattr(file_dentry(file), &xattr_value, xattr_len); if ((xattr_value && xattr_len > 2) && (xattr_value->type == EVM_IMA_XATTR_DIGSIG)) set_bit(IMA_DIGSIG, &iint->atomic_flags); iint->flags |= IMA_HASHED; action ^= IMA_HASH; set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); } /* Nothing to do, just return existing appraised status */ if (!action) { if (must_appraise) { rc = mmap_violation_check(func, file, &pathbuf, &pathname, filename); if (!rc) rc = ima_get_cache_status(iint, func); } goto out_locked; } if ((action & IMA_APPRAISE_SUBMASK) || strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0) { /* read 'security.ima' */ xattr_len = ima_read_xattr(file_dentry(file), &xattr_value, xattr_len); /* * Read the appended modsig if allowed by the policy, and allow * an additional measurement list entry, if needed, based on the * template format and whether the file was already measured. */ if (iint->flags & IMA_MODSIG_ALLOWED) { rc = ima_read_modsig(func, buf, size, &modsig); if (!rc && ima_template_has_modsig(template_desc) && iint->flags & IMA_MEASURED) action |= IMA_MEASURE; } } hash_algo = ima_get_hash_algo(xattr_value, xattr_len); rc = ima_collect_measurement(iint, file, buf, size, hash_algo, modsig); if (rc != 0 && rc != -EBADF && rc != -EINVAL) goto out_locked; if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ pathname = ima_d_path(&file->f_path, &pathbuf, filename); if (action & IMA_MEASURE) ima_store_measurement(iint, file, pathname, xattr_value, xattr_len, modsig, pcr, template_desc); if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) { rc = ima_check_blacklist(iint, modsig, pcr); if (rc != -EPERM) { inode_lock(inode); rc = ima_appraise_measurement(func, iint, file, pathname, xattr_value, xattr_len, modsig); inode_unlock(inode); } if (!rc) rc = mmap_violation_check(func, file, &pathbuf, &pathname, filename); } if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); if ((file->f_flags & O_DIRECT) && (iint->flags & IMA_PERMIT_DIRECTIO)) rc = 0; /* Ensure the digest was generated using an allowed algorithm */ if (rc == 0 && must_appraise && allowed_algos != 0 && (allowed_algos & (1U << hash_algo)) == 0) { rc = -EACCES; integrity_audit_msg(AUDIT_INTEGRITY_DATA, file_inode(file), pathname, "collect_data", "denied-hash-algorithm", rc, 0); } out_locked: if ((mask & MAY_WRITE) && test_bit(IMA_DIGSIG, &iint->atomic_flags) && !(iint->flags & IMA_NEW_FILE)) rc = -EACCES; mutex_unlock(&iint->mutex); kfree(xattr_value); ima_free_modsig(modsig); out: if (pathbuf) __putname(pathbuf); if (must_appraise) { if (rc && (ima_appraise & IMA_APPRAISE_ENFORCE)) return -EACCES; if (file->f_mode & FMODE_WRITE) set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); } return 0; } /** * ima_file_mmap - based on policy, collect/store measurement. * @file: pointer to the file to be measured (May be NULL) * @reqprot: protection requested by the application * @prot: protection that will be applied by the kernel * @flags: operational flags * * Measure files being mmapped executable based on the ima_must_measure() * policy decision. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) { struct lsm_prop prop; int ret; if (!file) return 0; security_current_getlsmprop_subj(&prop); if (reqprot & PROT_EXEC) { ret = process_measurement(file, current_cred(), &prop, NULL, 0, MAY_EXEC, MMAP_CHECK_REQPROT); if (ret) return ret; } if (prot & PROT_EXEC) return process_measurement(file, current_cred(), &prop, NULL, 0, MAY_EXEC, MMAP_CHECK); return 0; } /** * ima_file_mprotect - based on policy, limit mprotect change * @vma: vm_area_struct protection is set to * @reqprot: protection requested by the application * @prot: protection that will be applied by the kernel * * Files can be mmap'ed read/write and later changed to execute to circumvent * IMA's mmap appraisal policy rules. Due to locking issues (mmap semaphore * would be taken before i_mutex), files can not be measured or appraised at * this point. Eliminate this integrity gap by denying the mprotect * PROT_EXECUTE change, if an mmap appraise policy rule exists. * * On mprotect change success, return 0. On failure, return -EACESS. */ static int ima_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { struct ima_template_desc *template = NULL; struct file *file; char filename[NAME_MAX]; char *pathbuf = NULL; const char *pathname = NULL; struct inode *inode; struct lsm_prop prop; int result = 0; int action; int pcr; /* Is mprotect making an mmap'ed file executable? */ if (!(ima_policy_flag & IMA_APPRAISE) || !vma->vm_file || !(prot & PROT_EXEC) || (vma->vm_flags & VM_EXEC)) return 0; security_current_getlsmprop_subj(&prop); inode = file_inode(vma->vm_file); action = ima_get_action(file_mnt_idmap(vma->vm_file), inode, current_cred(), &prop, MAY_EXEC, MMAP_CHECK, &pcr, &template, NULL, NULL); action |= ima_get_action(file_mnt_idmap(vma->vm_file), inode, current_cred(), &prop, MAY_EXEC, MMAP_CHECK_REQPROT, &pcr, &template, NULL, NULL); /* Is the mmap'ed file in policy? */ if (!(action & (IMA_MEASURE | IMA_APPRAISE_SUBMASK))) return 0; if (action & IMA_APPRAISE_SUBMASK) result = -EPERM; file = vma->vm_file; pathname = ima_d_path(&file->f_path, &pathbuf, filename); integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, pathname, "collect_data", "failed-mprotect", result, 0); if (pathbuf) __putname(pathbuf); return result; } /** * ima_bprm_check - based on policy, collect/store measurement. * @bprm: contains the linux_binprm structure * * The OS protects against an executable file, already open for write, * from being executed in deny_write_access() and an executable file, * already open for execute, from being modified in get_write_access(). * So we can be certain that what we verify and measure here is actually * what is being executed. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_bprm_check(struct linux_binprm *bprm) { int ret; struct lsm_prop prop; security_current_getlsmprop_subj(&prop); ret = process_measurement(bprm->file, current_cred(), &prop, NULL, 0, MAY_EXEC, BPRM_CHECK); if (ret) return ret; security_cred_getlsmprop(bprm->cred, &prop); return process_measurement(bprm->file, bprm->cred, &prop, NULL, 0, MAY_EXEC, CREDS_CHECK); } /** * ima_file_check - based on policy, collect/store measurement. * @file: pointer to the file to be measured * @mask: contains MAY_READ, MAY_WRITE, MAY_EXEC or MAY_APPEND * * Measure files based on the ima_must_measure() policy decision. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_file_check(struct file *file, int mask) { struct lsm_prop prop; security_current_getlsmprop_subj(&prop); return process_measurement(file, current_cred(), &prop, NULL, 0, mask & (MAY_READ | MAY_WRITE | MAY_EXEC | MAY_APPEND), FILE_CHECK); } static int __ima_inode_hash(struct inode *inode, struct file *file, char *buf, size_t buf_size) { struct ima_iint_cache *iint = NULL, tmp_iint; int rc, hash_algo; if (ima_policy_flag) { iint = ima_iint_find(inode); if (iint) mutex_lock(&iint->mutex); } if ((!iint || !(iint->flags & IMA_COLLECTED)) && file) { if (iint) mutex_unlock(&iint->mutex); memset(&tmp_iint, 0, sizeof(tmp_iint)); mutex_init(&tmp_iint.mutex); rc = ima_collect_measurement(&tmp_iint, file, NULL, 0, ima_hash_algo, NULL); if (rc < 0) { /* ima_hash could be allocated in case of failure. */ if (rc != -ENOMEM) kfree(tmp_iint.ima_hash); return -EOPNOTSUPP; } iint = &tmp_iint; mutex_lock(&iint->mutex); } if (!iint) return -EOPNOTSUPP; /* * ima_file_hash can be called when ima_collect_measurement has still * not been called, we might not always have a hash. */ if (!iint->ima_hash || !(iint->flags & IMA_COLLECTED)) { mutex_unlock(&iint->mutex); return -EOPNOTSUPP; } if (buf) { size_t copied_size; copied_size = min_t(size_t, iint->ima_hash->length, buf_size); memcpy(buf, iint->ima_hash->digest, copied_size); } hash_algo = iint->ima_hash->algo; mutex_unlock(&iint->mutex); if (iint == &tmp_iint) kfree(iint->ima_hash); return hash_algo; } /** * ima_file_hash - return a measurement of the file * @file: pointer to the file * @buf: buffer in which to store the hash * @buf_size: length of the buffer * * On success, return the hash algorithm (as defined in the enum hash_algo). * If buf is not NULL, this function also outputs the hash into buf. * If the hash is larger than buf_size, then only buf_size bytes will be copied. * It generally just makes sense to pass a buffer capable of holding the largest * possible hash: IMA_MAX_DIGEST_SIZE. * The file hash returned is based on the entire file, including the appended * signature. * * If the measurement cannot be performed, return -EOPNOTSUPP. * If the parameters are incorrect, return -EINVAL. */ int ima_file_hash(struct file *file, char *buf, size_t buf_size) { if (!file) return -EINVAL; return __ima_inode_hash(file_inode(file), file, buf, buf_size); } EXPORT_SYMBOL_GPL(ima_file_hash); /** * ima_inode_hash - return the stored measurement if the inode has been hashed * and is in the iint cache. * @inode: pointer to the inode * @buf: buffer in which to store the hash * @buf_size: length of the buffer * * On success, return the hash algorithm (as defined in the enum hash_algo). * If buf is not NULL, this function also outputs the hash into buf. * If the hash is larger than buf_size, then only buf_size bytes will be copied. * It generally just makes sense to pass a buffer capable of holding the largest * possible hash: IMA_MAX_DIGEST_SIZE. * The hash returned is based on the entire contents, including the appended * signature. * * If IMA is disabled or if no measurement is available, return -EOPNOTSUPP. * If the parameters are incorrect, return -EINVAL. */ int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size) { if (!inode) return -EINVAL; return __ima_inode_hash(inode, NULL, buf, buf_size); } EXPORT_SYMBOL_GPL(ima_inode_hash); /** * ima_post_create_tmpfile - mark newly created tmpfile as new * @idmap: idmap of the mount the inode was found from * @inode: inode of the newly created tmpfile * * No measuring, appraising or auditing of newly created tmpfiles is needed. * Skip calling process_measurement(), but indicate which newly, created * tmpfiles are in policy. */ static void ima_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode) { struct ima_iint_cache *iint; int must_appraise; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return; must_appraise = ima_must_appraise(idmap, inode, MAY_ACCESS, FILE_CHECK); if (!must_appraise) return; /* Nothing to do if we can't allocate memory */ iint = ima_inode_get(inode); if (!iint) return; /* needed for writing the security xattrs */ set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); iint->ima_file_status = INTEGRITY_PASS; } /** * ima_post_path_mknod - mark as a new inode * @idmap: idmap of the mount the inode was found from * @dentry: newly created dentry * * Mark files created via the mknodat syscall as new, so that the * file data can be written later. */ static void ima_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry) { struct ima_iint_cache *iint; struct inode *inode = dentry->d_inode; int must_appraise; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return; must_appraise = ima_must_appraise(idmap, inode, MAY_ACCESS, FILE_CHECK); if (!must_appraise) return; /* Nothing to do if we can't allocate memory */ iint = ima_inode_get(inode); if (!iint) return; /* needed for re-opening empty files */ iint->flags |= IMA_NEW_FILE; } /** * ima_read_file - pre-measure/appraise hook decision based on policy * @file: pointer to the file to be measured/appraised/audit * @read_id: caller identifier * @contents: whether a subsequent call will be made to ima_post_read_file() * * Permit reading a file based on policy. The policy rules are written * in terms of the policy identifier. Appraising the integrity of * a file requires a file descriptor. * * For permission return 0, otherwise return -EACCES. */ static int ima_read_file(struct file *file, enum kernel_read_file_id read_id, bool contents) { enum ima_hooks func; struct lsm_prop prop; /* * Do devices using pre-allocated memory run the risk of the * firmware being accessible to the device prior to the completion * of IMA's signature verification any more than when using two * buffers? It may be desirable to include the buffer address * in this API and walk all the dma_map_single() mappings to check. */ /* * There will be a call made to ima_post_read_file() with * a filled buffer, so we don't need to perform an extra * read early here. */ if (contents) return 0; /* Read entire file for all partial reads. */ func = read_idmap[read_id] ?: FILE_CHECK; security_current_getlsmprop_subj(&prop); return process_measurement(file, current_cred(), &prop, NULL, 0, MAY_READ, func); } const int read_idmap[READING_MAX_ID] = { [READING_FIRMWARE] = FIRMWARE_CHECK, [READING_MODULE] = MODULE_CHECK, [READING_KEXEC_IMAGE] = KEXEC_KERNEL_CHECK, [READING_KEXEC_INITRAMFS] = KEXEC_INITRAMFS_CHECK, [READING_POLICY] = POLICY_CHECK }; /** * ima_post_read_file - in memory collect/appraise/audit measurement * @file: pointer to the file to be measured/appraised/audit * @buf: pointer to in memory file contents * @size: size of in memory file contents * @read_id: caller identifier * * Measure/appraise/audit in memory file based on policy. Policy rules * are written in terms of a policy identifier. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id read_id) { enum ima_hooks func; struct lsm_prop prop; /* permit signed certs */ if (!file && read_id == READING_X509_CERTIFICATE) return 0; if (!file || !buf || size == 0) { /* should never happen */ if (ima_appraise & IMA_APPRAISE_ENFORCE) return -EACCES; return 0; } func = read_idmap[read_id] ?: FILE_CHECK; security_current_getlsmprop_subj(&prop); return process_measurement(file, current_cred(), &prop, buf, size, MAY_READ, func); } /** * ima_load_data - appraise decision based on policy * @id: kernel load data caller identifier * @contents: whether the full contents will be available in a later * call to ima_post_load_data(). * * Callers of this LSM hook can not measure, appraise, or audit the * data provided by userspace. Enforce policy rules requiring a file * signature (eg. kexec'ed kernel image). * * For permission return 0, otherwise return -EACCES. */ static int ima_load_data(enum kernel_load_data_id id, bool contents) { bool ima_enforce, sig_enforce; ima_enforce = (ima_appraise & IMA_APPRAISE_ENFORCE) == IMA_APPRAISE_ENFORCE; switch (id) { case LOADING_KEXEC_IMAGE: if (IS_ENABLED(CONFIG_KEXEC_SIG) && arch_ima_get_secureboot()) { pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n"); return -EACCES; } if (ima_enforce && (ima_appraise & IMA_APPRAISE_KEXEC)) { pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } break; case LOADING_FIRMWARE: if (ima_enforce && (ima_appraise & IMA_APPRAISE_FIRMWARE) && !contents) { pr_err("Prevent firmware sysfs fallback loading.\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } break; case LOADING_MODULE: sig_enforce = is_module_sig_enforced(); if (ima_enforce && (!sig_enforce && (ima_appraise & IMA_APPRAISE_MODULES))) { pr_err("impossible to appraise a module without a file descriptor. sig_enforce kernel parameter might help\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } break; default: break; } return 0; } /** * ima_post_load_data - appraise decision based on policy * @buf: pointer to in memory file contents * @size: size of in memory file contents * @load_id: kernel load data caller identifier * @description: @load_id-specific description of contents * * Measure/appraise/audit in memory buffer based on policy. Policy rules * are written in terms of a policy identifier. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_post_load_data(char *buf, loff_t size, enum kernel_load_data_id load_id, char *description) { if (load_id == LOADING_FIRMWARE) { if ((ima_appraise & IMA_APPRAISE_FIRMWARE) && (ima_appraise & IMA_APPRAISE_ENFORCE)) { pr_err("Prevent firmware loading_store.\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } return 0; } /* * Measure the init_module syscall buffer containing the ELF image. */ if (load_id == LOADING_MODULE) ima_measure_critical_data("modules", "init_module", buf, size, true, NULL, 0); return 0; } /** * process_buffer_measurement - Measure the buffer or the buffer data hash * @idmap: idmap of the mount the inode was found from * @inode: inode associated with the object being measured (NULL for KEY_CHECK) * @buf: pointer to the buffer that needs to be added to the log. * @size: size of buffer(in bytes). * @eventname: event name to be used for the buffer entry. * @func: IMA hook * @pcr: pcr to extend the measurement * @func_data: func specific data, may be NULL * @buf_hash: measure buffer data hash * @digest: buffer digest will be written to * @digest_len: buffer length * * Based on policy, either the buffer data or buffer data hash is measured * * Return: 0 if the buffer has been successfully measured, 1 if the digest * has been written to the passed location but not added to a measurement entry, * a negative value otherwise. */ int process_buffer_measurement(struct mnt_idmap *idmap, struct inode *inode, const void *buf, int size, const char *eventname, enum ima_hooks func, int pcr, const char *func_data, bool buf_hash, u8 *digest, size_t digest_len) { int ret = 0; const char *audit_cause = "ENOMEM"; struct ima_template_entry *entry = NULL; struct ima_iint_cache iint = {}; struct ima_event_data event_data = {.iint = &iint, .filename = eventname, .buf = buf, .buf_len = size}; struct ima_template_desc *template; struct ima_max_digest_data hash; struct ima_digest_data *hash_hdr = container_of(&hash.hdr, struct ima_digest_data, hdr); char digest_hash[IMA_MAX_DIGEST_SIZE]; int digest_hash_len = hash_digest_size[ima_hash_algo]; int violation = 0; int action = 0; struct lsm_prop prop; if (digest && digest_len < digest_hash_len) return -EINVAL; if (!ima_policy_flag && !digest) return -ENOENT; template = ima_template_desc_buf(); if (!template) { ret = -EINVAL; audit_cause = "ima_template_desc_buf"; goto out; } /* * Both LSM hooks and auxilary based buffer measurements are * based on policy. To avoid code duplication, differentiate * between the LSM hooks and auxilary buffer measurements, * retrieving the policy rule information only for the LSM hook * buffer measurements. */ if (func) { security_current_getlsmprop_subj(&prop); action = ima_get_action(idmap, inode, current_cred(), &prop, 0, func, &pcr, &template, func_data, NULL); if (!(action & IMA_MEASURE) && !digest) return -ENOENT; } if (!pcr) pcr = CONFIG_IMA_MEASURE_PCR_IDX; iint.ima_hash = hash_hdr; iint.ima_hash->algo = ima_hash_algo; iint.ima_hash->length = hash_digest_size[ima_hash_algo]; ret = ima_calc_buffer_hash(buf, size, iint.ima_hash); if (ret < 0) { audit_cause = "hashing_error"; goto out; } if (buf_hash) { memcpy(digest_hash, hash_hdr->digest, digest_hash_len); ret = ima_calc_buffer_hash(digest_hash, digest_hash_len, iint.ima_hash); if (ret < 0) { audit_cause = "hashing_error"; goto out; } event_data.buf = digest_hash; event_data.buf_len = digest_hash_len; } if (digest) memcpy(digest, iint.ima_hash->digest, digest_hash_len); if (!ima_policy_flag || (func && !(action & IMA_MEASURE))) return 1; ret = ima_alloc_init_template(&event_data, &entry, template); if (ret < 0) { audit_cause = "alloc_entry"; goto out; } ret = ima_store_template(entry, violation, NULL, event_data.buf, pcr); if (ret < 0) { audit_cause = "store_entry"; ima_free_template_entry(entry); } out: if (ret < 0) integrity_audit_message(AUDIT_INTEGRITY_PCR, NULL, eventname, func_measure_str(func), audit_cause, ret, 0, ret); return ret; } /** * ima_kexec_cmdline - measure kexec cmdline boot args * @kernel_fd: file descriptor of the kexec kernel being loaded * @buf: pointer to buffer * @size: size of buffer * * Buffers can only be measured, not appraised. */ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size) { if (!buf || !size) return; CLASS(fd, f)(kernel_fd); if (fd_empty(f)) return; process_buffer_measurement(file_mnt_idmap(fd_file(f)), file_inode(fd_file(f)), buf, size, "kexec-cmdline", KEXEC_CMDLINE, 0, NULL, false, NULL, 0); } /** * ima_measure_critical_data - measure kernel integrity critical data * @event_label: unique event label for grouping and limiting critical data * @event_name: event name for the record in the IMA measurement list * @buf: pointer to buffer data * @buf_len: length of buffer data (in bytes) * @hash: measure buffer data hash * @digest: buffer digest will be written to * @digest_len: buffer length * * Measure data critical to the integrity of the kernel into the IMA log * and extend the pcr. Examples of critical data could be various data * structures, policies, and states stored in kernel memory that can * impact the integrity of the system. * * Return: 0 if the buffer has been successfully measured, 1 if the digest * has been written to the passed location but not added to a measurement entry, * a negative value otherwise. */ int ima_measure_critical_data(const char *event_label, const char *event_name, const void *buf, size_t buf_len, bool hash, u8 *digest, size_t digest_len) { if (!event_name || !event_label || !buf || !buf_len) return -ENOPARAM; return process_buffer_measurement(&nop_mnt_idmap, NULL, buf, buf_len, event_name, CRITICAL_DATA, 0, event_label, hash, digest, digest_len); } EXPORT_SYMBOL_GPL(ima_measure_critical_data); #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS /** * ima_kernel_module_request - Prevent crypto-pkcs1(rsa,*) requests * @kmod_name: kernel module name * * Avoid a verification loop where verifying the signature of the modprobe * binary requires executing modprobe itself. Since the modprobe iint->mutex * is already held when the signature verification is performed, a deadlock * occurs as soon as modprobe is executed within the critical region, since * the same lock cannot be taken again. * * This happens when public_key_verify_signature(), in case of RSA algorithm, * use alg_name to store internal information in order to construct an * algorithm on the fly, but crypto_larval_lookup() will try to use alg_name * in order to load a kernel module with same name. * * Since we don't have any real "crypto-pkcs1(rsa,*)" kernel modules, * we are safe to fail such module request from crypto_larval_lookup(), and * avoid the verification loop. * * Return: Zero if it is safe to load the kernel module, -EINVAL otherwise. */ static int ima_kernel_module_request(char *kmod_name) { if (strncmp(kmod_name, "crypto-pkcs1(rsa,", 17) == 0) return -EINVAL; return 0; } #endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */ static int __init init_ima(void) { int error; ima_appraise_parse_cmdline(); ima_init_template_list(); hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); if (error && strcmp(hash_algo_name[ima_hash_algo], CONFIG_IMA_DEFAULT_HASH) != 0) { pr_info("Allocating %s failed, going to use default hash algorithm %s\n", hash_algo_name[ima_hash_algo], CONFIG_IMA_DEFAULT_HASH); hash_setup_done = 0; hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); } if (error) return error; error = register_blocking_lsm_notifier(&ima_lsm_policy_notifier); if (error) pr_warn("Couldn't register LSM notifier, error %d\n", error); if (!error) ima_update_policy_flags(); return error; } static struct security_hook_list ima_hooks[] __ro_after_init = { LSM_HOOK_INIT(bprm_check_security, ima_bprm_check), LSM_HOOK_INIT(file_post_open, ima_file_check), LSM_HOOK_INIT(inode_post_create_tmpfile, ima_post_create_tmpfile), LSM_HOOK_INIT(file_release, ima_file_free), LSM_HOOK_INIT(mmap_file, ima_file_mmap), LSM_HOOK_INIT(file_mprotect, ima_file_mprotect), LSM_HOOK_INIT(kernel_load_data, ima_load_data), LSM_HOOK_INIT(kernel_post_load_data, ima_post_load_data), LSM_HOOK_INIT(kernel_read_file, ima_read_file), LSM_HOOK_INIT(kernel_post_read_file, ima_post_read_file), LSM_HOOK_INIT(path_post_mknod, ima_post_path_mknod), #ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS LSM_HOOK_INIT(key_post_create_or_update, ima_post_key_create_or_update), #endif #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS LSM_HOOK_INIT(kernel_module_request, ima_kernel_module_request), #endif LSM_HOOK_INIT(inode_free_security_rcu, ima_inode_free_rcu), }; static const struct lsm_id ima_lsmid = { .name = "ima", .id = LSM_ID_IMA, }; static int __init init_ima_lsm(void) { ima_iintcache_init(); security_add_hooks(ima_hooks, ARRAY_SIZE(ima_hooks), &ima_lsmid |