Total coverage: 231330 (12%) of 2013830
1 1 1 1 1 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 // SPDX-License-Identifier: GPL-2.0-only /* * This file contains functions assisting in mapping VFS to 9P2000 * * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/cred.h> #include <linux/parser.h> #include <linux/slab.h> #include <linux/seq_file.h> #include <net/9p/9p.h> #include <net/9p/client.h> #include <net/9p/transport.h> #include "v9fs.h" #include "v9fs_vfs.h" #include "cache.h" static DEFINE_SPINLOCK(v9fs_sessionlist_lock); static LIST_HEAD(v9fs_sessionlist); struct kmem_cache *v9fs_inode_cache; /* * Option Parsing (code inspired by NFS code) * NOTE: each transport will parse its own options */ enum { /* Options that take integer arguments */ Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid, /* String options */ Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag, /* Options that take no arguments */ Opt_nodevmap, Opt_noxattr, Opt_directio, Opt_ignoreqv, /* Access options */ Opt_access, Opt_posixacl, /* Lock timeout option */ Opt_locktimeout, /* 
Error token */ Opt_err }; static const match_table_t tokens = { {Opt_debug, "debug=%x"}, {Opt_dfltuid, "dfltuid=%u"}, {Opt_dfltgid, "dfltgid=%u"}, {Opt_afid, "afid=%u"}, {Opt_uname, "uname=%s"}, {Opt_remotename, "aname=%s"}, {Opt_nodevmap, "nodevmap"}, {Opt_noxattr, "noxattr"}, {Opt_directio, "directio"}, {Opt_ignoreqv, "ignoreqv"}, {Opt_cache, "cache=%s"}, {Opt_cachetag, "cachetag=%s"}, {Opt_access, "access=%s"}, {Opt_posixacl, "posixacl"}, {Opt_locktimeout, "locktimeout=%u"}, {Opt_err, NULL} }; /* Interpret mount options for cache mode */ static int get_cache_mode(char *s) { int version = -EINVAL; if (!strcmp(s, "loose")) { version = CACHE_SC_LOOSE; p9_debug(P9_DEBUG_9P, "Cache mode: loose\n"); } else if (!strcmp(s, "fscache")) { version = CACHE_SC_FSCACHE; p9_debug(P9_DEBUG_9P, "Cache mode: fscache\n"); } else if (!strcmp(s, "mmap")) { version = CACHE_SC_MMAP; p9_debug(P9_DEBUG_9P, "Cache mode: mmap\n"); } else if (!strcmp(s, "readahead")) { version = CACHE_SC_READAHEAD; p9_debug(P9_DEBUG_9P, "Cache mode: readahead\n"); } else if (!strcmp(s, "none")) { version = CACHE_SC_NONE; p9_debug(P9_DEBUG_9P, "Cache mode: none\n"); } else if (kstrtoint(s, 0, &version) != 0) { version = -EINVAL; pr_info("Unknown Cache mode or invalid value %s\n", s); } return version; } /* * Display the mount options in /proc/mounts. 
*/
int v9fs_show_options(struct seq_file *m, struct dentry *root)
{
	struct v9fs_session_info *ses = root->d_sb->s_fs_info;

	/* Integer and id options: printed only when set / non-default. */
	if (ses->debug)
		seq_printf(m, ",debug=%x", ses->debug);

	if (!uid_eq(ses->dfltuid, V9FS_DEFUID))
		seq_printf(m, ",dfltuid=%u",
			   from_kuid_munged(&init_user_ns, ses->dfltuid));

	if (!gid_eq(ses->dfltgid, V9FS_DEFGID))
		seq_printf(m, ",dfltgid=%u",
			   from_kgid_munged(&init_user_ns, ses->dfltgid));

	if (ses->afid != ~0)
		seq_printf(m, ",afid=%u", ses->afid);

	/* User and attach names: printed only when they differ from the defaults. */
	if (strcmp(ses->uname, V9FS_DEFUSER))
		seq_printf(m, ",uname=%s", ses->uname);

	if (strcmp(ses->aname, V9FS_DEFANAME))
		seq_printf(m, ",aname=%s", ses->aname);

	if (ses->nodev)
		seq_puts(m, ",nodevmap");

	if (ses->cache)
		seq_printf(m, ",cache=%x", ses->cache);

#ifdef CONFIG_9P_FSCACHE
	/* The tag is only reported when the fscache bit is enabled. */
	if (ses->cachetag && (ses->cache & CACHE_FSCACHE))
		seq_printf(m, ",cachetag=%s", ses->cachetag);
#endif

	/* Exactly one access mode is reported, if any is set. */
	switch (ses->flags & V9FS_ACCESS_MASK) {
	case V9FS_ACCESS_SINGLE:
		seq_printf(m, ",access=%u",
			   from_kuid_munged(&init_user_ns, ses->uid));
		break;
	case V9FS_ACCESS_CLIENT:
		seq_puts(m, ",access=client");
		break;
	case V9FS_ACCESS_ANY:
		seq_puts(m, ",access=any");
		break;
	case V9FS_ACCESS_USER:
		seq_puts(m, ",access=user");
		break;
	}

	/* Boolean flag options. */
	if (ses->flags & V9FS_IGNORE_QV)
		seq_puts(m, ",ignoreqv");
	if (ses->flags & V9FS_DIRECT_IO)
		seq_puts(m, ",directio");
	if (ses->flags & V9FS_POSIX_ACL)
		seq_puts(m, ",posixacl");
	if (ses->flags & V9FS_NO_XATTR)
		seq_puts(m, ",noxattr");

	/* The client appends its own options and supplies the result. */
	return p9_show_client_options(m, ses->clnt);
}

/**
 * v9fs_parse_options - parse mount options into session structure
 * @v9ses: existing v9fs session information
 * @opts: The mount option string
 *
 * Return 0 upon success, -ERRNO upon failure.
*/ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts) { char *options, *tmp_options; substring_t args[MAX_OPT_ARGS]; char *p; int option = 0; char *s; int ret = 0; /* setup defaults */ v9ses->afid = ~0; v9ses->debug = 0; v9ses->cache = CACHE_NONE; #ifdef CONFIG_9P_FSCACHE v9ses->cachetag = NULL; #endif v9ses->session_lock_timeout = P9_LOCK_TIMEOUT; if (!opts) return 0; tmp_options = kstrdup(opts, GFP_KERNEL); if (!tmp_options) { ret = -ENOMEM; goto fail_option_alloc; } options = tmp_options; while ((p = strsep(&options, ",")) != NULL) { int token, r; if (!*p) continue; token = match_token(p, tokens, args); switch (token) { case Opt_debug: r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); ret = r; } else { v9ses->debug = option; #ifdef CONFIG_NET_9P_DEBUG p9_debug_level = option; #endif } break; case Opt_dfltuid: r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); ret = r; continue; } v9ses->dfltuid = make_kuid(current_user_ns(), option); if (!uid_valid(v9ses->dfltuid)) { p9_debug(P9_DEBUG_ERROR, "uid field, but not a uid?\n"); ret = -EINVAL; } break; case Opt_dfltgid: r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); ret = r; continue; } v9ses->dfltgid = make_kgid(current_user_ns(), option); if (!gid_valid(v9ses->dfltgid)) { p9_debug(P9_DEBUG_ERROR, "gid field, but not a gid?\n"); ret = -EINVAL; } break; case Opt_afid: r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); ret = r; } else { v9ses->afid = option; } break; case Opt_uname: kfree(v9ses->uname); v9ses->uname = match_strdup(&args[0]); if (!v9ses->uname) { ret = -ENOMEM; goto free_and_return; } break; case Opt_remotename: kfree(v9ses->aname); v9ses->aname = match_strdup(&args[0]); if (!v9ses->aname) { ret = -ENOMEM; goto free_and_return; } break; case 
Opt_nodevmap: v9ses->nodev = 1; break; case Opt_noxattr: v9ses->flags |= V9FS_NO_XATTR; break; case Opt_directio: v9ses->flags |= V9FS_DIRECT_IO; break; case Opt_ignoreqv: v9ses->flags |= V9FS_IGNORE_QV; break; case Opt_cachetag: #ifdef CONFIG_9P_FSCACHE kfree(v9ses->cachetag); v9ses->cachetag = match_strdup(&args[0]); if (!v9ses->cachetag) { ret = -ENOMEM; goto free_and_return; } #endif break; case Opt_cache: s = match_strdup(&args[0]); if (!s) { ret = -ENOMEM; p9_debug(P9_DEBUG_ERROR, "problem allocating copy of cache arg\n"); goto free_and_return; } r = get_cache_mode(s); if (r < 0) ret = r; else v9ses->cache = r; kfree(s); break; case Opt_access: s = match_strdup(&args[0]); if (!s) { ret = -ENOMEM; p9_debug(P9_DEBUG_ERROR, "problem allocating copy of access arg\n"); goto free_and_return; } v9ses->flags &= ~V9FS_ACCESS_MASK; if (strcmp(s, "user") == 0) v9ses->flags |= V9FS_ACCESS_USER; else if (strcmp(s, "any") == 0) v9ses->flags |= V9FS_ACCESS_ANY; else if (strcmp(s, "client") == 0) { v9ses->flags |= V9FS_ACCESS_CLIENT; } else { uid_t uid; v9ses->flags |= V9FS_ACCESS_SINGLE; r = kstrtouint(s, 10, &uid); if (r) { ret = r; pr_info("Unknown access argument %s: %d\n", s, r); kfree(s); continue; } v9ses->uid = make_kuid(current_user_ns(), uid); if (!uid_valid(v9ses->uid)) { ret = -EINVAL; pr_info("Unknown uid %s\n", s); } } kfree(s); break; case Opt_posixacl: #ifdef CONFIG_9P_FS_POSIX_ACL v9ses->flags |= V9FS_POSIX_ACL; #else p9_debug(P9_DEBUG_ERROR, "Not defined CONFIG_9P_FS_POSIX_ACL. 
Ignoring posixacl option\n"); #endif break; case Opt_locktimeout: r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, "integer field, but no integer?\n"); ret = r; continue; } if (option < 1) { p9_debug(P9_DEBUG_ERROR, "locktimeout must be a greater than zero integer.\n"); ret = -EINVAL; continue; } v9ses->session_lock_timeout = (long)option * HZ; break; default: continue; } } free_and_return: kfree(tmp_options); fail_option_alloc: return ret; } /** * v9fs_session_init - initialize session * @v9ses: session information structure * @dev_name: device being mounted * @data: options * */ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses, const char *dev_name, char *data) { struct p9_fid *fid; int rc = -ENOMEM; v9ses->uname = kstrdup(V9FS_DEFUSER, GFP_KERNEL); if (!v9ses->uname) goto err_names; v9ses->aname = kstrdup(V9FS_DEFANAME, GFP_KERNEL); if (!v9ses->aname) goto err_names; init_rwsem(&v9ses->rename_sem); v9ses->uid = INVALID_UID; v9ses->dfltuid = V9FS_DEFUID; v9ses->dfltgid = V9FS_DEFGID; v9ses->clnt = p9_client_create(dev_name, data); if (IS_ERR(v9ses->clnt)) { rc = PTR_ERR(v9ses->clnt); p9_debug(P9_DEBUG_ERROR, "problem initializing 9p client\n"); goto err_names; } v9ses->flags = V9FS_ACCESS_USER; if (p9_is_proto_dotl(v9ses->clnt)) { v9ses->flags = V9FS_ACCESS_CLIENT; v9ses->flags |= V9FS_PROTO_2000L; } else if (p9_is_proto_dotu(v9ses->clnt)) { v9ses->flags |= V9FS_PROTO_2000U; } rc = v9fs_parse_options(v9ses, data); if (rc < 0) goto err_clnt; v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ; if (!v9fs_proto_dotl(v9ses) && ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) { /* * We support ACCESS_CLIENT only for dotl. 
* Fall back to ACCESS_USER */ v9ses->flags &= ~V9FS_ACCESS_MASK; v9ses->flags |= V9FS_ACCESS_USER; } /* FIXME: for legacy mode, fall back to V9FS_ACCESS_ANY */ if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) && ((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) { v9ses->flags &= ~V9FS_ACCESS_MASK; v9ses->flags |= V9FS_ACCESS_ANY; v9ses->uid = INVALID_UID; } if (!v9fs_proto_dotl(v9ses) || !((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) { /* * We support ACL checks on client only if the protocol is * 9P2000.L and access is V9FS_ACCESS_CLIENT. */ v9ses->flags &= ~V9FS_ACL_MASK; } fid = p9_client_attach(v9ses->clnt, NULL, v9ses->uname, INVALID_UID, v9ses->aname); if (IS_ERR(fid)) { rc = PTR_ERR(fid); p9_debug(P9_DEBUG_ERROR, "cannot attach\n"); goto err_clnt; } if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_SINGLE) fid->uid = v9ses->uid; else fid->uid = INVALID_UID; #ifdef CONFIG_9P_FSCACHE /* register the session for caching */ if (v9ses->cache & CACHE_FSCACHE) { rc = v9fs_cache_session_get_cookie(v9ses, dev_name); if (rc < 0) goto err_clnt; } #endif spin_lock(&v9fs_sessionlist_lock); list_add(&v9ses->slist, &v9fs_sessionlist); spin_unlock(&v9fs_sessionlist_lock); return fid; err_clnt: #ifdef CONFIG_9P_FSCACHE kfree(v9ses->cachetag); #endif p9_client_destroy(v9ses->clnt); err_names: kfree(v9ses->uname); kfree(v9ses->aname); return ERR_PTR(rc); } /** * v9fs_session_close - shutdown a session * @v9ses: session information structure * */ void v9fs_session_close(struct v9fs_session_info *v9ses) { if (v9ses->clnt) { p9_client_destroy(v9ses->clnt); v9ses->clnt = NULL; } #ifdef CONFIG_9P_FSCACHE fscache_relinquish_volume(v9fs_session_cache(v9ses), NULL, false); kfree(v9ses->cachetag); #endif kfree(v9ses->uname); kfree(v9ses->aname); spin_lock(&v9fs_sessionlist_lock); list_del(&v9ses->slist); spin_unlock(&v9fs_sessionlist_lock); } /** * v9fs_session_cancel - terminate a session * @v9ses: session to terminate * * mark transport as disconnected and 
cancel all pending requests. */ void v9fs_session_cancel(struct v9fs_session_info *v9ses) { p9_debug(P9_DEBUG_ERROR, "cancel session %p\n", v9ses); p9_client_disconnect(v9ses->clnt); } /** * v9fs_session_begin_cancel - Begin terminate of a session * @v9ses: session to terminate * * After this call we don't allow any request other than clunk. */ void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses) { p9_debug(P9_DEBUG_ERROR, "begin cancel session %p\n", v9ses); p9_client_begin_disconnect(v9ses->clnt); } static struct kobject *v9fs_kobj; #ifdef CONFIG_9P_FSCACHE /* * List caches associated with a session */ static ssize_t caches_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { ssize_t n = 0, count = 0, limit = PAGE_SIZE; struct v9fs_session_info *v9ses; spin_lock(&v9fs_sessionlist_lock); list_for_each_entry(v9ses, &v9fs_sessionlist, slist) { if (v9ses->cachetag) { n = snprintf(buf + count, limit, "%s\n", v9ses->cachetag); if (n < 0) { count = n; break; } count += n; limit -= n; } } spin_unlock(&v9fs_sessionlist_lock); return count; } static struct kobj_attribute v9fs_attr_cache = __ATTR_RO(caches); #endif /* CONFIG_9P_FSCACHE */ static struct attribute *v9fs_attrs[] = { #ifdef CONFIG_9P_FSCACHE &v9fs_attr_cache.attr, #endif NULL, }; static const struct attribute_group v9fs_attr_group = { .attrs = v9fs_attrs, }; /** * v9fs_sysfs_init - Initialize the v9fs sysfs interface * */ static int __init v9fs_sysfs_init(void) { int ret; v9fs_kobj = kobject_create_and_add("9p", fs_kobj); if (!v9fs_kobj) return -ENOMEM; ret = sysfs_create_group(v9fs_kobj, &v9fs_attr_group); if (ret) { kobject_put(v9fs_kobj); return ret; } return 0; } /** * v9fs_sysfs_cleanup - Unregister the v9fs sysfs interface * */ static void v9fs_sysfs_cleanup(void) { sysfs_remove_group(v9fs_kobj, &v9fs_attr_group); kobject_put(v9fs_kobj); } static void v9fs_inode_init_once(void *foo) { struct v9fs_inode *v9inode = (struct v9fs_inode *)foo; memset(&v9inode->qid, 0, 
sizeof(v9inode->qid)); inode_init_once(&v9inode->netfs.inode); } /** * v9fs_init_inode_cache - initialize a cache for 9P * Returns 0 on success. */ static int v9fs_init_inode_cache(void) { v9fs_inode_cache = kmem_cache_create("v9fs_inode_cache", sizeof(struct v9fs_inode), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_ACCOUNT), v9fs_inode_init_once); if (!v9fs_inode_cache) return -ENOMEM; return 0; } /** * v9fs_destroy_inode_cache - destroy the cache of 9P inode * */ static void v9fs_destroy_inode_cache(void) { /* * Make sure all delayed rcu free inodes are flushed before we * destroy cache. */ rcu_barrier(); kmem_cache_destroy(v9fs_inode_cache); } /** * init_v9fs - Initialize module * */ static int __init init_v9fs(void) { int err; pr_info("Installing v9fs 9p2000 file system support\n"); /* TODO: Setup list of registered transport modules */ err = v9fs_init_inode_cache(); if (err < 0) { pr_err("Failed to register v9fs for caching\n"); return err; } err = v9fs_sysfs_init(); if (err < 0) { pr_err("Failed to register with sysfs\n"); goto out_cache; } err = register_filesystem(&v9fs_fs_type); if (err < 0) { pr_err("Failed to register filesystem\n"); goto out_sysfs_cleanup; } return 0; out_sysfs_cleanup: v9fs_sysfs_cleanup(); out_cache: v9fs_destroy_inode_cache(); return err; } /** * exit_v9fs - shutdown module * */ static void __exit exit_v9fs(void) { v9fs_sysfs_cleanup(); v9fs_destroy_inode_cache(); unregister_filesystem(&v9fs_fs_type); } module_init(init_v9fs) module_exit(exit_v9fs) MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>"); MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>"); MODULE_AUTHOR("Ron Minnich <rminnich@lanl.gov>"); MODULE_DESCRIPTION("9P Client File System"); MODULE_LICENSE("GPL");
15 15 15 15 15 14 15 15 14 15 14 14 15 15 15 15 15 1 14 14 15 15 15 15 15 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 
478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 // SPDX-License-Identifier: GPL-2.0-or-later /* PKCS#7 parser * * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) */ #define pr_fmt(fmt) "PKCS7: "fmt #include <linux/kernel.h> #include <linux/module.h> #include <linux/export.h> #include <linux/slab.h> #include <linux/err.h> #include <linux/oid_registry.h> #include <crypto/public_key.h> #include "pkcs7_parser.h" #include "pkcs7.asn1.h" MODULE_DESCRIPTION("PKCS#7 parser"); MODULE_AUTHOR("Red Hat, Inc."); MODULE_LICENSE("GPL"); struct pkcs7_parse_context { struct pkcs7_message *msg; /* Message being constructed */ struct pkcs7_signed_info *sinfo; /* SignedInfo being constructed */ struct pkcs7_signed_info **ppsinfo; struct x509_certificate *certs; /* Certificate cache */ struct x509_certificate **ppcerts; unsigned long data; /* Start of data */ enum OID last_oid; /* Last OID encountered */ unsigned x509_index; unsigned sinfo_index; const void *raw_serial; unsigned raw_serial_size; unsigned raw_issuer_size; const void *raw_issuer; const void *raw_skid; unsigned raw_skid_size; bool expect_skid; }; /* * Free a signed information block. */ static void pkcs7_free_signed_info(struct pkcs7_signed_info *sinfo) { if (sinfo) { public_key_signature_free(sinfo->sig); kfree(sinfo); } } /** * pkcs7_free_message - Free a PKCS#7 message * @pkcs7: The PKCS#7 message to free */ void pkcs7_free_message(struct pkcs7_message *pkcs7) { struct x509_certificate *cert; struct pkcs7_signed_info *sinfo; if (pkcs7) { while (pkcs7->certs) { cert = pkcs7->certs; pkcs7->certs = cert->next; x509_free_certificate(cert); } while (pkcs7->crl) { cert = pkcs7->crl; pkcs7->crl = cert->next; x509_free_certificate(cert); } while (pkcs7->signed_infos) { sinfo = pkcs7->signed_infos; pkcs7->signed_infos = sinfo->next; pkcs7_free_signed_info(sinfo); } kfree(pkcs7); } } EXPORT_SYMBOL_GPL(pkcs7_free_message); /* * Check authenticatedAttributes are provided or not provided consistently. 
*/ static int pkcs7_check_authattrs(struct pkcs7_message *msg) { struct pkcs7_signed_info *sinfo; bool want = false; sinfo = msg->signed_infos; if (!sinfo) goto inconsistent; if (sinfo->authattrs) { want = true; msg->have_authattrs = true; } for (sinfo = sinfo->next; sinfo; sinfo = sinfo->next) if (!!sinfo->authattrs != want) goto inconsistent; return 0; inconsistent: pr_warn("Inconsistently supplied authAttrs\n"); return -EINVAL; } /** * pkcs7_parse_message - Parse a PKCS#7 message * @data: The raw binary ASN.1 encoded message to be parsed * @datalen: The size of the encoded message */ struct pkcs7_message *pkcs7_parse_message(const void *data, size_t datalen) { struct pkcs7_parse_context *ctx; struct pkcs7_message *msg = ERR_PTR(-ENOMEM); int ret; ctx = kzalloc(sizeof(struct pkcs7_parse_context), GFP_KERNEL); if (!ctx) goto out_no_ctx; ctx->msg = kzalloc(sizeof(struct pkcs7_message), GFP_KERNEL); if (!ctx->msg) goto out_no_msg; ctx->sinfo = kzalloc(sizeof(struct pkcs7_signed_info), GFP_KERNEL); if (!ctx->sinfo) goto out_no_sinfo; ctx->sinfo->sig = kzalloc(sizeof(struct public_key_signature), GFP_KERNEL); if (!ctx->sinfo->sig) goto out_no_sig; ctx->data = (unsigned long)data; ctx->ppcerts = &ctx->certs; ctx->ppsinfo = &ctx->msg->signed_infos; /* Attempt to decode the signature */ ret = asn1_ber_decoder(&pkcs7_decoder, ctx, data, datalen); if (ret < 0) { msg = ERR_PTR(ret); goto out; } ret = pkcs7_check_authattrs(ctx->msg); if (ret < 0) { msg = ERR_PTR(ret); goto out; } msg = ctx->msg; ctx->msg = NULL; out: while (ctx->certs) { struct x509_certificate *cert = ctx->certs; ctx->certs = cert->next; x509_free_certificate(cert); } out_no_sig: pkcs7_free_signed_info(ctx->sinfo); out_no_sinfo: pkcs7_free_message(ctx->msg); out_no_msg: kfree(ctx); out_no_ctx: return msg; } EXPORT_SYMBOL_GPL(pkcs7_parse_message); /** * pkcs7_get_content_data - Get access to the PKCS#7 content * @pkcs7: The preparsed PKCS#7 message to access * @_data: Place to return a pointer to the data * 
@_data_len: Place to return the data length * @_headerlen: Size of ASN.1 header not included in _data * * Get access to the data content of the PKCS#7 message. The size of the * header of the ASN.1 object that contains it is also provided and can be used * to adjust *_data and *_data_len to get the entire object. * * Returns -ENODATA if the data object was missing from the message. */ int pkcs7_get_content_data(const struct pkcs7_message *pkcs7, const void **_data, size_t *_data_len, size_t *_headerlen) { if (!pkcs7->data) return -ENODATA; *_data = pkcs7->data; *_data_len = pkcs7->data_len; if (_headerlen) *_headerlen = pkcs7->data_hdrlen; return 0; } EXPORT_SYMBOL_GPL(pkcs7_get_content_data); /* * Note an OID when we find one for later processing when we know how * to interpret it. */ int pkcs7_note_OID(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; ctx->last_oid = look_up_OID(value, vlen); if (ctx->last_oid == OID__NR) { char buffer[50]; sprint_oid(value, vlen, buffer, sizeof(buffer)); printk("PKCS7: Unknown OID: [%lu] %s\n", (unsigned long)value - ctx->data, buffer); } return 0; } /* * Note the digest algorithm for the signature. 
*/ int pkcs7_sig_note_digest_algo(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; switch (ctx->last_oid) { case OID_sha1: ctx->sinfo->sig->hash_algo = "sha1"; break; case OID_sha256: ctx->sinfo->sig->hash_algo = "sha256"; break; case OID_sha384: ctx->sinfo->sig->hash_algo = "sha384"; break; case OID_sha512: ctx->sinfo->sig->hash_algo = "sha512"; break; case OID_sha224: ctx->sinfo->sig->hash_algo = "sha224"; break; case OID_sm3: ctx->sinfo->sig->hash_algo = "sm3"; break; case OID_gost2012Digest256: ctx->sinfo->sig->hash_algo = "streebog256"; break; case OID_gost2012Digest512: ctx->sinfo->sig->hash_algo = "streebog512"; break; case OID_sha3_256: ctx->sinfo->sig->hash_algo = "sha3-256"; break; case OID_sha3_384: ctx->sinfo->sig->hash_algo = "sha3-384"; break; case OID_sha3_512: ctx->sinfo->sig->hash_algo = "sha3-512"; break; default: printk("Unsupported digest algo: %u\n", ctx->last_oid); return -ENOPKG; } return 0; } /* * Note the public key algorithm for the signature. */ int pkcs7_sig_note_pkey_algo(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; switch (ctx->last_oid) { case OID_rsaEncryption: ctx->sinfo->sig->pkey_algo = "rsa"; ctx->sinfo->sig->encoding = "pkcs1"; break; case OID_id_ecdsa_with_sha1: case OID_id_ecdsa_with_sha224: case OID_id_ecdsa_with_sha256: case OID_id_ecdsa_with_sha384: case OID_id_ecdsa_with_sha512: case OID_id_ecdsa_with_sha3_256: case OID_id_ecdsa_with_sha3_384: case OID_id_ecdsa_with_sha3_512: ctx->sinfo->sig->pkey_algo = "ecdsa"; ctx->sinfo->sig->encoding = "x962"; break; case OID_gost2012PKey256: case OID_gost2012PKey512: ctx->sinfo->sig->pkey_algo = "ecrdsa"; ctx->sinfo->sig->encoding = "raw"; break; default: printk("Unsupported pkey algo: %u\n", ctx->last_oid); return -ENOPKG; } return 0; } /* * We only support signed data [RFC2315 sec 9]. 
*/ int pkcs7_check_content_type(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; if (ctx->last_oid != OID_signed_data) { pr_warn("Only support pkcs7_signedData type\n"); return -EINVAL; } return 0; } /* * Note the SignedData version */ int pkcs7_note_signeddata_version(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; unsigned version; if (vlen != 1) goto unsupported; ctx->msg->version = version = *(const u8 *)value; switch (version) { case 1: /* PKCS#7 SignedData [RFC2315 sec 9.1] * CMS ver 1 SignedData [RFC5652 sec 5.1] */ break; case 3: /* CMS ver 3 SignedData [RFC2315 sec 5.1] */ break; default: goto unsupported; } return 0; unsupported: pr_warn("Unsupported SignedData version\n"); return -EINVAL; } /* * Note the SignerInfo version */ int pkcs7_note_signerinfo_version(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; unsigned version; if (vlen != 1) goto unsupported; version = *(const u8 *)value; switch (version) { case 1: /* PKCS#7 SignerInfo [RFC2315 sec 9.2] * CMS ver 1 SignerInfo [RFC5652 sec 5.3] */ if (ctx->msg->version != 1) goto version_mismatch; ctx->expect_skid = false; break; case 3: /* CMS ver 3 SignerInfo [RFC2315 sec 5.3] */ if (ctx->msg->version == 1) goto version_mismatch; ctx->expect_skid = true; break; default: goto unsupported; } return 0; unsupported: pr_warn("Unsupported SignerInfo version\n"); return -EINVAL; version_mismatch: pr_warn("SignedData-SignerInfo version mismatch\n"); return -EBADMSG; } /* * Extract a certificate and store it in the context. 
*/ int pkcs7_extract_cert(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; struct x509_certificate *x509; if (tag != ((ASN1_UNIV << 6) | ASN1_CONS_BIT | ASN1_SEQ)) { pr_debug("Cert began with tag %02x at %lu\n", tag, (unsigned long)ctx - ctx->data); return -EBADMSG; } /* We have to correct for the header so that the X.509 parser can start * from the beginning. Note that since X.509 stipulates DER, there * probably shouldn't be an EOC trailer - but it is in PKCS#7 (which * stipulates BER). */ value -= hdrlen; vlen += hdrlen; if (((u8*)value)[1] == 0x80) vlen += 2; /* Indefinite length - there should be an EOC */ x509 = x509_cert_parse(value, vlen); if (IS_ERR(x509)) return PTR_ERR(x509); x509->index = ++ctx->x509_index; pr_debug("Got cert %u for %s\n", x509->index, x509->subject); pr_debug("- fingerprint %*phN\n", x509->id->len, x509->id->data); *ctx->ppcerts = x509; ctx->ppcerts = &x509->next; return 0; } /* * Save the certificate list */ int pkcs7_note_certificate_list(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; pr_devel("Got cert list (%02x)\n", tag); *ctx->ppcerts = ctx->msg->certs; ctx->msg->certs = ctx->certs; ctx->certs = NULL; ctx->ppcerts = &ctx->certs; return 0; } /* * Note the content type. */ int pkcs7_note_content(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; if (ctx->last_oid != OID_data && ctx->last_oid != OID_msIndirectData) { pr_warn("Unsupported data type %d\n", ctx->last_oid); return -EINVAL; } ctx->msg->data_type = ctx->last_oid; return 0; } /* * Extract the data from the message and store that and its content type OID in * the context. 
*/
int pkcs7_note_data(void *context, size_t hdrlen,
		    unsigned char tag,
		    const void *value, size_t vlen)
{
	struct pkcs7_parse_context *ctx = context;
	struct pkcs7_message *msg = ctx->msg;

	pr_debug("Got data\n");

	/* Only pointers into the caller's buffer are kept; nothing is copied. */
	msg->data = value;
	msg->data_len = vlen;
	msg->data_hdrlen = hdrlen;
	return 0;
}

/*
 * Parse authenticated attributes.
 *
 * Each recognised attribute may appear only once per signed info block; a
 * repeat is rejected with -EKEYREJECTED.  Attributes with an unrecognised
 * OID are accepted and ignored.
 */
int pkcs7_sig_note_authenticated_attr(void *context, size_t hdrlen,
				      unsigned char tag,
				      const void *value, size_t vlen)
{
	struct pkcs7_parse_context *ctx = context;
	struct pkcs7_signed_info *sinfo = ctx->sinfo;
	enum OID content_type;

	pr_devel("AuthAttr: %02x %zu [%*ph]\n", tag, vlen, (unsigned)vlen, value);

	switch (ctx->last_oid) {
	case OID_contentType:
		if (__test_and_set_bit(sinfo_has_content_type, &sinfo->aa_set))
			goto repeated;
		/* Must agree with the content type noted for the message. */
		content_type = look_up_OID(value, vlen);
		if (content_type != ctx->msg->data_type) {
			pr_warn("Mismatch between global data type (%d) and sinfo %u (%d)\n",
				ctx->msg->data_type, sinfo->index,
				content_type);
			return -EBADMSG;
		}
		return 0;

	case OID_signingTime:
		if (__test_and_set_bit(sinfo_has_signing_time, &sinfo->aa_set))
			goto repeated;
		/* Should we check that the signing time is consistent
		 * with the signer's X.509 cert?
		 */
		return x509_decode_time(&sinfo->signing_time,
					hdrlen, tag, value, vlen);

	case OID_messageDigest:
		if (__test_and_set_bit(sinfo_has_message_digest, &sinfo->aa_set))
			goto repeated;
		if (tag != ASN1_OTS)
			return -EBADMSG;
		sinfo->msgdigest = value;
		sinfo->msgdigest_len = vlen;
		return 0;

	case OID_smimeCapabilites:
		if (__test_and_set_bit(sinfo_has_smime_caps, &sinfo->aa_set))
			goto repeated;
		if (ctx->msg->data_type != OID_msIndirectData) {
			pr_warn("S/MIME Caps only allowed with Authenticode\n");
			return -EKEYREJECTED;
		}
		return 0;

		/* Microsoft SpOpusInfo seems to be contain cont[0] 16-bit BE
		 * char URLs and cont[1] 8-bit char URLs.
		 *
		 * Microsoft StatementType seems to contain a list of OIDs that
		 * are also used as extendedKeyUsage types in X.509 certs.
		 */
	case OID_msSpOpusInfo:
		if (__test_and_set_bit(sinfo_has_ms_opus_info, &sinfo->aa_set))
			goto repeated;
		break;

	case OID_msStatementType:
		if (__test_and_set_bit(sinfo_has_ms_statement_type, &sinfo->aa_set))
			goto repeated;
		break;

	default:
		return 0;
	}

	/* Only the two Microsoft attributes above fall out of the switch. */
	if (ctx->msg->data_type != OID_msIndirectData) {
		pr_warn("Authenticode AuthAttrs only allowed with Authenticode\n");
		return -EKEYREJECTED;
	}
	/* I'm not sure how to validate these */
	return 0;

repeated:
	/* We permit max one item per AuthenticatedAttribute and no repeats */
	pr_warn("Repeated/multivalue AuthAttrs not permitted\n");
	return -EKEYREJECTED;
}

/*
 * Note the set of auth attributes for digestion purposes [RFC2315 sec 9.3]
 *
 * Requires the contentType and messageDigest attributes to have been seen,
 * and rejects an opus info attribute when the content is not
 * OID_msIndirectData.
 */
int pkcs7_sig_note_set_of_authattrs(void *context, size_t hdrlen,
				    unsigned char tag,
				    const void *value, size_t vlen)
{
	struct pkcs7_parse_context *ctx = context;
	struct pkcs7_signed_info *sinfo = ctx->sinfo;
	bool have_type = test_bit(sinfo_has_content_type, &sinfo->aa_set);

	if (!have_type ||
	    !test_bit(sinfo_has_message_digest, &sinfo->aa_set)) {
		pr_warn("Missing required AuthAttr\n");
		return -EBADMSG;
	}

	if (ctx->msg->data_type != OID_msIndirectData &&
	    test_bit(sinfo_has_ms_opus_info, &sinfo->aa_set)) {
		pr_warn("Unexpected Authenticode AuthAttr\n");
		return -EBADMSG;
	}

	/* We need to switch the 'CONT 0' to a 'SET OF' when we digest */
	sinfo->authattrs = value - (hdrlen - 1);
	sinfo->authattrs_len = vlen + (hdrlen - 1);
	return 0;
}

/*
 * Note the issuing certificate serial number
 */
int pkcs7_sig_note_serial(void *context, size_t hdrlen,
			  unsigned char tag,
			  const void *value, size_t vlen)
{
	struct pkcs7_parse_context *ctx = context;

	ctx->raw_serial_size = vlen;
	ctx->raw_serial = value;
	return 0;
}

/*
 * Note the issuer's name
 */
int pkcs7_sig_note_issuer(void *context, size_t hdrlen,
			  unsigned char tag,
			  const void *value, size_t vlen)
{
	struct pkcs7_parse_context *ctx = context;

	ctx->raw_issuer_size = vlen;
	ctx->raw_issuer = value;
	return 0;
}

/*
 * Note the issuing cert's subjectKeyIdentifier
 */
int pkcs7_sig_note_skid(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; pr_devel("SKID: %02x %zu [%*ph]\n", tag, vlen, (unsigned)vlen, value); ctx->raw_skid = value; ctx->raw_skid_size = vlen; return 0; } /* * Note the signature data */ int pkcs7_sig_note_signature(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; ctx->sinfo->sig->s = kmemdup(value, vlen, GFP_KERNEL); if (!ctx->sinfo->sig->s) return -ENOMEM; ctx->sinfo->sig->s_size = vlen; return 0; } /* * Note a signature information block */ int pkcs7_note_signed_info(void *context, size_t hdrlen, unsigned char tag, const void *value, size_t vlen) { struct pkcs7_parse_context *ctx = context; struct pkcs7_signed_info *sinfo = ctx->sinfo; struct asymmetric_key_id *kid; if (ctx->msg->data_type == OID_msIndirectData && !sinfo->authattrs) { pr_warn("Authenticode requires AuthAttrs\n"); return -EBADMSG; } /* Generate cert issuer + serial number key ID */ if (!ctx->expect_skid) { kid = asymmetric_key_generate_id(ctx->raw_serial, ctx->raw_serial_size, ctx->raw_issuer, ctx->raw_issuer_size); } else { kid = asymmetric_key_generate_id(ctx->raw_skid, ctx->raw_skid_size, "", 0); } if (IS_ERR(kid)) return PTR_ERR(kid); pr_devel("SINFO KID: %u [%*phN]\n", kid->len, kid->len, kid->data); sinfo->sig->auth_ids[0] = kid; sinfo->index = ++ctx->sinfo_index; *ctx->ppsinfo = sinfo; ctx->ppsinfo = &sinfo->next; ctx->sinfo = kzalloc(sizeof(struct pkcs7_signed_info), GFP_KERNEL); if (!ctx->sinfo) return -ENOMEM; ctx->sinfo->sig = kzalloc(sizeof(struct public_key_signature), GFP_KERNEL); if (!ctx->sinfo->sig) return -ENOMEM; return 0; }
19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 /* SPDX-License-Identifier: GPL-2.0 */ #include <linux/pagemap.h> #include <linux/blkdev.h> #include "../blk.h" /* * add_gd_partition adds a partitions details to the devices partition * description. */ struct parsed_partitions { struct gendisk *disk; char name[BDEVNAME_SIZE]; struct { sector_t from; sector_t size; int flags; bool has_info; struct partition_meta_info info; } *parts; int next; int limit; bool access_beyond_eod; char *pp_buf; }; typedef struct { struct folio *v; } Sector; void *read_part_sector(struct parsed_partitions *state, sector_t n, Sector *p); static inline void put_dev_sector(Sector p) { folio_put(p.v); } static inline void put_partition(struct parsed_partitions *p, int n, sector_t from, sector_t size) { if (n < p->limit) { char tmp[1 + BDEVNAME_SIZE + 10 + 1]; p->parts[n].from = from; p->parts[n].size = size; snprintf(tmp, sizeof(tmp), " %s%d", p->name, n); strlcat(p->pp_buf, tmp, PAGE_SIZE); } } /* detection routines go here in alphabetical order: */ int adfspart_check_ADFS(struct parsed_partitions *state); int adfspart_check_CUMANA(struct parsed_partitions *state); int adfspart_check_EESOX(struct parsed_partitions *state); int adfspart_check_ICS(struct parsed_partitions *state); int adfspart_check_POWERTEC(struct parsed_partitions *state); int aix_partition(struct parsed_partitions *state); int amiga_partition(struct parsed_partitions *state); int atari_partition(struct parsed_partitions *state); int cmdline_partition(struct parsed_partitions *state); int efi_partition(struct parsed_partitions *state); int ibm_partition(struct parsed_partitions *); int karma_partition(struct parsed_partitions *state); int ldm_partition(struct parsed_partitions *state); int mac_partition(struct parsed_partitions *state); int msdos_partition(struct 
parsed_partitions *state); int of_partition(struct parsed_partitions *state); int osf_partition(struct parsed_partitions *state); int sgi_partition(struct parsed_partitions *state); int sun_partition(struct parsed_partitions *state); int sysv68_partition(struct parsed_partitions *state); int ultrix_partition(struct parsed_partitions *state);
1696 1115 651 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef _NET_RPS_H #define _NET_RPS_H #include <linux/types.h> #include <linux/static_key.h> #include <net/sock.h> #include <net/hotdata.h> #ifdef CONFIG_RPS extern struct static_key_false rps_needed; extern struct static_key_false rfs_needed; /* * This structure holds an RPS map which can be of variable length. The * map is an array of CPUs. */ struct rps_map { unsigned int len; struct rcu_head rcu; u16 cpus[]; }; #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16))) /* * The rps_dev_flow structure contains the mapping of a flow to a CPU, the * tail pointer for that CPU's input queue at the time of last enqueue, a * hardware filter index, and the hash of the flow if aRFS is enabled. */ struct rps_dev_flow { u16 cpu; u16 filter; unsigned int last_qtail; #ifdef CONFIG_RFS_ACCEL u32 hash; #endif }; #define RPS_NO_FILTER 0xffff /* * The rps_dev_flow_table structure contains a table of flow mappings. 
*/ struct rps_dev_flow_table { u8 log; struct rcu_head rcu; struct rps_dev_flow flows[]; }; #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \ ((_num) * sizeof(struct rps_dev_flow))) /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). * Each entry is a 32bit value. Upper part is the high-order bits * of flow hash, lower part is CPU number. * rps_cpu_mask is used to partition the space, depending on number of * possible CPUs : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 * For example, if 64 CPUs are possible, rps_cpu_mask = 0x3f, * meaning we use 32-6=26 bits for the hash. */ struct rps_sock_flow_table { struct rcu_head rcu; u32 mask; u32 ents[] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) #define RPS_NO_CPU 0xffff static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, u32 hash) { unsigned int index = hash & table->mask; u32 val = hash & ~net_hotdata.rps_cpu_mask; /* We only give a hint, preemption can change CPU under us */ val |= raw_smp_processor_id(); /* The following WRITE_ONCE() is paired with the READ_ONCE() * here, and another one in get_rps_cpu(). */ if (READ_ONCE(table->ents[index]) != val) WRITE_ONCE(table->ents[index], val); } static inline void _sock_rps_record_flow_hash(__u32 hash) { struct rps_sock_flow_table *sock_flow_table; if (!hash) return; rcu_read_lock(); sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); if (sock_flow_table) rps_record_sock_flow(sock_flow_table, hash); rcu_read_unlock(); } static inline void _sock_rps_record_flow(const struct sock *sk) { /* Reading sk->sk_rxhash might incur an expensive cache line * miss. 
* * TCP_ESTABLISHED does cover almost all states where RFS * might be useful, and is cheaper [1] than testing : * IPv4: inet_sk(sk)->inet_daddr * IPv6: ipv6_addr_any(&sk->sk_v6_daddr) * OR an additional socket flag * [1] : sk_state and sk_prot are in the same cache line. */ if (sk->sk_state == TCP_ESTABLISHED) { /* This READ_ONCE() is paired with the WRITE_ONCE() * from sock_rps_save_rxhash() and sock_rps_reset_rxhash(). */ _sock_rps_record_flow_hash(READ_ONCE(sk->sk_rxhash)); } } static inline void _sock_rps_delete_flow(const struct sock *sk) { struct rps_sock_flow_table *table; u32 hash, index; hash = READ_ONCE(sk->sk_rxhash); if (!hash) return; rcu_read_lock(); table = rcu_dereference(net_hotdata.rps_sock_flow_table); if (table) { index = hash & table->mask; if (READ_ONCE(table->ents[index]) != RPS_NO_CPU) WRITE_ONCE(table->ents[index], RPS_NO_CPU); } rcu_read_unlock(); } #endif /* CONFIG_RPS */ static inline bool rfs_is_needed(void) { #ifdef CONFIG_RPS return static_branch_unlikely(&rfs_needed); #else return false; #endif } static inline void sock_rps_record_flow_hash(__u32 hash) { #ifdef CONFIG_RPS if (!rfs_is_needed()) return; _sock_rps_record_flow_hash(hash); #endif } static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS if (!rfs_is_needed()) return; _sock_rps_record_flow(sk); #endif } static inline void sock_rps_delete_flow(const struct sock *sk) { #ifdef CONFIG_RPS if (!rfs_is_needed()) return; _sock_rps_delete_flow(sk); #endif } static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd) { #ifdef CONFIG_RPS return ++sd->input_queue_tail; #else return 0; #endif } static inline void rps_input_queue_tail_save(u32 *dest, u32 tail) { #ifdef CONFIG_RPS WRITE_ONCE(*dest, tail); #endif } static inline void rps_input_queue_head_add(struct softnet_data *sd, int val) { #ifdef CONFIG_RPS WRITE_ONCE(sd->input_queue_head, sd->input_queue_head + val); #endif } static inline void rps_input_queue_head_incr(struct softnet_data *sd) { 
rps_input_queue_head_add(sd, 1); } #endif /* _NET_RPS_H */
2 1 3 2 1 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 // SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) International Business Machines Corp., 2000-2004 * Portions Copyright (C) Christoph Hellwig, 2001-2002 */ #include <linux/fs.h> #include <linux/ctype.h> #include <linux/module.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/uaccess.h> #include "jfs_incore.h" #include "jfs_filsys.h" #include "jfs_debug.h" #ifdef PROC_FS_JFS /* see jfs_debug.h */ #ifdef CONFIG_JFS_DEBUG static int jfs_loglevel_proc_show(struct seq_file *m, void *v) { seq_printf(m, "%d\n", jfsloglevel); return 0; } static int jfs_loglevel_proc_open(struct inode *inode, struct file *file) { return single_open(file, jfs_loglevel_proc_show, NULL); } static ssize_t jfs_loglevel_proc_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) { char c; if (get_user(c, buffer)) return -EFAULT; /* yes, I know this is an ASCIIism. 
--hch */ if (c < '0' || c > '9') return -EINVAL; jfsloglevel = c - '0'; return count; } static const struct proc_ops jfs_loglevel_proc_ops = { .proc_open = jfs_loglevel_proc_open, .proc_read = seq_read, .proc_lseek = seq_lseek, .proc_release = single_release, .proc_write = jfs_loglevel_proc_write, }; #endif void jfs_proc_init(void) { struct proc_dir_entry *base; base = proc_mkdir("fs/jfs", NULL); if (!base) return; #ifdef CONFIG_JFS_STATISTICS proc_create_single("lmstats", 0, base, jfs_lmstats_proc_show); proc_create_single("txstats", 0, base, jfs_txstats_proc_show); proc_create_single("xtstat", 0, base, jfs_xtstat_proc_show); proc_create_single("mpstat", 0, base, jfs_mpstat_proc_show); #endif #ifdef CONFIG_JFS_DEBUG proc_create_single("TxAnchor", 0, base, jfs_txanchor_proc_show); proc_create("loglevel", 0, base, &jfs_loglevel_proc_ops); #endif } void jfs_proc_clean(void) { remove_proc_subtree("fs/jfs", NULL); } #endif /* PROC_FS_JFS */
3 7 7 7 3 3 3 3 3 3 3 3 3 3 3 3 3 6 6 6 6 3 3 3 1 1 19 19 19 27 27 27 9 10 10 10 10 78 78 78 18 3 11 69 4 4 4 4 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 2 2 18 18 18 18 18 18 18 9 18 18 3 3 2 3 2 2 12 12 12 12 1 1 1 12 12 6 6 6 6 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 1 2 2 2 2 1 1 4 1 3 3 3 3 2 2 2 1 1 1 1 1 1 2 2 2 103 103 100 104 103 96 96 3 3 4 4 2 2 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 
436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 
936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 
1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 
1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 
2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 
2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 
2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 
3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 
3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 
4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Copyright (C) 2011 ProFUSION Embedded Systems Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License version 2 as published by the Free Software Foundation; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS SOFTWARE IS DISCLAIMED. */ /* Bluetooth HCI core. 
*/ #include <linux/export.h> #include <linux/rfkill.h> #include <linux/debugfs.h> #include <linux/crypto.h> #include <linux/kcov.h> #include <linux/property.h> #include <linux/suspend.h> #include <linux/wait.h> #include <linux/unaligned.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> #include <net/bluetooth/l2cap.h> #include <net/bluetooth/mgmt.h> #include "hci_debugfs.h" #include "smp.h" #include "leds.h" #include "msft.h" #include "aosp.h" #include "hci_codec.h" static void hci_rx_work(struct work_struct *work); static void hci_cmd_work(struct work_struct *work); static void hci_tx_work(struct work_struct *work); /* HCI device list */ LIST_HEAD(hci_dev_list); DEFINE_RWLOCK(hci_dev_list_lock); /* HCI callback list */ LIST_HEAD(hci_cb_list); DEFINE_MUTEX(hci_cb_list_lock); /* HCI ID Numbering */ static DEFINE_IDA(hci_index_ida); /* Get HCI device by index. * Device is held on return. */ static struct hci_dev *__hci_dev_get(int index, int *srcu_index) { struct hci_dev *hdev = NULL, *d; BT_DBG("%d", index); if (index < 0) return NULL; read_lock(&hci_dev_list_lock); list_for_each_entry(d, &hci_dev_list, list) { if (d->id == index) { hdev = hci_dev_hold(d); if (srcu_index) *srcu_index = srcu_read_lock(&d->srcu); break; } } read_unlock(&hci_dev_list_lock); return hdev; } struct hci_dev *hci_dev_get(int index) { return __hci_dev_get(index, NULL); } static struct hci_dev *hci_dev_get_srcu(int index, int *srcu_index) { return __hci_dev_get(index, srcu_index); } static void hci_dev_put_srcu(struct hci_dev *hdev, int srcu_index) { srcu_read_unlock(&hdev->srcu, srcu_index); hci_dev_put(hdev); } /* ---- Inquiry support ---- */ bool hci_discovery_active(struct hci_dev *hdev) { struct discovery_state *discov = &hdev->discovery; switch (discov->state) { case DISCOVERY_FINDING: case DISCOVERY_RESOLVING: return true; default: return false; } } void hci_discovery_set_state(struct hci_dev *hdev, int state) { int old_state = hdev->discovery.state; if 
(old_state == state) return; hdev->discovery.state = state; switch (state) { case DISCOVERY_STOPPED: hci_update_passive_scan(hdev); if (old_state != DISCOVERY_STARTING) mgmt_discovering(hdev, 0); break; case DISCOVERY_STARTING: break; case DISCOVERY_FINDING: mgmt_discovering(hdev, 1); break; case DISCOVERY_RESOLVING: break; case DISCOVERY_STOPPING: break; } bt_dev_dbg(hdev, "state %u -> %u", old_state, state); } void hci_inquiry_cache_flush(struct hci_dev *hdev) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *p, *n; list_for_each_entry_safe(p, n, &cache->all, all) { list_del(&p->all); kfree(p); } INIT_LIST_HEAD(&cache->unknown); INIT_LIST_HEAD(&cache->resolve); } struct inquiry_entry *hci_inquiry_cache_lookup(struct hci_dev *hdev, bdaddr_t *bdaddr) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *e; BT_DBG("cache %p, %pMR", cache, bdaddr); list_for_each_entry(e, &cache->all, all) { if (!bacmp(&e->data.bdaddr, bdaddr)) return e; } return NULL; } struct inquiry_entry *hci_inquiry_cache_lookup_unknown(struct hci_dev *hdev, bdaddr_t *bdaddr) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *e; BT_DBG("cache %p, %pMR", cache, bdaddr); list_for_each_entry(e, &cache->unknown, list) { if (!bacmp(&e->data.bdaddr, bdaddr)) return e; } return NULL; } struct inquiry_entry *hci_inquiry_cache_lookup_resolve(struct hci_dev *hdev, bdaddr_t *bdaddr, int state) { struct discovery_state *cache = &hdev->discovery; struct inquiry_entry *e; BT_DBG("cache %p bdaddr %pMR state %d", cache, bdaddr, state); list_for_each_entry(e, &cache->resolve, list) { if (!bacmp(bdaddr, BDADDR_ANY) && e->name_state == state) return e; if (!bacmp(&e->data.bdaddr, bdaddr)) return e; } return NULL; } void hci_inquiry_cache_update_resolve(struct hci_dev *hdev, struct inquiry_entry *ie) { struct discovery_state *cache = &hdev->discovery; struct list_head *pos = &cache->resolve; struct inquiry_entry *p; list_del(&ie->list); 
list_for_each_entry(p, &cache->resolve, list) {
		/* Stop at the first entry that is not NAME_PENDING and whose
		 * absolute RSSI is at least that of @ie; @ie is then linked
		 * right after the last entry that was skipped.
		 */
		if (p->name_state != NAME_PENDING &&
		    abs(p->data.rssi) >= abs(ie->data.rssi))
			break;
		pos = &p->list;
	}

	list_add(&ie->list, pos);
}

/* Insert or refresh the inquiry cache entry for @data->bdaddr.
 *
 * Any stored remote OOB data for the BR/EDR address is removed first.
 * An existing entry is refreshed (and re-sorted in the resolve list when
 * its name is still needed and the RSSI changed); otherwise a new entry
 * is allocated and linked.
 *
 * Returns a mask of MGMT_DEV_FOUND_* flags:
 *  - MGMT_DEV_FOUND_LEGACY_PAIRING when @data (or the cached entry) has
 *    ssp_mode unset,
 *  - MGMT_DEV_FOUND_CONFIRM_NAME when the entry's name is not known or
 *    the allocation of a new entry failed.
 */
u32 hci_inquiry_cache_update(struct hci_dev *hdev, struct inquiry_data *data,
			     bool name_known)
{
	struct discovery_state *cache = &hdev->discovery;
	struct inquiry_entry *ie;
	u32 flags = 0;

	BT_DBG("cache %p, %pMR", cache, &data->bdaddr);

	hci_remove_remote_oob_data(hdev, &data->bdaddr, BDADDR_BREDR);

	if (!data->ssp_mode)
		flags |= MGMT_DEV_FOUND_LEGACY_PAIRING;

	ie = hci_inquiry_cache_lookup(hdev, &data->bdaddr);
	if (ie) {
		/* The cached value is checked before it is overwritten below */
		if (!ie->data.ssp_mode)
			flags |= MGMT_DEV_FOUND_LEGACY_PAIRING;

		if (ie->name_state == NAME_NEEDED &&
		    data->rssi != ie->data.rssi) {
			ie->data.rssi = data->rssi;
			hci_inquiry_cache_update_resolve(hdev, ie);
		}

		goto update;
	}

	/* Entry not in the cache. Add new one. */
	ie = kzalloc(sizeof(*ie), GFP_KERNEL);
	if (!ie) {
		flags |= MGMT_DEV_FOUND_CONFIRM_NAME;
		goto done;
	}

	list_add(&ie->all, &cache->all);

	if (name_known) {
		ie->name_state = NAME_KNOWN;
	} else {
		ie->name_state = NAME_NOT_KNOWN;
		list_add(&ie->list, &cache->unknown);
	}

update:
	/* A name that just became known takes the entry off whichever
	 * secondary list it is linked on via ->list.
	 */
	if (name_known && ie->name_state != NAME_KNOWN &&
	    ie->name_state != NAME_PENDING) {
		ie->name_state = NAME_KNOWN;
		list_del(&ie->list);
	}

	memcpy(&ie->data, data, sizeof(*data));
	ie->timestamp = jiffies;
	cache->timestamp = jiffies;

	if (ie->name_state == NAME_NOT_KNOWN)
		flags |= MGMT_DEV_FOUND_CONFIRM_NAME;

done:
	return flags;
}

/* Copy up to @num cache entries into @buf as an array of
 * struct inquiry_info and return the number of entries copied.
 */
static int inquiry_cache_dump(struct hci_dev *hdev, int num, __u8 *buf)
{
	struct discovery_state *cache = &hdev->discovery;
	struct inquiry_info *info = (struct inquiry_info *) buf;
	struct inquiry_entry *e;
	int copied = 0;

	list_for_each_entry(e, &cache->all, all) {
		struct inquiry_data *data = &e->data;

		if (copied >= num)
			break;

		bacpy(&info->bdaddr, &data->bdaddr);
		info->pscan_rep_mode	= data->pscan_rep_mode;
		info->pscan_period_mode	= data->pscan_period_mode;
		info->pscan_mode	= data->pscan_mode;
		memcpy(info->dev_class,
data->dev_class, 3);
		info->clock_offset	= data->clock_offset;

		info++;
		copied++;
	}

	BT_DBG("cache %p, copied %d", cache, copied);
	return copied;
}

/* Handle an inquiry request from user space.
 *
 * @arg points to a struct hci_inquiry_req which is followed, on return,
 * by the cached inquiry results.  A fresh inquiry is run first when the
 * cache is too old, empty, or IREQ_CACHE_FLUSH was requested.
 *
 * Returns 0 on success or a negative errno.
 */
int hci_inquiry(void __user *arg)
{
	__u8 __user *ptr = arg;
	struct hci_inquiry_req ir;
	struct hci_dev *hdev;
	int err = 0, do_inquiry = 0, max_rsp;
	__u8 *buf;

	if (copy_from_user(&ir, ptr, sizeof(ir)))
		return -EFAULT;

	hdev = hci_dev_get(ir.dev_id);
	if (!hdev)
		return -ENODEV;

	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
		err = -EBUSY;
		goto done;
	}

	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
		err = -EOPNOTSUPP;
		goto done;
	}

	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
		err = -EOPNOTSUPP;
		goto done;
	}

	/* Restrict maximum inquiry length to 60 seconds */
	if (ir.length > 60) {
		err = -EINVAL;
		goto done;
	}

	/* Decide under the device lock whether the cache can be reused */
	hci_dev_lock(hdev);
	if (inquiry_cache_age(hdev) > INQUIRY_CACHE_AGE_MAX ||
	    inquiry_cache_empty(hdev) || ir.flags & IREQ_CACHE_FLUSH) {
		hci_inquiry_cache_flush(hdev);
		do_inquiry = 1;
	}
	hci_dev_unlock(hdev);

	if (do_inquiry) {
		hci_req_sync_lock(hdev);
		err = hci_inquiry_sync(hdev, ir.length, ir.num_rsp);
		hci_req_sync_unlock(hdev);

		if (err < 0)
			goto done;

		/* Wait until Inquiry procedure finishes (HCI_INQUIRY flag is
		 * cleared). If it is interrupted by a signal, return -EINTR.
		 */
		if (wait_on_bit(&hdev->flags, HCI_INQUIRY,
				TASK_INTERRUPTIBLE)) {
			err = -EINTR;
			goto done;
		}
	}

	/* for unlimited number of responses we will use buffer with
	 * 255 entries
	 */
	max_rsp = (ir.num_rsp == 0) ? 255 : ir.num_rsp;

	/* cache_dump can't sleep. Therefore we allocate temp buffer and then
	 * copy it to the user space.
*/
	buf = kmalloc_array(max_rsp, sizeof(struct inquiry_info), GFP_KERNEL);
	if (!buf) {
		err = -ENOMEM;
		goto done;
	}

	hci_dev_lock(hdev);
	ir.num_rsp = inquiry_cache_dump(hdev, max_rsp, buf);
	hci_dev_unlock(hdev);

	BT_DBG("num_rsp %d", ir.num_rsp);

	/* The request header (with the updated num_rsp) goes out first and
	 * the result array is written immediately after it.
	 */
	if (!copy_to_user(ptr, &ir, sizeof(ir))) {
		ptr += sizeof(ir);
		if (copy_to_user(ptr, buf, sizeof(struct inquiry_info) *
				 ir.num_rsp))
			err = -EFAULT;
	} else
		err = -EFAULT;

	kfree(buf);

done:
	hci_dev_put(hdev);
	return err;
}

/* Run hci_dev_open_sync() with the request sync lock held and return
 * its result.
 */
static int hci_dev_do_open(struct hci_dev *hdev)
{
	int ret = 0;

	BT_DBG("%s %p", hdev->name, hdev);

	hci_req_sync_lock(hdev);

	ret = hci_dev_open_sync(hdev);

	hci_req_sync_unlock(hdev);
	return ret;
}

/* ---- HCI ioctl helpers ---- */

/* Open (power up) the HCI device with index @dev.
 *
 * Returns 0 on success, -ENODEV when no such device exists,
 * -EOPNOTSUPP for an unconfigured device that is not opened as user
 * channel, or the result of hci_dev_do_open().
 */
int hci_dev_open(__u16 dev)
{
	struct hci_dev *hdev;
	int err;

	hdev = hci_dev_get(dev);
	if (!hdev)
		return -ENODEV;

	/* Devices that are marked as unconfigured can only be powered
	 * up as user channel. Trying to bring them up as normal devices
	 * will result into a failure. Only user channel operation is
	 * possible.
	 *
	 * When this function is called for a user channel, the flag
	 * HCI_USER_CHANNEL will be set first before attempting to
	 * open the device.
	 */
	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED) &&
	    !hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
		err = -EOPNOTSUPP;
		goto done;
	}

	/* We need to ensure that no other power on/off work is pending
	 * before proceeding to call hci_dev_do_open. This is
	 * particularly important if the setup procedure has not yet
	 * completed.
	 */
	if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF))
		cancel_delayed_work(&hdev->power_off);

	/* After this call it is guaranteed that the setup procedure
	 * has finished. This means that error conditions like RFKILL
	 * or no valid public or static random address apply.
	 */
	flush_workqueue(hdev->req_workqueue);

	/* For controllers not using the management interface and that
	 * are brought up using legacy ioctl, set the HCI_BONDABLE bit
	 * so that pairing works for them.
Once the management interface
	 * is in use this bit will be cleared again and userspace has
	 * to explicitly enable it.
	 */
	if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
	    !hci_dev_test_flag(hdev, HCI_MGMT))
		hci_dev_set_flag(hdev, HCI_BONDABLE);

	err = hci_dev_do_open(hdev);

done:
	hci_dev_put(hdev);
	return err;
}

/* Run hci_dev_close_sync() with the request sync lock held and return
 * its result.
 */
int hci_dev_do_close(struct hci_dev *hdev)
{
	int err;

	BT_DBG("%s %p", hdev->name, hdev);

	hci_req_sync_lock(hdev);

	err = hci_dev_close_sync(hdev);

	hci_req_sync_unlock(hdev);

	return err;
}

/* Close the HCI device with index @dev.
 *
 * Returns -ENODEV when no such device exists, -EBUSY when the device is
 * in use as a user channel, otherwise the result of hci_dev_do_close().
 */
int hci_dev_close(__u16 dev)
{
	struct hci_dev *hdev;
	int err;

	hdev = hci_dev_get(dev);
	if (!hdev)
		return -ENODEV;

	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
		err = -EBUSY;
		goto done;
	}

	/* Stop any pending power-on work and auto-off timer before closing */
	cancel_work_sync(&hdev->power_on);
	if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF))
		cancel_delayed_work(&hdev->power_off);

	err = hci_dev_do_close(hdev);

done:
	hci_dev_put(hdev);
	return err;
}

/* Reset a running device: purge queues, quiesce the command timers and
 * the workqueue, flush cached state, then issue hci_reset_sync().
 * The whole sequence runs under the request sync lock.
 */
static int hci_dev_do_reset(struct hci_dev *hdev)
{
	int ret;

	BT_DBG("%s %p", hdev->name, hdev);

	hci_req_sync_lock(hdev);

	/* Drop queues */
	skb_queue_purge(&hdev->rx_q);
	skb_queue_purge(&hdev->cmd_q);

	/* Cancel these to avoid queueing non-chained pending work */
	hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE);
	/* Wait for
	 *
	 *    if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE))
	 *        queue_delayed_work(&hdev->{cmd,ncmd}_timer)
	 *
	 * inside RCU section to see the flag or complete scheduling.
	 */
	synchronize_rcu();
	/* Explicitly cancel works in case scheduled after setting the flag. */
	cancel_delayed_work(&hdev->cmd_timer);
	cancel_delayed_work(&hdev->ncmd_timer);

	/* Avoid potential lockdep warnings from the *_flush() calls by
	 * ensuring the workqueue is empty up front.
*/ drain_workqueue(hdev->workqueue); hci_dev_lock(hdev); hci_inquiry_cache_flush(hdev); hci_conn_hash_flush(hdev); hci_dev_unlock(hdev); if (hdev->flush) hdev->flush(hdev); hci_dev_clear_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); atomic_set(&hdev->cmd_cnt, 1); hdev->acl_cnt = 0; hdev->sco_cnt = 0; hdev->le_cnt = 0; hdev->iso_cnt = 0; ret = hci_reset_sync(hdev); hci_req_sync_unlock(hdev); return ret; } int hci_dev_reset(__u16 dev) { struct hci_dev *hdev; int err, srcu_index; hdev = hci_dev_get_srcu(dev, &srcu_index); if (!hdev) return -ENODEV; if (!test_bit(HCI_UP, &hdev->flags)) { err = -ENETDOWN; goto done; } if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { err = -EBUSY; goto done; } if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { err = -EOPNOTSUPP; goto done; } err = hci_dev_do_reset(hdev); done: hci_dev_put_srcu(hdev, srcu_index); return err; } int hci_dev_reset_stat(__u16 dev) { struct hci_dev *hdev; int ret = 0; hdev = hci_dev_get(dev); if (!hdev) return -ENODEV; if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) { ret = -EBUSY; goto done; } if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { ret = -EOPNOTSUPP; goto done; } memset(&hdev->stat, 0, sizeof(struct hci_dev_stats)); done: hci_dev_put(hdev); return ret; } static void hci_update_passive_scan_state(struct hci_dev *hdev, u8 scan) { bool conn_changed, discov_changed; BT_DBG("%s scan 0x%02x", hdev->name, scan); if ((scan & SCAN_PAGE)) conn_changed = !hci_dev_test_and_set_flag(hdev, HCI_CONNECTABLE); else conn_changed = hci_dev_test_and_clear_flag(hdev, HCI_CONNECTABLE); if ((scan & SCAN_INQUIRY)) { discov_changed = !hci_dev_test_and_set_flag(hdev, HCI_DISCOVERABLE); } else { hci_dev_clear_flag(hdev, HCI_LIMITED_DISCOVERABLE); discov_changed = hci_dev_test_and_clear_flag(hdev, HCI_DISCOVERABLE); } if (!hci_dev_test_flag(hdev, HCI_MGMT)) return; if (conn_changed || discov_changed) { /* In case this was disabled through mgmt */ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) 
hci_update_adv_data(hdev, hdev->cur_adv_instance);

		mgmt_new_settings(hdev);
	}
}

/* Handle the legacy HCISET* ioctls.
 *
 * @cmd selects the operation and @arg points to a user-space
 * struct hci_dev_req carrying the device id and the option value.
 *
 * Returns 0 on success or a negative errno (-EFAULT, -ENODEV, -EBUSY,
 * -EOPNOTSUPP, -EINVAL, or the status of the issued HCI command).
 */
int hci_dev_cmd(unsigned int cmd, void __user *arg)
{
	struct hci_dev *hdev;
	struct hci_dev_req dr;
	__le16 policy;
	int err = 0;

	if (copy_from_user(&dr, arg, sizeof(dr)))
		return -EFAULT;

	hdev = hci_dev_get(dr.dev_id);
	if (!hdev)
		return -ENODEV;

	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
		err = -EBUSY;
		goto done;
	}

	if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) {
		err = -EOPNOTSUPP;
		goto done;
	}

	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) {
		err = -EOPNOTSUPP;
		goto done;
	}

	switch (cmd) {
	case HCISETAUTH:
		err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_AUTH_ENABLE,
					  1, &dr.dev_opt, HCI_CMD_TIMEOUT);
		break;

	case HCISETENCRYPT:
		if (!lmp_encrypt_capable(hdev)) {
			err = -EOPNOTSUPP;
			break;
		}

		if (!test_bit(HCI_AUTH, &hdev->flags)) {
			/* Auth must be enabled first */
			err = hci_cmd_sync_status(hdev,
						  HCI_OP_WRITE_AUTH_ENABLE,
						  1, &dr.dev_opt,
						  HCI_CMD_TIMEOUT);
			if (err)
				break;
		}

		err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_ENCRYPT_MODE,
					  1, &dr.dev_opt, HCI_CMD_TIMEOUT);
		break;

	case HCISETSCAN:
		err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_SCAN_ENABLE,
					  1, &dr.dev_opt, HCI_CMD_TIMEOUT);

		/* Ensure that the connectable and discoverable states
		 * get correctly modified as this was a non-mgmt change.
*/
		if (!err)
			hci_update_passive_scan_state(hdev, dr.dev_opt);
		break;

	case HCISETLINKPOL:
		policy = cpu_to_le16(dr.dev_opt);

		err = hci_cmd_sync_status(hdev, HCI_OP_WRITE_DEF_LINK_POLICY,
					  2, &policy, HCI_CMD_TIMEOUT);
		break;

	case HCISETLINKMODE:
		/* Only the MASTER and ACCEPT bits are kept */
		hdev->link_mode = ((__u16) dr.dev_opt) &
					(HCI_LM_MASTER | HCI_LM_ACCEPT);
		break;

	case HCISETPTYPE:
		if (hdev->pkt_type == (__u16) dr.dev_opt)
			break;

		hdev->pkt_type = (__u16) dr.dev_opt;
		mgmt_phy_configuration_changed(hdev, NULL);
		break;

	case HCISETACLMTU:
		/* dev_opt is read as two __u16 values in memory order:
		 * index 1 is the MTU, index 0 the packet count.
		 */
		hdev->acl_mtu  = *((__u16 *) &dr.dev_opt + 1);
		hdev->acl_pkts = *((__u16 *) &dr.dev_opt + 0);
		break;

	case HCISETSCOMTU:
		/* Same layout as HCISETACLMTU */
		hdev->sco_mtu  = *((__u16 *) &dr.dev_opt + 1);
		hdev->sco_pkts = *((__u16 *) &dr.dev_opt + 0);
		break;

	default:
		err = -EINVAL;
		break;
	}

done:
	hci_dev_put(hdev);
	return err;
}

/* Copy the list of registered HCI devices to user space.
 *
 * @arg points to a struct hci_dev_list_req whose leading __u16 gives the
 * number of entries the caller can accept.  On return dev_num holds the
 * number of entries actually written.
 *
 * Returns 0 on success, -EFAULT, -EINVAL (zero or too large a count) or
 * -ENOMEM.
 */
int hci_get_dev_list(void __user *arg)
{
	struct hci_dev *hdev;
	struct hci_dev_list_req *dl;
	struct hci_dev_req *dr;
	int n = 0, err;
	__u16 dev_num;

	if (get_user(dev_num, (__u16 __user *) arg))
		return -EFAULT;

	/* Bound the allocation to what fits in two pages of entries */
	if (!dev_num || dev_num > (PAGE_SIZE * 2) / sizeof(*dr))
		return -EINVAL;

	dl = kzalloc(struct_size(dl, dev_req, dev_num), GFP_KERNEL);
	if (!dl)
		return -ENOMEM;

	dl->dev_num = dev_num;
	dr = dl->dev_req;

	read_lock(&hci_dev_list_lock);
	list_for_each_entry(hdev, &hci_dev_list, list) {
		unsigned long flags = hdev->flags;

		/* When the auto-off is configured it means the transport
		 * is running, but in that case still indicate that the
		 * device is actually down.
		 */
		if (hci_dev_test_flag(hdev, HCI_AUTO_OFF))
			flags &= ~BIT(HCI_UP);

		dr[n].dev_id  = hdev->id;
		dr[n].dev_opt = flags;

		if (++n >= dev_num)
			break;
	}
	read_unlock(&hci_dev_list_lock);

	/* Only the entries that were filled in are copied out */
	dl->dev_num = n;
	err = copy_to_user(arg, dl, struct_size(dl, dev_req, n));
	kfree(dl);

	return err ?
-EFAULT : 0;
}

/* Fill a user-space struct hci_dev_info for the device whose id is given
 * in its dev_id field.
 *
 * Returns 0 on success, -EFAULT on a failed user copy, or -ENODEV when
 * the device does not exist.
 */
int hci_get_dev_info(void __user *arg)
{
	struct hci_dev *hdev;
	struct hci_dev_info di;
	unsigned long flags;
	int err = 0;

	if (copy_from_user(&di, arg, sizeof(di)))
		return -EFAULT;

	hdev = hci_dev_get(di.dev_id);
	if (!hdev)
		return -ENODEV;

	/* When the auto-off is configured it means the transport
	 * is running, but in that case still indicate that the
	 * device is actually down.
	 */
	if (hci_dev_test_flag(hdev, HCI_AUTO_OFF))
		flags = hdev->flags & ~BIT(HCI_UP);
	else
		flags = hdev->flags;

	strscpy(di.name, hdev->name, sizeof(di.name));
	di.bdaddr   = hdev->bdaddr;
	di.type     = (hdev->bus & 0x0f);
	di.flags    = flags;
	di.pkt_type = hdev->pkt_type;
	if (lmp_bredr_capable(hdev)) {
		di.acl_mtu  = hdev->acl_mtu;
		di.acl_pkts = hdev->acl_pkts;
		di.sco_mtu  = hdev->sco_mtu;
		di.sco_pkts = hdev->sco_pkts;
	} else {
		/* Without BR/EDR the LE buffer values are reported in the
		 * ACL fields and the SCO fields are zeroed.
		 */
		di.acl_mtu  = hdev->le_mtu;
		di.acl_pkts = hdev->le_pkts;
		di.sco_mtu  = 0;
		di.sco_pkts = 0;
	}
	di.link_policy = hdev->link_policy;
	di.link_mode   = hdev->link_mode;

	memcpy(&di.stat, &hdev->stat, sizeof(di.stat));
	memcpy(&di.features, &hdev->features, sizeof(di.features));

	if (copy_to_user(arg, &di, sizeof(di)))
		err = -EFAULT;

	hci_dev_put(hdev);

	return err;
}

/* ---- Interface to HCI drivers ---- */

/* Power the device off via hci_set_powered_sync(hdev, false) with the
 * request sync lock held, and return its result.
 */
static int hci_dev_do_poweroff(struct hci_dev *hdev)
{
	int err;

	BT_DBG("%s %p", hdev->name, hdev);

	hci_req_sync_lock(hdev);

	err = hci_set_powered_sync(hdev, false);

	hci_req_sync_unlock(hdev);

	return err;
}

/* rfkill set_block callback; @data is the struct hci_dev.
 *
 * Returns -EBUSY for a user-channel device and 0 otherwise, including
 * when the requested state already matches HCI_RFKILLED.
 */
static int hci_rfkill_set_block(void *data, bool blocked)
{
	struct hci_dev *hdev = data;
	int err;

	BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);

	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
		return -EBUSY;

	if (blocked == hci_dev_test_flag(hdev, HCI_RFKILLED))
		return 0;

	if (blocked) {
		hci_dev_set_flag(hdev, HCI_RFKILLED);

		/* Power-off is skipped while HCI_SETUP or HCI_CONFIG is set */
		if (!hci_dev_test_flag(hdev, HCI_SETUP) &&
		    !hci_dev_test_flag(hdev, HCI_CONFIG)) {
			err = hci_dev_do_poweroff(hdev);
			if (err) {
				bt_dev_err(hdev, "Error when powering off device on rfkill (%d)",
					   err);

				/* Make sure the device is still closed even if
				 *
anything during power off sequence (eg.
				 * disconnecting devices) failed.
				 */
				hci_dev_do_close(hdev);
			}
		}
	} else {
		hci_dev_clear_flag(hdev, HCI_RFKILLED);
	}

	return 0;
}

/* Only the set_block operation is provided to rfkill */
static const struct rfkill_ops hci_rfkill_ops = {
	.set_block = hci_rfkill_set_block,
};

/* Work handler for hdev->power_on.
 *
 * If the device is already up under mgmt control with auto-off pending,
 * the auto-off is cancelled and only the powered state is updated.
 * Otherwise the device is opened, closed again if a blocking condition
 * still holds, and the appropriate mgmt index event is emitted.
 */
static void hci_power_on(struct work_struct *work)
{
	struct hci_dev *hdev = container_of(work, struct hci_dev, power_on);
	int err;

	BT_DBG("%s", hdev->name);

	if (test_bit(HCI_UP, &hdev->flags) &&
	    hci_dev_test_flag(hdev, HCI_MGMT) &&
	    hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF)) {
		cancel_delayed_work(&hdev->power_off);
		err = hci_powered_update_sync(hdev);
		mgmt_power_on(hdev, err);
		return;
	}

	err = hci_dev_do_open(hdev);
	if (err < 0) {
		hci_dev_lock(hdev);
		mgmt_set_powered_failed(hdev, err);
		hci_dev_unlock(hdev);
		return;
	}

	/* During the HCI setup phase, a few error conditions are
	 * ignored and they need to be checked now. If they are still
	 * valid, it is important to turn the device back off.
	 */
	if (hci_dev_test_flag(hdev, HCI_RFKILLED) ||
	    hci_dev_test_flag(hdev, HCI_UNCONFIGURED) ||
	    (!bacmp(&hdev->bdaddr, BDADDR_ANY) &&
	     !bacmp(&hdev->static_addr, BDADDR_ANY))) {
		hci_dev_clear_flag(hdev, HCI_AUTO_OFF);
		hci_dev_do_close(hdev);
	} else if (hci_dev_test_flag(hdev, HCI_AUTO_OFF)) {
		queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
				   HCI_AUTO_OFF_TIMEOUT);
	}

	if (hci_dev_test_and_clear_flag(hdev, HCI_SETUP)) {
		/* For unconfigured devices, set the HCI_RAW flag
		 * so that userspace can easily identify them.
		 */
		if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED))
			set_bit(HCI_RAW, &hdev->flags);

		/* For fully configured devices, this will send
		 * the Index Added event. For unconfigured devices,
		 * it will send the Unconfigured Index Added event.
		 *
		 * Devices with HCI_QUIRK_RAW_DEVICE are ignored
		 * and no event will be sent.
		 */
		mgmt_index_added(hdev);
	} else if (hci_dev_test_and_clear_flag(hdev, HCI_CONFIG)) {
		/* When the controller is now configured, then it
		 * is important to clear the HCI_RAW flag.
*/ if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) clear_bit(HCI_RAW, &hdev->flags); /* Powering on the controller with HCI_CONFIG set only * happens with the transition from unconfigured to * configured. This will send the Index Added event. */ mgmt_index_added(hdev); } } static void hci_power_off(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, power_off.work); BT_DBG("%s", hdev->name); hci_dev_do_close(hdev); } static void hci_error_reset(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); hci_dev_hold(hdev); BT_DBG("%s", hdev->name); if (hdev->hw_error) hdev->hw_error(hdev, hdev->hw_error_code); else bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); if (!hci_dev_do_close(hdev)) hci_dev_do_open(hdev); hci_dev_put(hdev); } void hci_uuids_clear(struct hci_dev *hdev) { struct bt_uuid *uuid, *tmp; list_for_each_entry_safe(uuid, tmp, &hdev->uuids, list) { list_del(&uuid->list); kfree(uuid); } } void hci_link_keys_clear(struct hci_dev *hdev) { struct link_key *key, *tmp; list_for_each_entry_safe(key, tmp, &hdev->link_keys, list) { list_del_rcu(&key->list); kfree_rcu(key, rcu); } } void hci_smp_ltks_clear(struct hci_dev *hdev) { struct smp_ltk *k, *tmp; list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) { list_del_rcu(&k->list); kfree_rcu(k, rcu); } } void hci_smp_irks_clear(struct hci_dev *hdev) { struct smp_irk *k, *tmp; list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) { list_del_rcu(&k->list); kfree_rcu(k, rcu); } } void hci_blocked_keys_clear(struct hci_dev *hdev) { struct blocked_key *b, *tmp; list_for_each_entry_safe(b, tmp, &hdev->blocked_keys, list) { list_del_rcu(&b->list); kfree_rcu(b, rcu); } } bool hci_is_blocked_key(struct hci_dev *hdev, u8 type, u8 val[16]) { bool blocked = false; struct blocked_key *b; rcu_read_lock(); list_for_each_entry_rcu(b, &hdev->blocked_keys, list) { if (b->type == type && !memcmp(b->val, val, 
sizeof(b->val))) { blocked = true; break; } } rcu_read_unlock(); return blocked; } struct link_key *hci_find_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) { struct link_key *k; rcu_read_lock(); list_for_each_entry_rcu(k, &hdev->link_keys, list) { if (bacmp(bdaddr, &k->bdaddr) == 0) { rcu_read_unlock(); if (hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LINKKEY, k->val)) { bt_dev_warn_ratelimited(hdev, "Link key blocked for %pMR", &k->bdaddr); return NULL; } return k; } } rcu_read_unlock(); return NULL; } static bool hci_persistent_key(struct hci_dev *hdev, struct hci_conn *conn, u8 key_type, u8 old_key_type) { /* Legacy key */ if (key_type < 0x03) return true; /* Debug keys are insecure so don't store them persistently */ if (key_type == HCI_LK_DEBUG_COMBINATION) return false; /* Changed combination key and there's no previous one */ if (key_type == HCI_LK_CHANGED_COMBINATION && old_key_type == 0xff) return false; /* Security mode 3 case */ if (!conn) return true; /* BR/EDR key derived using SC from an LE link */ if (conn->type == LE_LINK) return true; /* Neither local nor remote side had no-bonding as requirement */ if (conn->auth_type > 0x01 && conn->remote_auth > 0x01) return true; /* Local side had dedicated bonding as requirement */ if (conn->auth_type == 0x02 || conn->auth_type == 0x03) return true; /* Remote side had dedicated bonding as requirement */ if (conn->remote_auth == 0x02 || conn->remote_auth == 0x03) return true; /* If none of the above criteria match, then don't store the key * persistently */ return false; } static u8 ltk_role(u8 type) { if (type == SMP_LTK) return HCI_ROLE_MASTER; return HCI_ROLE_SLAVE; } struct smp_ltk *hci_find_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 role) { struct smp_ltk *k; rcu_read_lock(); list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { if (addr_type != k->bdaddr_type || bacmp(bdaddr, &k->bdaddr)) continue; if (smp_ltk_is_sc(k) || ltk_role(k->type) == role) { rcu_read_unlock(); if 
(hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_LTK, k->val)) { bt_dev_warn_ratelimited(hdev, "LTK blocked for %pMR", &k->bdaddr); return NULL; } return k; } } rcu_read_unlock(); return NULL; } struct smp_irk *hci_find_irk_by_rpa(struct hci_dev *hdev, bdaddr_t *rpa) { struct smp_irk *irk_to_return = NULL; struct smp_irk *irk; rcu_read_lock(); list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { if (!bacmp(&irk->rpa, rpa)) { irk_to_return = irk; goto done; } } list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { if (smp_irk_matches(hdev, irk->val, rpa)) { bacpy(&irk->rpa, rpa); irk_to_return = irk; goto done; } } done: if (irk_to_return && hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, irk_to_return->val)) { bt_dev_warn_ratelimited(hdev, "Identity key blocked for %pMR", &irk_to_return->bdaddr); irk_to_return = NULL; } rcu_read_unlock(); return irk_to_return; } struct smp_irk *hci_find_irk_by_addr(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) { struct smp_irk *irk_to_return = NULL; struct smp_irk *irk; /* Identity Address must be public or static random */ if (addr_type == ADDR_LE_DEV_RANDOM && (bdaddr->b[5] & 0xc0) != 0xc0) return NULL; rcu_read_lock(); list_for_each_entry_rcu(irk, &hdev->identity_resolving_keys, list) { if (addr_type == irk->addr_type && bacmp(bdaddr, &irk->bdaddr) == 0) { irk_to_return = irk; break; } } if (irk_to_return && hci_is_blocked_key(hdev, HCI_BLOCKED_KEY_TYPE_IRK, irk_to_return->val)) { bt_dev_warn_ratelimited(hdev, "Identity key blocked for %pMR", &irk_to_return->bdaddr); irk_to_return = NULL; } rcu_read_unlock(); return irk_to_return; } struct link_key *hci_add_link_key(struct hci_dev *hdev, struct hci_conn *conn, bdaddr_t *bdaddr, u8 *val, u8 type, u8 pin_len, bool *persistent) { struct link_key *key, *old_key; u8 old_key_type; old_key = hci_find_link_key(hdev, bdaddr); if (old_key) { old_key_type = old_key->type; key = old_key; } else { old_key_type = conn ? 
conn->key_type : 0xff; key = kzalloc(sizeof(*key), GFP_KERNEL); if (!key) return NULL; list_add_rcu(&key->list, &hdev->link_keys); } BT_DBG("%s key for %pMR type %u", hdev->name, bdaddr, type); /* Some buggy controller combinations generate a changed * combination key for legacy pairing even when there's no * previous key */ if (type == HCI_LK_CHANGED_COMBINATION && (!conn || conn->remote_auth == 0xff) && old_key_type == 0xff) { type = HCI_LK_COMBINATION; if (conn) conn->key_type = type; } bacpy(&key->bdaddr, bdaddr); memcpy(key->val, val, HCI_LINK_KEY_SIZE); key->pin_len = pin_len; if (type == HCI_LK_CHANGED_COMBINATION) key->type = old_key_type; else key->type = type; if (persistent) *persistent = hci_persistent_key(hdev, conn, type, old_key_type); return key; } struct smp_ltk *hci_add_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 type, u8 authenticated, u8 tk[16], u8 enc_size, __le16 ediv, __le64 rand) { struct smp_ltk *key, *old_key; u8 role = ltk_role(type); old_key = hci_find_ltk(hdev, bdaddr, addr_type, role); if (old_key) key = old_key; else { key = kzalloc(sizeof(*key), GFP_KERNEL); if (!key) return NULL; list_add_rcu(&key->list, &hdev->long_term_keys); } bacpy(&key->bdaddr, bdaddr); key->bdaddr_type = addr_type; memcpy(key->val, tk, sizeof(key->val)); key->authenticated = authenticated; key->ediv = ediv; key->rand = rand; key->enc_size = enc_size; key->type = type; return key; } struct smp_irk *hci_add_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type, u8 val[16], bdaddr_t *rpa) { struct smp_irk *irk; irk = hci_find_irk_by_addr(hdev, bdaddr, addr_type); if (!irk) { irk = kzalloc(sizeof(*irk), GFP_KERNEL); if (!irk) return NULL; bacpy(&irk->bdaddr, bdaddr); irk->addr_type = addr_type; list_add_rcu(&irk->list, &hdev->identity_resolving_keys); } memcpy(irk->val, val, 16); bacpy(&irk->rpa, rpa); return irk; } int hci_remove_link_key(struct hci_dev *hdev, bdaddr_t *bdaddr) { struct link_key *key; key = hci_find_link_key(hdev, bdaddr); if 
(!key) return -ENOENT; BT_DBG("%s removing %pMR", hdev->name, bdaddr); list_del_rcu(&key->list); kfree_rcu(key, rcu); return 0; } int hci_remove_ltk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { struct smp_ltk *k, *tmp; int removed = 0; list_for_each_entry_safe(k, tmp, &hdev->long_term_keys, list) { if (bacmp(bdaddr, &k->bdaddr) || k->bdaddr_type != bdaddr_type) continue; BT_DBG("%s removing %pMR", hdev->name, bdaddr); list_del_rcu(&k->list); kfree_rcu(k, rcu); removed++; } return removed ? 0 : -ENOENT; } void hci_remove_irk(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 addr_type) { struct smp_irk *k, *tmp; list_for_each_entry_safe(k, tmp, &hdev->identity_resolving_keys, list) { if (bacmp(bdaddr, &k->bdaddr) || k->addr_type != addr_type) continue; BT_DBG("%s removing %pMR", hdev->name, bdaddr); list_del_rcu(&k->list); kfree_rcu(k, rcu); } } bool hci_bdaddr_is_paired(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type) { struct smp_ltk *k; struct smp_irk *irk; u8 addr_type; if (type == BDADDR_BREDR) { if (hci_find_link_key(hdev, bdaddr)) return true; return false; } /* Convert to HCI addr type which struct smp_ltk uses */ if (type == BDADDR_LE_PUBLIC) addr_type = ADDR_LE_DEV_PUBLIC; else addr_type = ADDR_LE_DEV_RANDOM; irk = hci_get_irk(hdev, bdaddr, addr_type); if (irk) { bdaddr = &irk->bdaddr; addr_type = irk->addr_type; } rcu_read_lock(); list_for_each_entry_rcu(k, &hdev->long_term_keys, list) { if (k->bdaddr_type == addr_type && !bacmp(bdaddr, &k->bdaddr)) { rcu_read_unlock(); return true; } } rcu_read_unlock(); return false; } /* HCI command timer function */ static void hci_cmd_timeout(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_timer.work); if (hdev->req_skb) { u16 opcode = hci_skb_opcode(hdev->req_skb); bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); hci_cmd_sync_cancel_sync(hdev, ETIMEDOUT); } else { bt_dev_err(hdev, "command tx timeout"); } if (hdev->reset) hdev->reset(hdev); 
atomic_set(&hdev->cmd_cnt, 1); queue_work(hdev->workqueue, &hdev->cmd_work); } /* HCI ncmd timer function */ static void hci_ncmd_timeout(struct work_struct *work) { struct hci_dev *hdev = container_of(work, struct hci_dev, ncmd_timer.work); bt_dev_err(hdev, "Controller not accepting commands anymore: ncmd = 0"); /* During HCI_INIT phase no events can be injected if the ncmd timer * triggers since the procedure has its own timeout handling. */ if (test_bit(HCI_INIT, &hdev->flags)) return; /* This is an irrecoverable state, inject hardware error event */ hci_reset_dev(hdev); } struct oob_data *hci_find_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { struct oob_data *data; list_for_each_entry(data, &hdev->remote_oob_data, list) { if (bacmp(bdaddr, &data->bdaddr) != 0) continue; if (data->bdaddr_type != bdaddr_type) continue; return data; } return NULL; } int hci_remove_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type) { struct oob_data *data; data = hci_find_remote_oob_data(hdev, bdaddr, bdaddr_type); if (!data) return -ENOENT; BT_DBG("%s removing %pMR (%u)", hdev->name, bdaddr, bdaddr_type); list_del(&data->list); kfree(data); return 0; } void hci_remote_oob_data_clear(struct hci_dev *hdev) { struct oob_data *data, *n; list_for_each_entry_safe(data, n, &hdev->remote_oob_data, list) { list_del(&data->list); kfree(data); } } int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 bdaddr_type, u8 *hash192, u8 *rand192, u8 *hash256, u8 *rand256) { struct oob_data *data; data = hci_find_remote_oob_data(hdev, bdaddr, bdaddr_type); if (!data) { data = kmalloc(sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; bacpy(&data->bdaddr, bdaddr); data->bdaddr_type = bdaddr_type; list_add(&data->list, &hdev->remote_oob_data); } if (hash192 && rand192) { memcpy(data->hash192, hash192, sizeof(data->hash192)); memcpy(data->rand192, rand192, sizeof(data->rand192)); if (hash256 && rand256) data->present = 0x03; } else { 
memset(data->hash192, 0, sizeof(data->hash192)); memset(data->rand192, 0, sizeof(data->rand192)); if (hash256 && rand256) data->present = 0x02; else data->present = 0x00; } if (hash256 && rand256) { memcpy(data->hash256, hash256, sizeof(data->hash256)); memcpy(data->rand256, rand256, sizeof(data->rand256)); } else { memset(data->hash256, 0, sizeof(data->hash256)); memset(data->rand256, 0, sizeof(data->rand256)); if (hash192 && rand192) data->present = 0x01; } BT_DBG("%s for %pMR", hdev->name, bdaddr); return 0; } /* This function requires the caller holds hdev->lock */ struct adv_info *hci_find_adv_instance(struct hci_dev *hdev, u8 instance) { struct adv_info *adv_instance; list_for_each_entry(adv_instance, &hdev->adv_instances, list) { if (adv_instance->instance == instance) return adv_instance; } return NULL; } /* This function requires the caller holds hdev->lock */ struct adv_info *hci_find_adv_sid(struct hci_dev *hdev, u8 sid) { struct adv_info *adv; list_for_each_entry(adv, &hdev->adv_instances, list) { if (adv->sid == sid) return adv; } return NULL; } /* This function requires the caller holds hdev->lock */ struct adv_info *hci_get_next_instance(struct hci_dev *hdev, u8 instance) { struct adv_info *cur_instance; cur_instance = hci_find_adv_instance(hdev, instance); if (!cur_instance) return NULL; if (cur_instance == list_last_entry(&hdev->adv_instances, struct adv_info, list)) return list_first_entry(&hdev->adv_instances, struct adv_info, list); else return list_next_entry(cur_instance, list); } /* This function requires the caller holds hdev->lock */ int hci_remove_adv_instance(struct hci_dev *hdev, u8 instance) { struct adv_info *adv_instance; adv_instance = hci_find_adv_instance(hdev, instance); if (!adv_instance) return -ENOENT; BT_DBG("%s removing %dMR", hdev->name, instance); if (hdev->cur_adv_instance == instance) { if (hdev->adv_instance_timeout) { cancel_delayed_work(&hdev->adv_instance_expire); hdev->adv_instance_timeout = 0; } 
hdev->cur_adv_instance = 0x00; } cancel_delayed_work_sync(&adv_instance->rpa_expired_cb); list_del(&adv_instance->list); kfree(adv_instance); hdev->adv_instance_cnt--; return 0; } void hci_adv_instances_set_rpa_expired(struct hci_dev *hdev, bool rpa_expired) { struct adv_info *adv_instance, *n; list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) adv_instance->rpa_expired = rpa_expired; } /* This function requires the caller holds hdev->lock */ void hci_adv_instances_clear(struct hci_dev *hdev) { struct adv_info *adv_instance, *n; if (hdev->adv_instance_timeout) { disable_delayed_work(&hdev->adv_instance_expire); hdev->adv_instance_timeout = 0; } list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, list) { disable_delayed_work_sync(&adv_instance->rpa_expired_cb); list_del(&adv_instance->list); kfree(adv_instance); } hdev->adv_instance_cnt = 0; hdev->cur_adv_instance = 0x00; } static void adv_instance_rpa_expired(struct work_struct *work) { struct adv_info *adv_instance = container_of(work, struct adv_info, rpa_expired_cb.work); BT_DBG(""); adv_instance->rpa_expired = true; } /* This function requires the caller holds hdev->lock */ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, u16 timeout, u16 duration, s8 tx_power, u32 min_interval, u32 max_interval, u8 mesh_handle) { struct adv_info *adv; adv = hci_find_adv_instance(hdev, instance); if (adv) { memset(adv->adv_data, 0, sizeof(adv->adv_data)); memset(adv->scan_rsp_data, 0, sizeof(adv->scan_rsp_data)); memset(adv->per_adv_data, 0, sizeof(adv->per_adv_data)); } else { if (hdev->adv_instance_cnt >= hdev->le_num_of_adv_sets || instance < 1 || instance > hdev->le_num_of_adv_sets + 1) return ERR_PTR(-EOVERFLOW); adv = kzalloc(sizeof(*adv), GFP_KERNEL); if (!adv) return ERR_PTR(-ENOMEM); adv->pending = true; adv->instance = instance; /* If controller support only one set and the instance is 
set to * 1 then there is no option other than using handle 0x00. */ if (hdev->le_num_of_adv_sets == 1 && instance == 1) adv->handle = 0x00; else adv->handle = instance; list_add(&adv->list, &hdev->adv_instances); hdev->adv_instance_cnt++; } adv->flags = flags; adv->min_interval = min_interval; adv->max_interval = max_interval; adv->tx_power = tx_power; /* Defining a mesh_handle changes the timing units to ms, * rather than seconds, and ties the instance to the requested * mesh_tx queue. */ adv->mesh = mesh_handle; hci_set_adv_instance_data(hdev, instance, adv_data_len, adv_data, scan_rsp_len, scan_rsp_data); adv->timeout = timeout; adv->remaining_time = timeout; if (duration == 0) adv->duration = hdev->def_multi_adv_rotation_duration; else adv->duration = duration; INIT_DELAYED_WORK(&adv->rpa_expired_cb, adv_instance_rpa_expired); BT_DBG("%s for %dMR", hdev->name, instance); return adv; } /* This function requires the caller holds hdev->lock */ struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, u8 sid, u32 flags, u8 data_len, u8 *data, u32 min_interval, u32 max_interval) { struct adv_info *adv; adv = hci_add_adv_instance(hdev, instance, flags, 0, NULL, 0, NULL, 0, 0, HCI_ADV_TX_POWER_NO_PREFERENCE, min_interval, max_interval, 0); if (IS_ERR(adv)) return adv; adv->sid = sid; adv->periodic = true; adv->per_adv_data_len = data_len; if (data) memcpy(adv->per_adv_data, data, data_len); return adv; } /* This function requires the caller holds hdev->lock */ int hci_set_adv_instance_data(struct hci_dev *hdev, u8 instance, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data) { struct adv_info *adv; adv = hci_find_adv_instance(hdev, instance); /* If advertisement doesn't exist, we can't modify its data */ if (!adv) return -ENOENT; if (adv_data_len && ADV_DATA_CMP(adv, adv_data, adv_data_len)) { memset(adv->adv_data, 0, sizeof(adv->adv_data)); memcpy(adv->adv_data, adv_data, adv_data_len); adv->adv_data_len = adv_data_len; 
/* Continuation of hci_set_adv_instance_data(): the advertising data was
 * just rewritten, so flag it as changed.
 */
		adv->adv_data_changed = true;
	}

	if (scan_rsp_len && SCAN_RSP_CMP(adv, scan_rsp_data, scan_rsp_len)) {
		memset(adv->scan_rsp_data, 0, sizeof(adv->scan_rsp_data));
		memcpy(adv->scan_rsp_data, scan_rsp_data, scan_rsp_len);
		adv->scan_rsp_len = scan_rsp_len;
		adv->scan_rsp_changed = true;
	}

	/* Mark as changed if there are flags which would affect it */
	if (((adv->flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) ||
	    adv->flags & MGMT_ADV_FLAG_LOCAL_NAME)
		adv->scan_rsp_changed = true;

	return 0;
}

/* Return the MGMT advertising flags of @instance. Instance 0x00 gets a
 * flag set derived from device flags; any other instance returns its
 * stored flags, or 0 when it does not exist.
 *
 * This function requires the caller holds hdev->lock
 */
u32 hci_adv_instance_flags(struct hci_dev *hdev, u8 instance)
{
	u32 flags;
	struct adv_info *adv;

	if (instance == 0x00) {
		/* Instance 0 always manages the "Tx Power" and "Flags"
		 * fields
		 */
		flags = MGMT_ADV_FLAG_TX_POWER | MGMT_ADV_FLAG_MANAGED_FLAGS;

		/* For instance 0, the HCI_ADVERTISING_CONNECTABLE setting
		 * corresponds to the "connectable" instance flag.
		 */
		if (hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE))
			flags |= MGMT_ADV_FLAG_CONNECTABLE;

		if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE))
			flags |= MGMT_ADV_FLAG_LIMITED_DISCOV;
		else if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE))
			flags |= MGMT_ADV_FLAG_DISCOV;

		return flags;
	}

	adv = hci_find_adv_instance(hdev, instance);

	/* Return 0 when we got an invalid instance identifier. */
	if (!adv)
		return 0;

	return adv->flags;
}

/* Report whether @instance is scannable: always true for instance 0x00,
 * false for an unknown instance, otherwise true when the appearance or
 * local-name flag is set or the scan response length is non-zero.
 */
bool hci_adv_instance_is_scannable(struct hci_dev *hdev, u8 instance)
{
	struct adv_info *adv;

	/* Instance 0x00 always set local name */
	if (instance == 0x00)
		return true;

	adv = hci_find_adv_instance(hdev, instance);
	if (!adv)
		return false;

	if (adv->flags & MGMT_ADV_FLAG_APPEARANCE ||
	    adv->flags & MGMT_ADV_FLAG_LOCAL_NAME)
		return true;

	return adv->scan_rsp_len ? true : false;
}

/* Free every monitor stored in hdev->adv_monitors_idr and destroy the
 * IDR.
 *
 * This function requires the caller holds hdev->lock
 */
void hci_adv_monitors_clear(struct hci_dev *hdev)
{
	struct adv_monitor *monitor;
	int handle;

	idr_for_each_entry(&hdev->adv_monitors_idr, monitor, handle)
		hci_free_adv_monitor(hdev, monitor);

	idr_destroy(&hdev->adv_monitors_idr);
}

/* Frees the monitor structure and do some bookkeepings: the monitor's
 * patterns are freed, a non-zero handle is removed from the IDR, and the
 * monitor count is decremented unless the state is
 * ADV_MONITOR_STATE_NOT_REGISTERED. A NULL @monitor is ignored.
 *
 * This function requires the caller holds hdev->lock.
 */
void hci_free_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
{
	struct adv_pattern *pattern;
	struct adv_pattern *tmp;

	if (!monitor)
		return;

	list_for_each_entry_safe(pattern, tmp, &monitor->patterns, list) {
		list_del(&pattern->list);
		kfree(pattern);
	}

	if (monitor->handle)
		idr_remove(&hdev->adv_monitors_idr, monitor->handle);

	if (monitor->state != ADV_MONITOR_STATE_NOT_REGISTERED)
		hdev->adv_monitors_cnt--;

	kfree(monitor);
}

/* Assigns handle to a monitor, and if offloading is supported and power is on,
 * also attempts to forward the request to the controller.
 *
 * Returns -EINVAL for a NULL @monitor, the negative idr_alloc() result
 * when no handle can be allocated, otherwise 0 or the status of
 * msft_add_monitor_pattern().
 *
 * This function requires the caller holds hci_req_sync_lock.
 */
int hci_add_adv_monitor(struct hci_dev *hdev, struct adv_monitor *monitor)
{
	int min, max, handle;
	int status = 0;

	if (!monitor)
		return -EINVAL;

	hci_dev_lock(hdev);

	min = HCI_MIN_ADV_MONITOR_HANDLE;
	max = HCI_MIN_ADV_MONITOR_HANDLE + HCI_MAX_ADV_MONITOR_NUM_HANDLES;
	handle = idr_alloc(&hdev->adv_monitors_idr, monitor, min, max,
			   GFP_KERNEL);

	hci_dev_unlock(hdev);

	if (handle < 0)
		return handle;

	monitor->handle = handle;

	if (!hdev_is_powered(hdev))
		return status;

	switch (hci_get_adv_monitor_offload_ext(hdev)) {
	case HCI_ADV_MONITOR_EXT_NONE:
		bt_dev_dbg(hdev, "add monitor %d status %d",
			   monitor->handle, status);
		/* Message was not forwarded to controller - not an error */
		break;

	case HCI_ADV_MONITOR_EXT_MSFT:
		status = msft_add_monitor_pattern(hdev, monitor);
		bt_dev_dbg(hdev, "add monitor %d msft status %d",
			   handle, status);
		break;
	}

	return status;
}

/* Attempts to tell the controller and free the monitor. If somehow the
 * controller doesn't have a corresponding handle, remove anyway.
 *
 * On the MSFT path the monitor is freed here only when
 * msft_remove_monitor() returns -ENOENT; any other status is returned
 * without freeing in this function.
 *
 * This function requires the caller holds hci_req_sync_lock.
 */
static int hci_remove_adv_monitor(struct hci_dev *hdev,
				  struct adv_monitor *monitor)
{
	int status = 0;
	int handle;

	switch (hci_get_adv_monitor_offload_ext(hdev)) {
	case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
		bt_dev_dbg(hdev, "remove monitor %d status %d",
			   monitor->handle, status);
		goto free_monitor;

	case HCI_ADV_MONITOR_EXT_MSFT:
		/* Saved up front so the handle is logged from the local copy */
		handle = monitor->handle;
		status = msft_remove_monitor(hdev, monitor);
		bt_dev_dbg(hdev, "remove monitor %d msft status %d",
			   handle, status);
		break;
	}

	/* In case no matching handle registered, just free the monitor */
	if (status == -ENOENT)
		goto free_monitor;

	return status;

free_monitor:
	if (status == -ENOENT)
		bt_dev_warn(hdev, "Removing monitor with no matching handle %d",
			    monitor->handle);
	hci_free_adv_monitor(hdev, monitor);

	return status;
}

/* Remove the monitor registered under @handle. Returns -EINVAL when no
 * monitor has that handle, otherwise the hci_remove_adv_monitor() status.
 *
 * This function requires the caller holds hci_req_sync_lock
 */
int hci_remove_single_adv_monitor(struct hci_dev *hdev, u16 handle)
{
	struct adv_monitor *monitor = idr_find(&hdev->adv_monitors_idr, handle);

	if (!monitor)
		return -EINVAL;

	return hci_remove_adv_monitor(hdev, monitor);
}

/* Remove every registered monitor, stopping at and returning the first
 * non-zero status from hci_remove_adv_monitor().
 *
 * This function requires the caller holds hci_req_sync_lock
 */
int hci_remove_all_adv_monitor(struct hci_dev *hdev)
{
	struct adv_monitor *monitor;
	int idr_next_id = 0;
	int status = 0;

	while (1) {
		monitor = idr_get_next(&hdev->adv_monitors_idr, &idr_next_id);
		if (!monitor)
			break;

		status = hci_remove_adv_monitor(hdev, monitor);
		if (status)
			return status;

		idr_next_id++;
	}

	return status;
}

/* Report whether at least one monitor is stored in the IDR.
 *
 * This function requires the caller holds hdev->lock
 */
bool hci_is_adv_monitoring(struct hci_dev *hdev)
{
	return !idr_is_empty(&hdev->adv_monitors_idr);
}

/* Return HCI_ADV_MONITOR_EXT_MSFT when msft_monitor_supported() is true
 * for @hdev, HCI_ADV_MONITOR_EXT_NONE otherwise.
 */
int hci_get_adv_monitor_offload_ext(struct hci_dev *hdev)
{
	if (msft_monitor_supported(hdev))
		return HCI_ADV_MONITOR_EXT_MSFT;

	return HCI_ADV_MONITOR_EXT_NONE;
}

/* Find the entry of @bdaddr_list matching @bdaddr and @type; NULL when
 * there is none.
 */
struct bdaddr_list *hci_bdaddr_list_lookup(struct list_head *bdaddr_list,
					   bdaddr_t *bdaddr, u8 type)
{
	struct bdaddr_list *b;

	list_for_each_entry(b, bdaddr_list, list) {
		if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type)
			return b;
	}

	return NULL;
}

/* Same lookup as hci_bdaddr_list_lookup(), for a list whose entries are
 * struct bdaddr_list_with_irk.
 */
struct bdaddr_list_with_irk *hci_bdaddr_list_lookup_with_irk(
				struct list_head *bdaddr_list, bdaddr_t *bdaddr,
				u8 type)
{
	struct bdaddr_list_with_irk *b;

	list_for_each_entry(b, bdaddr_list, list) {
		if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type)
			return b;
	}

	return NULL;
}

/* Same lookup as hci_bdaddr_list_lookup(), for a list whose entries are
 * struct bdaddr_list_with_flags.
 */
struct bdaddr_list_with_flags *
hci_bdaddr_list_lookup_with_flags(struct list_head *bdaddr_list,
				  bdaddr_t *bdaddr, u8 type)
{
	struct bdaddr_list_with_flags *b;

	list_for_each_entry(b, bdaddr_list, list) {
		if (!bacmp(&b->bdaddr, bdaddr) && b->bdaddr_type == type)
			return b;
	}

	return NULL;
}

/* Unlink and free every entry of @bdaddr_list. */
void hci_bdaddr_list_clear(struct list_head *bdaddr_list)
{
	struct bdaddr_list *b, *n;

	list_for_each_entry_safe(b, n, bdaddr_list, list) {
		list_del(&b->list);
		kfree(b);
	}
}

/* Add @bdaddr/@type to @list.
 *
 * Returns 0 on success, -EBADF for BDADDR_ANY, -EEXIST when the address
 * is already listed, or -ENOMEM on allocation failure.
 */
int hci_bdaddr_list_add(struct list_head *list, bdaddr_t *bdaddr, u8 type)
{
	struct bdaddr_list *entry;

	if (!bacmp(bdaddr, BDADDR_ANY))
		return -EBADF;

	if (hci_bdaddr_list_lookup(list, bdaddr, type))
		return -EEXIST;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	bacpy(&entry->bdaddr, bdaddr);
	entry->bdaddr_type = type;

	list_add(&entry->list, list);

	return 0;
}

/* Like hci_bdaddr_list_add(), additionally copying 16 bytes from each of
 * @peer_irk and @local_irk that is non-NULL into the new entry.
 */
int hci_bdaddr_list_add_with_irk(struct list_head *list, bdaddr_t *bdaddr,
				 u8 type, u8 *peer_irk, u8 *local_irk)
{
	struct bdaddr_list_with_irk *entry;

	if (!bacmp(bdaddr, BDADDR_ANY))
		return -EBADF;

	if (hci_bdaddr_list_lookup(list, bdaddr, type))
		return -EEXIST;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	bacpy(&entry->bdaddr, bdaddr);
	entry->bdaddr_type = type;

	if (peer_irk)
		memcpy(entry->peer_irk, peer_irk, 16);

	if (local_irk)
		memcpy(entry->local_irk, local_irk, 16);

	list_add(&entry->list, list);

	return 0;
}

/* Like hci_bdaddr_list_add(), additionally storing @flags in the new
 * entry.
 */
int hci_bdaddr_list_add_with_flags(struct list_head *list, bdaddr_t *bdaddr,
				   u8 type, u32 flags)
{
	struct bdaddr_list_with_flags *entry;

	if (!bacmp(bdaddr, BDADDR_ANY))
		return -EBADF;

	if (hci_bdaddr_list_lookup(list, bdaddr, type))
		return -EEXIST;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	bacpy(&entry->bdaddr, bdaddr);
	entry->bdaddr_type = type;
	entry->flags = flags;

	list_add(&entry->list, list);

	return 0;
}

/* Remove @bdaddr/@type from @list; BDADDR_ANY clears the whole list.
 * Returns 0 on success or -ENOENT when the address is not listed.
 */
int hci_bdaddr_list_del(struct list_head *list, bdaddr_t *bdaddr, u8 type)
{
	struct bdaddr_list *entry;

	if (!bacmp(bdaddr, BDADDR_ANY)) {
		hci_bdaddr_list_clear(list);
		return 0;
	}

	entry = hci_bdaddr_list_lookup(list, bdaddr, type);
	if (!entry)
		return -ENOENT;

	list_del(&entry->list);
	kfree(entry);

	return 0;
}

/* Same as hci_bdaddr_list_del(), for a list whose entries are struct
 * bdaddr_list_with_irk.
 */
int hci_bdaddr_list_del_with_irk(struct list_head *list, bdaddr_t *bdaddr,
				 u8 type)
{
	struct bdaddr_list_with_irk *entry;

	if (!bacmp(bdaddr, BDADDR_ANY)) {
		hci_bdaddr_list_clear(list);
		return 0;
	}

	entry = hci_bdaddr_list_lookup_with_irk(list, bdaddr, type);
	if (!entry)
		return -ENOENT;

	list_del(&entry->list);
	kfree(entry);

	return 0;
}

/* Find the LE connection parameters stored for @addr/@addr_type in
 * hdev->le_conn_params; NULL when there are none.
 *
 * This function requires the caller holds hdev->lock
 */
struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
					       bdaddr_t *addr, u8 addr_type)
{
	struct hci_conn_params *params;

	list_for_each_entry(params, &hdev->le_conn_params, list) {
		if (bacmp(&params->addr, addr) == 0 &&
		    params->addr_type == addr_type) {
			return params;
		}
	}

	return NULL;
}

/* Find the entry for @addr/@addr_type on a pending-action @list (entries
 * are linked through ->action); NULL when there is none. The walk itself
 * runs inside an RCU read-side section that is left before returning.
 *
 * This function requires the caller holds hdev->lock or rcu_read_lock
 */
struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
						  bdaddr_t *addr, u8 addr_type)
{
	struct hci_conn_params *param;

	rcu_read_lock();

	list_for_each_entry_rcu(param, list, action) {
		if (bacmp(&param->addr, addr) == 0 &&
		    param->addr_type == addr_type) {
			rcu_read_unlock();
			return param;
		}
	}

	rcu_read_unlock();

	return NULL;
}

/* Take @param off whatever pending-action list it is on and re-initialise
 * its ->action node. Does nothing when ->action is already empty.
 *
 * This function requires the caller holds hdev->lock
 */
void hci_pend_le_list_del_init(struct hci_conn_params *param)
{
	if (list_empty(&param->action))
		return;

	list_del_rcu(&param->action);
	/* The node is re-initialised only after synchronize_rcu() returns */
	synchronize_rcu();
	INIT_LIST_HEAD(&param->action);
}
/* Link @param->action into the pending-action @list with list_add_rcu().
 *
 * This function requires the caller holds hdev->lock
 */
void hci_pend_le_list_add(struct hci_conn_params *param,
			  struct list_head *list)
{
	list_add_rcu(&param->action, list);
}

/* Return the LE connection parameters for @addr/@addr_type, creating
 * them from the hdev defaults (auto-connect disabled) when they do not
 * exist yet. Returns NULL on allocation failure.
 *
 * This function requires the caller holds hdev->lock
 */
struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
					    bdaddr_t *addr, u8 addr_type)
{
	struct hci_conn_params *params;

	params = hci_conn_params_lookup(hdev, addr, addr_type);
	if (params)
		return params;

	params = kzalloc(sizeof(*params), GFP_KERNEL);
	if (!params) {
		bt_dev_err(hdev, "out of memory");
		return NULL;
	}

	bacpy(&params->addr, addr);
	params->addr_type = addr_type;

	list_add(&params->list, &hdev->le_conn_params);
	INIT_LIST_HEAD(&params->action);

	params->conn_min_interval = hdev->le_conn_min_interval;
	params->conn_max_interval = hdev->le_conn_max_interval;
	params->conn_latency = hdev->le_conn_latency;
	params->supervision_timeout = hdev->le_supv_timeout;
	params->auto_connect = HCI_AUTO_CONN_DISABLED;

	BT_DBG("addr %pMR (type %u)", addr, addr_type);

	return params;
}

/* Release @params: take it off any pending-action list, drop and put the
 * connection it references (if any), unlink it from its list and free it.
 */
void hci_conn_params_free(struct hci_conn_params *params)
{
	hci_pend_le_list_del_init(params);

	if (params->conn) {
		hci_conn_drop(params->conn);
		hci_conn_put(params->conn);
	}

	list_del(&params->list);
	kfree(params);
}

/* Free the connection parameters stored for @addr/@addr_type, if any,
 * and then call hci_update_passive_scan().
 *
 * This function requires the caller holds hdev->lock
 */
void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type)
{
	struct hci_conn_params *params;

	params = hci_conn_params_lookup(hdev, addr, addr_type);
	if (!params)
		return;

	hci_conn_params_free(params);

	hci_update_passive_scan(hdev);

	BT_DBG("addr %pMR (type %u)", addr, addr_type);
}

/* Free every connection parameter entry whose auto_connect is
 * HCI_AUTO_CONN_DISABLED, except entries with explicit_connect set,
 * which are switched to HCI_AUTO_CONN_EXPLICIT instead.
 *
 * This function requires the caller holds hdev->lock
 */
void hci_conn_params_clear_disabled(struct hci_dev *hdev)
{
	struct hci_conn_params *params, *tmp;

	list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list) {
		if (params->auto_connect != HCI_AUTO_CONN_DISABLED)
			continue;

		/* If trying to establish one time connection to disabled
		 * device, leave the params, but mark them as just once.
		 */
		if (params->explicit_connect) {
			params->auto_connect = HCI_AUTO_CONN_EXPLICIT;
			continue;
		}

		hci_conn_params_free(params);
	}

	BT_DBG("All LE disabled connection parameters were removed");
}

/* Free every entry on hdev->le_conn_params.
 *
 * This function requires the caller holds hdev->lock
 */
static void hci_conn_params_clear_all(struct hci_dev *hdev)
{
	struct hci_conn_params *params, *tmp;

	list_for_each_entry_safe(params, tmp, &hdev->le_conn_params, list)
		hci_conn_params_free(params);

	BT_DBG("All LE connection parameters were removed");
}

/* Copy the Identity Address of the controller.
 *
 * If the controller has a public BD_ADDR, then by default use that one.
 * If this is a LE only controller without a public address, default to
 * the static random address.
 *
 * For debugging purposes it is possible to force controllers with a
 * public address to use the static random address instead.
 *
 * In case BR/EDR has been disabled on a dual-mode controller and
 * userspace has configured a static address, then that address
 * becomes the identity address instead of the public BR/EDR address.
 */
void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr,
			       u8 *bdaddr_type)
{
	if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) ||
	    !bacmp(&hdev->bdaddr, BDADDR_ANY) ||
	    (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) &&
	     bacmp(&hdev->static_addr, BDADDR_ANY))) {
		bacpy(bdaddr, &hdev->static_addr);
		*bdaddr_type = ADDR_LE_DEV_RANDOM;
	} else {
		bacpy(bdaddr, &hdev->bdaddr);
		*bdaddr_type = ADDR_LE_DEV_PUBLIC;
	}
}

/* Reset the recorded wake reason and wake address under hdev->lock. */
static void hci_clear_wake_reason(struct hci_dev *hdev)
{
	hci_dev_lock(hdev);

	hdev->wake_reason = 0;
	bacpy(&hdev->wake_addr, BDADDR_ANY);
	hdev->wake_addr_type = 0;

	hci_dev_unlock(hdev);
}

/* PM notifier callback: suspends the device on the *_PREPARE actions and
 * resumes it on the POST_* actions. A failure is only logged; the
 * callback always returns NOTIFY_DONE.
 */
static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
				void *data)
{
	struct hci_dev *hdev =
		container_of(nb, struct hci_dev, suspend_notifier);
	int ret = 0;

	/* Userspace has full control of this device. Do nothing. */
	if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL))
		return NOTIFY_DONE;

	/* To avoid a potential race with hci_unregister_dev. */
	hci_dev_hold(hdev);

	switch (action) {
	case PM_HIBERNATION_PREPARE:
	case PM_SUSPEND_PREPARE:
		ret = hci_suspend_dev(hdev);
		break;
	case PM_POST_HIBERNATION:
	case PM_POST_SUSPEND:
		ret = hci_resume_dev(hdev);
		break;
	}

	if (ret)
		bt_dev_err(hdev, "Suspend notifier action (%lu) failed: %d",
			   action, ret);

	hci_dev_put(hdev);
	return NOTIFY_DONE;
}

/* Alloc HCI device
 *
 * Allocates a zeroed struct hci_dev followed by @sizeof_priv extra bytes
 * and fills in the defaults below. Returns NULL when the allocation or
 * init_srcu_struct() fails.
 */
struct hci_dev *hci_alloc_dev_priv(int sizeof_priv)
{
	struct hci_dev *hdev;
	unsigned int alloc_size;

	alloc_size = sizeof(*hdev);
	if (sizeof_priv) {
		/* Fixme: May need ALIGN-ment? */
		alloc_size += sizeof_priv;
	}

	hdev = kzalloc(alloc_size, GFP_KERNEL);
	if (!hdev)
		return NULL;

	if (init_srcu_struct(&hdev->srcu)) {
		kfree(hdev);
		return NULL;
	}

	hdev->pkt_type  = (HCI_DM1 | HCI_DH1 | HCI_HV1);
	hdev->esco_type = (ESCO_HV1);
	hdev->link_mode = (HCI_LM_ACCEPT);
	hdev->num_iac = 0x01;		/* One IAC support is mandatory */
	hdev->io_capability = 0x03;	/* No Input No Output */
	hdev->manufacturer = 0xffff;	/* Default to internal use */
	hdev->inq_tx_power = HCI_TX_POWER_INVALID;
	hdev->adv_tx_power = HCI_TX_POWER_INVALID;
	hdev->adv_instance_cnt = 0;
	hdev->cur_adv_instance = 0x00;
	hdev->adv_instance_timeout = 0;

	hdev->advmon_allowlist_duration = 300;
	hdev->advmon_no_filter_duration = 500;
	hdev->enable_advmon_interleave_scan = 0x00;	/* Default to disable */

	hdev->sniff_max_interval = 800;
	hdev->sniff_min_interval = 80;

	hdev->le_adv_channel_map = 0x07;
	hdev->le_adv_min_interval = 0x0800;
	hdev->le_adv_max_interval = 0x0800;
	hdev->le_scan_interval = DISCOV_LE_SCAN_INT_FAST;
	hdev->le_scan_window = DISCOV_LE_SCAN_WIN_FAST;
	hdev->le_scan_int_suspend = DISCOV_LE_SCAN_INT_SLOW1;
	hdev->le_scan_window_suspend = DISCOV_LE_SCAN_WIN_SLOW1;
	hdev->le_scan_int_discovery = DISCOV_LE_SCAN_INT;
	hdev->le_scan_window_discovery = DISCOV_LE_SCAN_WIN;
	hdev->le_scan_int_adv_monitor = DISCOV_LE_SCAN_INT_FAST;
	hdev->le_scan_window_adv_monitor = DISCOV_LE_SCAN_WIN_FAST;
	hdev->le_scan_int_connect = DISCOV_LE_SCAN_INT_CONN;
	hdev->le_scan_window_connect = DISCOV_LE_SCAN_WIN_CONN;
	hdev->le_conn_min_interval = 0x0018;
	hdev->le_conn_max_interval = 0x0028;
	hdev->le_conn_latency = 0x0000;
	hdev->le_supv_timeout = 0x002a;
	hdev->le_def_tx_len = 0x001b;
	hdev->le_def_tx_time = 0x0148;
	hdev->le_max_tx_len = 0x001b;
	hdev->le_max_tx_time = 0x0148;
	hdev->le_max_rx_len = 0x001b;
	hdev->le_max_rx_time = 0x0148;
	hdev->le_max_key_size = SMP_MAX_ENC_KEY_SIZE;
	hdev->le_min_key_size = SMP_MIN_ENC_KEY_SIZE;
	hdev->le_tx_def_phys = HCI_LE_SET_PHY_1M;
	hdev->le_rx_def_phys = HCI_LE_SET_PHY_1M;
	hdev->le_num_of_adv_sets = HCI_MAX_ADV_INSTANCES;
	hdev->def_multi_adv_rotation_duration = HCI_DEFAULT_ADV_DURATION;
	hdev->def_le_autoconnect_timeout = HCI_LE_CONN_TIMEOUT;
	hdev->min_le_tx_power = HCI_TX_POWER_INVALID;
	hdev->max_le_tx_power = HCI_TX_POWER_INVALID;

	hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT;
	hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT;
	hdev->conn_info_min_age = DEFAULT_CONN_INFO_MIN_AGE;
	hdev->conn_info_max_age = DEFAULT_CONN_INFO_MAX_AGE;
	hdev->auth_payload_timeout = DEFAULT_AUTH_PAYLOAD_TIMEOUT;
	hdev->min_enc_key_size = HCI_MIN_ENC_KEY_SIZE;

	/* default 1.28 sec page scan */
	hdev->def_page_scan_type = PAGE_SCAN_TYPE_STANDARD;
	hdev->def_page_scan_int = 0x0800;
	hdev->def_page_scan_window = 0x0012;

	mutex_init(&hdev->lock);
	mutex_init(&hdev->req_lock);
	mutex_init(&hdev->mgmt_pending_lock);

	ida_init(&hdev->unset_handle_ida);

	INIT_LIST_HEAD(&hdev->mesh_pending);
	INIT_LIST_HEAD(&hdev->mgmt_pending);
	INIT_LIST_HEAD(&hdev->reject_list);
	INIT_LIST_HEAD(&hdev->accept_list);
	INIT_LIST_HEAD(&hdev->uuids);
	INIT_LIST_HEAD(&hdev->link_keys);
	INIT_LIST_HEAD(&hdev->long_term_keys);
	INIT_LIST_HEAD(&hdev->identity_resolving_keys);
	INIT_LIST_HEAD(&hdev->remote_oob_data);
	INIT_LIST_HEAD(&hdev->le_accept_list);
	INIT_LIST_HEAD(&hdev->le_resolv_list);
	INIT_LIST_HEAD(&hdev->le_conn_params);
INIT_LIST_HEAD(&hdev->pend_le_conns); INIT_LIST_HEAD(&hdev->pend_le_reports); INIT_LIST_HEAD(&hdev->conn_hash.list); INIT_LIST_HEAD(&hdev->adv_instances); INIT_LIST_HEAD(&hdev->blocked_keys); INIT_LIST_HEAD(&hdev->monitored_devices); INIT_LIST_HEAD(&hdev->local_codecs); INIT_WORK(&hdev->rx_work, hci_rx_work); INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); INIT_WORK(&hdev->error_reset, hci_error_reset); hci_cmd_sync_init(hdev); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); skb_queue_head_init(&hdev->raw_q); init_waitqueue_head(&hdev->req_wait_q); INIT_DELAYED_WORK(&hdev->cmd_timer, hci_cmd_timeout); INIT_DELAYED_WORK(&hdev->ncmd_timer, hci_ncmd_timeout); hci_devcd_setup(hdev); hci_init_sysfs(hdev); discovery_init(hdev); return hdev; } EXPORT_SYMBOL(hci_alloc_dev_priv); /* Free HCI device */ void hci_free_dev(struct hci_dev *hdev) { /* will free via device release */ put_device(&hdev->dev); } EXPORT_SYMBOL(hci_free_dev); /* Register HCI device */ int hci_register_dev(struct hci_dev *hdev) { int id, error; if (!hdev->open || !hdev->close || !hdev->send) return -EINVAL; id = ida_alloc_max(&hci_index_ida, HCI_MAX_ID - 1, GFP_KERNEL); if (id < 0) return id; error = dev_set_name(&hdev->dev, "hci%u", id); if (error) return error; hdev->name = dev_name(&hdev->dev); hdev->id = id; BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); hdev->workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, hdev->name); if (!hdev->workqueue) { error = -ENOMEM; goto err; } hdev->req_workqueue = alloc_ordered_workqueue("%s", WQ_HIGHPRI, hdev->name); if (!hdev->req_workqueue) { destroy_workqueue(hdev->workqueue); error = -ENOMEM; goto err; } if (!IS_ERR_OR_NULL(bt_debugfs)) hdev->debugfs = debugfs_create_dir(hdev->name, bt_debugfs); error = device_add(&hdev->dev); if (error < 0) goto err_wqueue; hci_leds_init(hdev); hdev->rfkill = 
rfkill_alloc(hdev->name, &hdev->dev, RFKILL_TYPE_BLUETOOTH, &hci_rfkill_ops, hdev); if (hdev->rfkill) { if (rfkill_register(hdev->rfkill) < 0) { rfkill_destroy(hdev->rfkill); hdev->rfkill = NULL; } } if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) hci_dev_set_flag(hdev, HCI_RFKILLED); hci_dev_set_flag(hdev, HCI_SETUP); hci_dev_set_flag(hdev, HCI_AUTO_OFF); /* Assume BR/EDR support until proven otherwise (such as * through reading supported features during init. */ hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); write_lock(&hci_dev_list_lock); list_add(&hdev->list, &hci_dev_list); write_unlock(&hci_dev_list_lock); /* Devices that are marked for raw-only usage are unconfigured * and should not be included in normal operation. */ if (hci_test_quirk(hdev, HCI_QUIRK_RAW_DEVICE)) hci_dev_set_flag(hdev, HCI_UNCONFIGURED); /* Mark Remote Wakeup connection flag as supported if driver has wakeup * callback. */ if (hdev->wakeup) hdev->conn_flags |= HCI_CONN_FLAG_REMOTE_WAKEUP; hci_sock_dev_event(hdev, HCI_DEV_REG); hci_dev_hold(hdev); error = hci_register_suspend_notifier(hdev); if (error) BT_WARN("register suspend notifier failed error:%d\n", error); queue_work(hdev->req_workqueue, &hdev->power_on); idr_init(&hdev->adv_monitors_idr); msft_register(hdev); return id; err_wqueue: debugfs_remove_recursive(hdev->debugfs); destroy_workqueue(hdev->workqueue); destroy_workqueue(hdev->req_workqueue); err: ida_free(&hci_index_ida, hdev->id); return error; } EXPORT_SYMBOL(hci_register_dev); /* Unregister HCI device */ void hci_unregister_dev(struct hci_dev *hdev) { BT_DBG("%p name %s bus %d", hdev, hdev->name, hdev->bus); mutex_lock(&hdev->unregister_lock); hci_dev_set_flag(hdev, HCI_UNREGISTER); mutex_unlock(&hdev->unregister_lock); write_lock(&hci_dev_list_lock); list_del(&hdev->list); write_unlock(&hci_dev_list_lock); synchronize_srcu(&hdev->srcu); cleanup_srcu_struct(&hdev->srcu); disable_work_sync(&hdev->rx_work); disable_work_sync(&hdev->cmd_work); disable_work_sync(&hdev->tx_work); 
/* Continuation of hci_unregister_dev(): disable the remaining work
 * items, close the device and tear down its registrations.
 */
	disable_work_sync(&hdev->power_on);
	disable_work_sync(&hdev->error_reset);

	hci_cmd_sync_clear(hdev);

	hci_unregister_suspend_notifier(hdev);

	hci_dev_do_close(hdev);

	if (!test_bit(HCI_INIT, &hdev->flags) &&
	    !hci_dev_test_flag(hdev, HCI_SETUP) &&
	    !hci_dev_test_flag(hdev, HCI_CONFIG)) {
		hci_dev_lock(hdev);
		mgmt_index_removed(hdev);
		hci_dev_unlock(hdev);
	}

	/* mgmt_index_removed should take care of emptying the
	 * pending list
	 */
	BUG_ON(!list_empty(&hdev->mgmt_pending));

	hci_sock_dev_event(hdev, HCI_DEV_UNREG);

	if (hdev->rfkill) {
		rfkill_unregister(hdev->rfkill);
		rfkill_destroy(hdev->rfkill);
	}

	device_del(&hdev->dev);
	/* Actual cleanup is deferred until hci_release_dev(). */
	hci_dev_put(hdev);
}
EXPORT_SYMBOL(hci_unregister_dev);

/* Release HCI device
 *
 * Destroys the workqueues and debugfs entries, clears every per-device
 * list under hdev->lock, gives the device index back, frees the cached
 * skbs and finally frees @hdev itself.
 */
void hci_release_dev(struct hci_dev *hdev)
{
	debugfs_remove_recursive(hdev->debugfs);
	kfree_const(hdev->hw_info);
	kfree_const(hdev->fw_info);

	destroy_workqueue(hdev->workqueue);
	destroy_workqueue(hdev->req_workqueue);

	hci_dev_lock(hdev);
	hci_bdaddr_list_clear(&hdev->reject_list);
	hci_bdaddr_list_clear(&hdev->accept_list);
	hci_uuids_clear(hdev);
	hci_link_keys_clear(hdev);
	hci_smp_ltks_clear(hdev);
	hci_smp_irks_clear(hdev);
	hci_remote_oob_data_clear(hdev);
	hci_adv_instances_clear(hdev);
	hci_adv_monitors_clear(hdev);
	hci_bdaddr_list_clear(&hdev->le_accept_list);
	hci_bdaddr_list_clear(&hdev->le_resolv_list);
	hci_conn_params_clear_all(hdev);
	hci_discovery_filter_clear(hdev);
	hci_blocked_keys_clear(hdev);
	hci_codec_list_clear(&hdev->local_codecs);
	msft_release(hdev);
	hci_dev_unlock(hdev);

	ida_destroy(&hdev->unset_handle_ida);
	ida_free(&hci_index_ida, hdev->id);
	kfree_skb(hdev->sent_cmd);
	kfree_skb(hdev->req_skb);
	kfree_skb(hdev->recv_event);
	kfree(hdev);
}
EXPORT_SYMBOL(hci_release_dev);

/* Register hci_suspend_notifier() as the device's PM notifier, unless one
 * is already set or HCI_QUIRK_NO_SUSPEND_NOTIFIER is present. Returns 0
 * or the register_pm_notifier() result.
 */
int hci_register_suspend_notifier(struct hci_dev *hdev)
{
	int ret = 0;

	if (!hdev->suspend_notifier.notifier_call &&
	    !hci_test_quirk(hdev, HCI_QUIRK_NO_SUSPEND_NOTIFIER)) {
		hdev->suspend_notifier.notifier_call = hci_suspend_notifier;
		ret = register_pm_notifier(&hdev->suspend_notifier);
	}

	return ret;
}

/* Unregister the device's PM notifier when one is set; the callback
 * pointer is cleared only when unregistering succeeded. Returns 0 or the
 * unregister_pm_notifier() result.
 */
int hci_unregister_suspend_notifier(struct hci_dev *hdev)
{
	int ret = 0;

	if (hdev->suspend_notifier.notifier_call) {
		ret = unregister_pm_notifier(&hdev->suspend_notifier);
		if (!ret)
			hdev->suspend_notifier.notifier_call = NULL;
	}

	return ret;
}

/* Cancel ongoing command synchronously:
 *
 * - Cancel command timer
 * - Reset command counter
 * - Cancel command request
 *
 * When the device is flagged HCI_UNREGISTER the timers are disabled
 * rather than merely cancelled.
 */
static void hci_cancel_cmd_sync(struct hci_dev *hdev, int err)
{
	bt_dev_dbg(hdev, "err 0x%2.2x", err);

	if (hci_dev_test_flag(hdev, HCI_UNREGISTER)) {
		disable_delayed_work_sync(&hdev->cmd_timer);
		disable_delayed_work_sync(&hdev->ncmd_timer);
	} else  {
		cancel_delayed_work_sync(&hdev->cmd_timer);
		cancel_delayed_work_sync(&hdev->ncmd_timer);
	}

	atomic_set(&hdev->cmd_cnt, 1);

	hci_cmd_sync_cancel_sync(hdev, err);
}

/* Suspend HCI device
 *
 * Returns 0 without doing anything when the device is not powered, is
 * being unregistered or is powering down; otherwise returns the
 * hci_suspend_sync() result.
 */
int hci_suspend_dev(struct hci_dev *hdev)
{
	int ret;

	bt_dev_dbg(hdev, "");

	/* Suspend should only act on when powered. */
	if (!hdev_is_powered(hdev) ||
	    hci_dev_test_flag(hdev, HCI_UNREGISTER))
		return 0;

	/* If powering down don't attempt to suspend */
	if (mgmt_powering_down(hdev))
		return 0;

	/* Cancel potentially blocking sync operation before suspend */
	hci_cancel_cmd_sync(hdev, EHOSTDOWN);

	hci_req_sync_lock(hdev);
	ret = hci_suspend_sync(hdev);
	hci_req_sync_unlock(hdev);

	hci_clear_wake_reason(hdev);
	mgmt_suspending(hdev, hdev->suspend_state);

	hci_sock_dev_event(hdev, HCI_DEV_SUSPEND);
	return ret;
}
EXPORT_SYMBOL(hci_suspend_dev);

/* Resume HCI device
 *
 * Returns 0 without doing anything when the device is not powered, is
 * being unregistered or is powering down; otherwise returns the
 * hci_resume_sync() result.
 */
int hci_resume_dev(struct hci_dev *hdev)
{
	int ret;

	bt_dev_dbg(hdev, "");

	/* Resume should only act on when powered. */
	if (!hdev_is_powered(hdev) ||
	    hci_dev_test_flag(hdev, HCI_UNREGISTER))
		return 0;

	/* If powering down don't attempt to resume */
	if (mgmt_powering_down(hdev))
		return 0;

	hci_req_sync_lock(hdev);
	ret = hci_resume_sync(hdev);
	hci_req_sync_unlock(hdev);

	mgmt_resuming(hdev, hdev->wake_reason, &hdev->wake_addr,
		      hdev->wake_addr_type);

	hci_sock_dev_event(hdev, HCI_DEV_RESUME);
	return ret;
}
EXPORT_SYMBOL(hci_resume_dev);

/* Reset HCI device
 *
 * Builds a 3-byte Hardware Error event and feeds it to hci_recv_frame().
 * Returns -ENOMEM when the skb cannot be allocated, otherwise the
 * hci_recv_frame() result.
 */
int hci_reset_dev(struct hci_dev *hdev)
{
	static const u8 hw_err[] = { HCI_EV_HARDWARE_ERROR, 0x01, 0x00 };
	struct sk_buff *skb;

	skb = bt_skb_alloc(3, GFP_ATOMIC);
	if (!skb)
		return -ENOMEM;

	hci_skb_pkt_type(skb) = HCI_EVENT_PKT;
	skb_put_data(skb, hw_err, 3);

	bt_dev_err(hdev, "Injecting HCI hardware error event");

	/* Send Hardware Error to upper stack */
	return hci_recv_frame(hdev, skb);
}
EXPORT_SYMBOL(hci_reset_dev);

/* Return the packet type of @skb as classified by the driver's
 * classify_pkt_type callback when it has one, otherwise the type already
 * stored in the skb.
 */
static u8 hci_dev_classify_pkt_type(struct hci_dev *hdev, struct sk_buff *skb)
{
	if (hdev->classify_pkt_type)
		return hdev->classify_pkt_type(hdev, skb);

	return hci_skb_pkt_type(skb);
}

/* Receive frame from HCI drivers
 *
 * Queues @skb on hdev->rx_q and schedules the RX work. The skb is freed
 * and an error returned when @hdev is NULL or neither HCI_UP nor
 * HCI_INIT is set (-ENXIO), or when the packet type is not one of the
 * accepted types (-EINVAL). Returns 0 on success.
 */
int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb)
{
	u8 dev_pkt_type;

	if (!hdev || (!test_bit(HCI_UP, &hdev->flags)
		      && !test_bit(HCI_INIT, &hdev->flags))) {
		kfree_skb(skb);
		return -ENXIO;
	}

	/* Check if the driver agree with packet type classification */
	dev_pkt_type = hci_dev_classify_pkt_type(hdev, skb);
	if (hci_skb_pkt_type(skb) != dev_pkt_type) {
		hci_skb_pkt_type(skb) = dev_pkt_type;
	}

	switch (hci_skb_pkt_type(skb)) {
	case HCI_EVENT_PKT:
		break;
	case HCI_ACLDATA_PKT:
		/* Detect if ISO packet has been sent as ACL */
		if (hci_conn_num(hdev, CIS_LINK) ||
		    hci_conn_num(hdev, BIS_LINK) ||
			hci_conn_num(hdev, PA_LINK)) {
			__u16 handle = __le16_to_cpu(hci_acl_hdr(skb)->handle);
			__u8 type;

			type = hci_conn_lookup_type(hdev, hci_handle(handle));
			if (type == CIS_LINK || type == BIS_LINK ||
			    type == PA_LINK)
				hci_skb_pkt_type(skb) = HCI_ISODATA_PKT;
		}
		break;
	case HCI_SCODATA_PKT:
		break;
	case HCI_ISODATA_PKT:
		break;
	case HCI_DRV_PKT:
		break;
	default:
		kfree_skb(skb);
		return -EINVAL;
	}

	/* Incoming skb */
	bt_cb(skb)->incoming = 1;

	/* Time stamp */
	__net_timestamp(skb);

	skb_queue_tail(&hdev->rx_q, skb);
	queue_work(hdev->workqueue, &hdev->rx_work);

	return 0;
}
EXPORT_SYMBOL(hci_recv_frame);

/* Receive diagnostic message from HCI drivers
 *
 * Tags @skb as HCI_DIAG_PKT, timestamps it and queues it on hdev->rx_q.
 * Always returns 0.
 */
int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb)
{
	/* Mark as diagnostic packet */
	hci_skb_pkt_type(skb) = HCI_DIAG_PKT;

	/* Time stamp */
	__net_timestamp(skb);

	skb_queue_tail(&hdev->rx_q, skb);
	queue_work(hdev->workqueue, &hdev->rx_work);

	return 0;
}
EXPORT_SYMBOL(hci_recv_diag);

/* Replace hdev->hw_info with the printf-style formatted string, freeing
 * the previous value first.
 */
void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...)
{
	va_list vargs;

	va_start(vargs, fmt);
	kfree_const(hdev->hw_info);
	hdev->hw_info = kvasprintf_const(GFP_KERNEL, fmt, vargs);
	va_end(vargs);
}
EXPORT_SYMBOL(hci_set_hw_info);

/* Replace hdev->fw_info with the printf-style formatted string, freeing
 * the previous value first.
 */
void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...)
{
	va_list vargs;

	va_start(vargs, fmt);
	kfree_const(hdev->fw_info);
	hdev->fw_info = kvasprintf_const(GFP_KERNEL, fmt, vargs);
	va_end(vargs);
}
EXPORT_SYMBOL(hci_set_fw_info);

/* ---- Interface to upper protocols ---- */

/* Append @cb to hci_cb_list under hci_cb_list_lock. Always returns 0. */
int hci_register_cb(struct hci_cb *cb)
{
	BT_DBG("%p name %s", cb, cb->name);

	mutex_lock(&hci_cb_list_lock);
	list_add_tail(&cb->list, &hci_cb_list);
	mutex_unlock(&hci_cb_list_lock);

	return 0;
}
EXPORT_SYMBOL(hci_register_cb);

/* Unlink @cb from hci_cb_list under hci_cb_list_lock. Always returns 0. */
int hci_unregister_cb(struct hci_cb *cb)
{
	BT_DBG("%p name %s", cb, cb->name);

	mutex_lock(&hci_cb_list_lock);
	list_del(&cb->list);
	mutex_unlock(&hci_cb_list_lock);

	return 0;
}
EXPORT_SYMBOL(hci_unregister_cb);

/* Hand @skb to the driver's send callback after copying it to the
 * monitor and, in promiscuous mode, to the sockets. HCI_DRV_PKT packets
 * are processed by hci_drv_process_cmd() instead of the driver.
 *
 * The skb is freed here when the device is not HCI_RUNNING (-EINVAL),
 * after an HCI_DRV_PKT packet was processed, or when the send callback
 * fails. Returns 0 on success or a negative error.
 */
static int hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
{
	int err;

	BT_DBG("%s type %d len %d", hdev->name, hci_skb_pkt_type(skb),
	       skb->len);

	/* Time stamp */
	__net_timestamp(skb);

	/* Send copy to monitor */
	hci_send_to_monitor(hdev, skb);

	if (atomic_read(&hdev->promisc)) {
		/* Send copy to the sockets */
		hci_send_to_sock(hdev, skb);
	}

	/* Get rid of skb owner, prior to sending to the driver. */
	skb_orphan(skb);

	if (!test_bit(HCI_RUNNING, &hdev->flags)) {
		kfree_skb(skb);
		return -EINVAL;
	}

	if (hci_skb_pkt_type(skb) == HCI_DRV_PKT) {
		/* Intercept HCI Drv packet here and don't go with hdev->send
		 * callback.
		 */
		err = hci_drv_process_cmd(hdev, skb);
		kfree_skb(skb);
		return err;
	}

	err = hdev->send(hdev, skb);
	if (err < 0) {
		bt_dev_err(hdev, "sending frame failed (%d)", err);
		kfree_skb(skb);
		return err;
	}

	return 0;
}

/* Call hci_conn_tx_queue() for @skb on @conn, then send the frame. */
static int hci_send_conn_frame(struct hci_dev *hdev, struct hci_conn *conn,
			       struct sk_buff *skb)
{
	hci_conn_tx_queue(conn, skb);
	return hci_send_frame(hdev, skb);
}

/* Send HCI command
 *
 * Builds the command skb, flags it HCI_REQ_START, queues it on
 * hdev->cmd_q and schedules the command work. Returns 0 on success or
 * -ENOMEM when the skb cannot be allocated.
 */
int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen,
		 const void *param)
{
	struct sk_buff *skb;

	BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen);

	skb = hci_cmd_sync_alloc(hdev, opcode, plen, param, NULL);
	if (!skb) {
		bt_dev_err(hdev, "no memory for command");
		return -ENOMEM;
	}

	/* Stand-alone HCI commands must be flagged as
	 * single-command requests.
	 */
	bt_cb(skb)->hci.req_flags |= HCI_REQ_START;

	skb_queue_tail(&hdev->cmd_q, skb);
	queue_work(hdev->workqueue, &hdev->cmd_work);

	return 0;
}

int __hci_cmd_send(struct hci_dev *hdev, u16 opcode, u32 plen,
		   const void *param)
{
	struct sk_buff *skb;

	if (hci_opcode_ogf(opcode) != 0x3f) {
		/* A controller receiving a command shall respond with either
		 * a Command Status Event or a Command Complete Event.
		 * Therefore, all standard HCI commands must be sent via the
		 * standard API, using hci_send_cmd or hci_cmd_sync helpers.
		 * Some vendors do not comply with this rule for vendor-specific
		 * commands and do not return any event. We want to support
		 * unresponded commands for such cases only.
*/ bt_dev_err(hdev, "unresponded command not supported"); return -EINVAL; } skb = hci_cmd_sync_alloc(hdev, opcode, plen, param, NULL); if (!skb) { bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", opcode); return -ENOMEM; } hci_send_frame(hdev, skb); return 0; } EXPORT_SYMBOL(__hci_cmd_send); /* Get data from the previously sent command */ static void *hci_cmd_data(struct sk_buff *skb, __u16 opcode) { struct hci_command_hdr *hdr; if (!skb || skb->len < HCI_COMMAND_HDR_SIZE) return NULL; hdr = (void *)skb->data; if (hdr->opcode != cpu_to_le16(opcode)) return NULL; return skb->data + HCI_COMMAND_HDR_SIZE; } /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { void *data; /* Check if opcode matches last sent command */ data = hci_cmd_data(hdev->sent_cmd, opcode); if (!data) /* Check if opcode matches last request */ data = hci_cmd_data(hdev->req_skb, opcode); return data; } /* Get data from last received event */ void *hci_recv_event_data(struct hci_dev *hdev, __u8 event) { struct hci_event_hdr *hdr; int offset; if (!hdev->recv_event) return NULL; hdr = (void *)hdev->recv_event->data; offset = sizeof(*hdr); if (hdr->evt != event) { /* In case of LE metaevent check the subevent match */ if (hdr->evt == HCI_EV_LE_META) { struct hci_ev_le_meta *ev; ev = (void *)hdev->recv_event->data + offset; offset += sizeof(*ev); if (ev->subevent == event) goto found; } return NULL; } found: bt_dev_dbg(hdev, "event 0x%2.2x", event); return hdev->recv_event->data + offset; } /* Send ACL data */ static void hci_add_acl_hdr(struct sk_buff *skb, __u16 handle, __u16 flags) { struct hci_acl_hdr *hdr; int len = skb->len; skb_push(skb, HCI_ACL_HDR_SIZE); skb_reset_transport_header(skb); hdr = (struct hci_acl_hdr *)skb_transport_header(skb); hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags)); hdr->dlen = cpu_to_le16(len); } static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, struct sk_buff *skb, 
__u16 flags) { struct hci_conn *conn = chan->conn; struct hci_dev *hdev = conn->hdev; struct sk_buff *list; skb->len = skb_headlen(skb); skb->data_len = 0; hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; hci_add_acl_hdr(skb, conn->handle, flags); list = skb_shinfo(skb)->frag_list; if (!list) { /* Non fragmented */ BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len); skb_queue_tail(queue, skb); } else { /* Fragmented */ BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); skb_shinfo(skb)->frag_list = NULL; /* Queue all fragments atomically. We need to use spin_lock_bh * here because of 6LoWPAN links, as there this function is * called from softirq and using normal spin lock could cause * deadlocks. */ spin_lock_bh(&queue->lock); __skb_queue_tail(queue, skb); flags &= ~ACL_START; flags |= ACL_CONT; do { skb = list; list = list->next; hci_skb_pkt_type(skb) = HCI_ACLDATA_PKT; hci_add_acl_hdr(skb, conn->handle, flags); BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); __skb_queue_tail(queue, skb); } while (list); spin_unlock_bh(&queue->lock); } bt_dev_dbg(hdev, "chan %p queued %d", chan, skb_queue_len(queue)); } void hci_send_acl(struct hci_chan *chan, struct sk_buff *skb, __u16 flags) { struct hci_dev *hdev = chan->conn->hdev; BT_DBG("%s chan %p flags 0x%4.4x", hdev->name, chan, flags); hci_queue_acl(chan, &chan->data_q, skb, flags); queue_work(hdev->workqueue, &hdev->tx_work); } /* Send SCO data */ void hci_send_sco(struct hci_conn *conn, struct sk_buff *skb) { struct hci_dev *hdev = conn->hdev; struct hci_sco_hdr hdr; BT_DBG("%s len %d", hdev->name, skb->len); hdr.handle = cpu_to_le16(conn->handle); hdr.dlen = skb->len; skb_push(skb, HCI_SCO_HDR_SIZE); skb_reset_transport_header(skb); memcpy(skb_transport_header(skb), &hdr, HCI_SCO_HDR_SIZE); hci_skb_pkt_type(skb) = HCI_SCODATA_PKT; skb_queue_tail(&conn->data_q, skb); bt_dev_dbg(hdev, "hcon %p queued %d", conn, skb_queue_len(&conn->data_q)); queue_work(hdev->workqueue, &hdev->tx_work); } /* Send ISO data 
*/ static void hci_add_iso_hdr(struct sk_buff *skb, __u16 handle, __u8 flags) { struct hci_iso_hdr *hdr; int len = skb->len; skb_push(skb, HCI_ISO_HDR_SIZE); skb_reset_transport_header(skb); hdr = (struct hci_iso_hdr *)skb_transport_header(skb); hdr->handle = cpu_to_le16(hci_handle_pack(handle, flags)); hdr->dlen = cpu_to_le16(len); } static void hci_queue_iso(struct hci_conn *conn, struct sk_buff_head *queue, struct sk_buff *skb) { struct hci_dev *hdev = conn->hdev; struct sk_buff *list; __u16 flags; skb->len = skb_headlen(skb); skb->data_len = 0; hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; list = skb_shinfo(skb)->frag_list; flags = hci_iso_flags_pack(list ? ISO_START : ISO_SINGLE, 0x00); hci_add_iso_hdr(skb, conn->handle, flags); if (!list) { /* Non fragmented */ BT_DBG("%s nonfrag skb %p len %d", hdev->name, skb, skb->len); skb_queue_tail(queue, skb); } else { /* Fragmented */ BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); skb_shinfo(skb)->frag_list = NULL; __skb_queue_tail(queue, skb); do { skb = list; list = list->next; hci_skb_pkt_type(skb) = HCI_ISODATA_PKT; flags = hci_iso_flags_pack(list ? ISO_CONT : ISO_END, 0x00); hci_add_iso_hdr(skb, conn->handle, flags); BT_DBG("%s frag %p len %d", hdev->name, skb, skb->len); __skb_queue_tail(queue, skb); } while (list); } bt_dev_dbg(hdev, "hcon %p queued %d", conn, skb_queue_len(queue)); } void hci_send_iso(struct hci_conn *conn, struct sk_buff *skb) { struct hci_dev *hdev = conn->hdev; BT_DBG("%s len %d", hdev->name, skb->len); hci_queue_iso(conn, &conn->data_q, skb); queue_work(hdev->workqueue, &hdev->tx_work); } /* ---- HCI TX task (outgoing data) ---- */ /* HCI Connection scheduler */ static inline void hci_quote_sent(struct hci_conn *conn, int num, int *quote) { struct hci_dev *hdev; int cnt, q; if (!conn) { *quote = 0; return; } hdev = conn->hdev; switch (conn->type) { case ACL_LINK: cnt = hdev->acl_cnt; break; case SCO_LINK: case ESCO_LINK: cnt = hdev->sco_cnt; break; case LE_LINK: cnt = hdev->le_mtu ? 
hdev->le_cnt : hdev->acl_cnt; break; case CIS_LINK: case BIS_LINK: case PA_LINK: cnt = hdev->iso_cnt; break; default: cnt = 0; bt_dev_err(hdev, "unknown link type %d", conn->type); } q = cnt / num; *quote = q ? q : 1; } static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, int *quote) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *conn = NULL, *c; unsigned int num = 0, min = ~0; /* We don't have to lock device here. Connections are always * added and removed with TX task disabled. */ rcu_read_lock(); list_for_each_entry_rcu(c, &h->list, list) { if (c->type != type || skb_queue_empty(&c->data_q)) continue; bt_dev_dbg(hdev, "hcon %p state %s queued %d", c, state_to_string(c->state), skb_queue_len(&c->data_q)); if (c->state != BT_CONNECTED && c->state != BT_CONFIG) continue; num++; if (c->sent < min) { min = c->sent; conn = c; } if (hci_conn_num(hdev, type) == num) break; } rcu_read_unlock(); hci_quote_sent(conn, num, quote); BT_DBG("conn %p quote %d", conn, *quote); return conn; } static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; bt_dev_err(hdev, "link tx timeout"); hci_dev_lock(hdev); /* Kill stalled connections */ list_for_each_entry(c, &h->list, list) { if (c->type == type && c->sent) { bt_dev_err(hdev, "killing stalled connection %pMR", &c->dst); hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); } } hci_dev_unlock(hdev); } static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, int *quote) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_chan *chan = NULL; unsigned int num = 0, min = ~0, cur_prio = 0; struct hci_conn *conn; int conn_num = 0; BT_DBG("%s", hdev->name); rcu_read_lock(); list_for_each_entry_rcu(conn, &h->list, list) { struct hci_chan *tmp; if (conn->type != type) continue; if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) continue; conn_num++; list_for_each_entry_rcu(tmp, &conn->chan_list, list) { struct sk_buff 
*skb; if (skb_queue_empty(&tmp->data_q)) continue; skb = skb_peek(&tmp->data_q); if (skb->priority < cur_prio) continue; if (skb->priority > cur_prio) { num = 0; min = ~0; cur_prio = skb->priority; } num++; if (conn->sent < min) { min = conn->sent; chan = tmp; } } if (hci_conn_num(hdev, type) == conn_num) break; } rcu_read_unlock(); if (!chan) return NULL; hci_quote_sent(chan->conn, num, quote); BT_DBG("chan %p quote %d", chan, *quote); return chan; } static void hci_prio_recalculate(struct hci_dev *hdev, __u8 type) { struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *conn; int num = 0; BT_DBG("%s", hdev->name); rcu_read_lock(); list_for_each_entry_rcu(conn, &h->list, list) { struct hci_chan *chan; if (conn->type != type) continue; if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) continue; num++; list_for_each_entry_rcu(chan, &conn->chan_list, list) { struct sk_buff *skb; if (chan->sent) { chan->sent = 0; continue; } if (skb_queue_empty(&chan->data_q)) continue; skb = skb_peek(&chan->data_q); if (skb->priority >= HCI_PRIO_MAX - 1) continue; skb->priority = HCI_PRIO_MAX - 1; BT_DBG("chan %p skb %p promoted to %d", chan, skb, skb->priority); } if (hci_conn_num(hdev, type) == num) break; } rcu_read_unlock(); } static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type) { unsigned long timeout; if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) return; switch (type) { case ACL_LINK: /* tx timeout must be longer than maximum link supervision * timeout (40.9 seconds) */ timeout = hdev->acl_last_tx + HCI_ACL_TX_TIMEOUT; break; case LE_LINK: /* tx timeout must be longer than maximum link supervision * timeout (40.9 seconds) */ timeout = hdev->le_last_tx + HCI_ACL_TX_TIMEOUT; break; case CIS_LINK: case BIS_LINK: case PA_LINK: /* tx timeout must be longer than the maximum transport latency * (8.388607 seconds) */ timeout = hdev->iso_last_tx + HCI_ISO_TX_TIMEOUT; break; default: return; } if (!cnt && time_after(jiffies, timeout)) 
hci_link_tx_to(hdev, type); } /* Schedule SCO */ static void hci_sched_sco(struct hci_dev *hdev, __u8 type) { struct hci_conn *conn; struct sk_buff *skb; int quote, *cnt; unsigned int pkts = hdev->sco_pkts; bt_dev_dbg(hdev, "type %u", type); if (!hci_conn_num(hdev, type) || !pkts) return; /* Use sco_pkts if flow control has not been enabled which will limit * the amount of buffer sent in a row. */ if (!hci_dev_test_flag(hdev, HCI_SCO_FLOWCTL)) cnt = &pkts; else cnt = &hdev->sco_cnt; while (*cnt && (conn = hci_low_sent(hdev, type, &quote))) { while (quote-- && (skb = skb_dequeue(&conn->data_q))) { BT_DBG("skb %p len %d", skb, skb->len); hci_send_conn_frame(hdev, conn, skb); conn->sent++; if (conn->sent == ~0) conn->sent = 0; (*cnt)--; } } /* Rescheduled if all packets were sent and flow control is not enabled * as there could be more packets queued that could not be sent and * since no HCI_EV_NUM_COMP_PKTS event will be generated the reschedule * needs to be forced. */ if (!pkts && !hci_dev_test_flag(hdev, HCI_SCO_FLOWCTL)) queue_work(hdev->workqueue, &hdev->tx_work); } static void hci_sched_acl_pkt(struct hci_dev *hdev) { unsigned int cnt = hdev->acl_cnt; struct hci_chan *chan; struct sk_buff *skb; int quote; __check_timeout(hdev, cnt, ACL_LINK); while (hdev->acl_cnt && (chan = hci_chan_sent(hdev, ACL_LINK, &quote))) { u32 priority = (skb_peek(&chan->data_q))->priority; while (quote-- && (skb = skb_peek(&chan->data_q))) { BT_DBG("chan %p skb %p len %d priority %u", chan, skb, skb->len, skb->priority); /* Stop if priority has changed */ if (skb->priority < priority) break; skb = skb_dequeue(&chan->data_q); hci_conn_enter_active_mode(chan->conn, bt_cb(skb)->force_active); hci_send_conn_frame(hdev, chan->conn, skb); hdev->acl_last_tx = jiffies; hdev->acl_cnt--; chan->sent++; chan->conn->sent++; /* Send pending SCO packets right away */ hci_sched_sco(hdev, SCO_LINK); hci_sched_sco(hdev, ESCO_LINK); } } if (cnt != hdev->acl_cnt) hci_prio_recalculate(hdev, ACL_LINK); } 
/* Schedule ACL traffic, if there is at least one ACL connection. */
static void hci_sched_acl(struct hci_dev *hdev)
{
	BT_DBG("%s", hdev->name);

	/* No ACL link over BR/EDR controller */
	if (!hci_conn_num(hdev, ACL_LINK))
		return;

	hci_sched_acl_pkt(hdev);
}

/* Schedule LE traffic.
 *
 * The buffer counter is hdev->le_cnt when hdev->le_pkts is non-zero and
 * hdev->acl_cnt otherwise. The inner loop matches hci_sched_acl_pkt()
 * except that hci_conn_enter_active_mode() is not called and le_last_tx
 * is updated instead of acl_last_tx.
 */
static void hci_sched_le(struct hci_dev *hdev)
{
	struct hci_chan *chan;
	struct sk_buff *skb;
	int quote, *cnt, tmp;

	BT_DBG("%s", hdev->name);

	if (!hci_conn_num(hdev, LE_LINK))
		return;

	cnt = hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt;

	__check_timeout(hdev, *cnt, LE_LINK);

	/* Remember the starting budget to detect whether anything was sent */
	tmp = *cnt;
	while (*cnt && (chan = hci_chan_sent(hdev, LE_LINK, &quote))) {
		u32 priority = (skb_peek(&chan->data_q))->priority;
		while (quote-- && (skb = skb_peek(&chan->data_q))) {
			BT_DBG("chan %p skb %p len %d priority %u", chan, skb,
			       skb->len, skb->priority);

			/* Stop if priority has changed */
			if (skb->priority < priority)
				break;

			skb = skb_dequeue(&chan->data_q);

			hci_send_conn_frame(hdev, chan->conn, skb);
			hdev->le_last_tx = jiffies;

			(*cnt)--;
			chan->sent++;
			chan->conn->sent++;

			/* Send pending SCO packets right away */
			hci_sched_sco(hdev, SCO_LINK);
			hci_sched_sco(hdev, ESCO_LINK);
		}
	}

	if (*cnt != tmp)
		hci_prio_recalculate(hdev, LE_LINK);
}

/* Schedule iso
 *
 * Sends queued packets for connections of @type while hdev->iso_cnt
 * buffers remain, always servicing the connection picked by
 * hci_low_sent().
 */
static void hci_sched_iso(struct hci_dev *hdev, __u8 type)
{
	struct hci_conn *conn;
	struct sk_buff *skb;
	int quote, *cnt;

	BT_DBG("%s", hdev->name);

	if (!hci_conn_num(hdev, type))
		return;

	cnt = &hdev->iso_cnt;

	__check_timeout(hdev, *cnt, type);

	while (*cnt && (conn = hci_low_sent(hdev, type, &quote))) {
		while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
			BT_DBG("skb %p len %d", skb, skb->len);

			hci_send_conn_frame(hdev, conn, skb);
			hdev->iso_last_tx = jiffies;

			/* Wrap to 0 instead of resting at the all-ones value */
			conn->sent++;
			if (conn->sent == ~0)
				conn->sent = 0;
			(*cnt)--;
		}
	}
}

/* TX work handler.
 *
 * Unless the device is in HCI_USER_CHANNEL mode, runs the per-link-type
 * schedulers in the fixed order SCO, eSCO, CIS, BIS, PA, ACL, LE. The
 * raw queue is always drained afterwards.
 */
static void hci_tx_work(struct work_struct *work)
{
	struct hci_dev *hdev = container_of(work, struct hci_dev, tx_work);
	struct sk_buff *skb;

	BT_DBG("%s acl %d sco %d le %d iso %d", hdev->name, hdev->acl_cnt,
	       hdev->sco_cnt, hdev->le_cnt, hdev->iso_cnt);

	if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
		/* Schedule queues and send stuff to HCI driver */
		hci_sched_sco(hdev, SCO_LINK);
		hci_sched_sco(hdev, ESCO_LINK);
		hci_sched_iso(hdev, CIS_LINK);
		hci_sched_iso(hdev, BIS_LINK);
		hci_sched_iso(hdev, PA_LINK);
		hci_sched_acl(hdev);
		hci_sched_le(hdev);
	}

	/* Send next queued raw (unknown type) packet */
	while ((skb = skb_dequeue(&hdev->raw_q)))
		hci_send_frame(hdev, skb);
}

/* ----- HCI RX task (incoming data processing) ----- */

/* ACL data packet
 *
 * Strips the ACL header, looks the connection up by handle and passes
 * the payload to l2cap_recv_acldata(). The skb is freed here when the
 * header is too short or the handle is unknown.
 *
 * NOTE(review): conn is used after hci_dev_unlock() and no reference is
 * taken in this function - confirm its lifetime is guaranteed elsewhere.
 */
static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
{
	struct hci_acl_hdr *hdr;
	struct hci_conn *conn;
	__u16 handle, flags;

	hdr = skb_pull_data(skb, sizeof(*hdr));
	if (!hdr) {
		bt_dev_err(hdev, "ACL packet too small");
		goto drop;
	}

	/* The 16-bit field packs both the flags and the handle */
	handle = __le16_to_cpu(hdr->handle);
	flags  = hci_flags(handle);
	handle = hci_handle(handle);

	bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len,
		   handle, flags);

	hdev->stat.acl_rx++;

	hci_dev_lock(hdev);
	conn = hci_conn_hash_lookup_handle(hdev, handle);
	hci_dev_unlock(hdev);

	if (conn) {
		hci_conn_enter_active_mode(conn, BT_POWER_FORCE_ACTIVE_OFF);

		/* Send to upper protocol */
		l2cap_recv_acldata(conn, skb, flags);
		return;
	} else {
		bt_dev_err(hdev, "ACL packet for unknown connection handle %d",
			   handle);
	}

drop:
	kfree_skb(skb);
}

/* SCO data packet
 *
 * Strips the SCO header, looks the connection up by handle and passes
 * the payload to sco_recv_scodata(). The low two bits of the header
 * flags are stored as the packet status. The skb is freed here when the
 * header is too short or the handle is unknown.
 */
static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
{
	struct hci_sco_hdr *hdr;
	struct hci_conn *conn;
	__u16 handle, flags;

	hdr = skb_pull_data(skb, sizeof(*hdr));
	if (!hdr) {
		bt_dev_err(hdev, "SCO packet too small");
		goto drop;
	}

	/* The 16-bit field packs both the flags and the handle */
	handle = __le16_to_cpu(hdr->handle);
	flags  = hci_flags(handle);
	handle = hci_handle(handle);

	bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len,
		   handle, flags);

	hdev->stat.sco_rx++;

	hci_dev_lock(hdev);
	conn = hci_conn_hash_lookup_handle(hdev, handle);
	hci_dev_unlock(hdev);

	if (conn) {
		/* Send to upper protocol */
		hci_skb_pkt_status(skb) = flags & 0x03;
		sco_recv_scodata(conn, skb);
		return;
	} else {
		bt_dev_err_ratelimited(hdev, "SCO packet for unknown connection handle %d",
				       handle);
	}

drop:
	kfree_skb(skb);
}

/* ISO data packet
 *
 * Strips the ISO header, looks the connection up by handle and passes
 * the payload to iso_recv(). The skb is freed here when the header is
 * too short or the handle is unknown. No RX statistic is updated in this
 * function.
 */
static void hci_isodata_packet(struct hci_dev *hdev, struct sk_buff *skb)
{
	struct hci_iso_hdr *hdr;
	struct hci_conn *conn;
	__u16 handle, flags;

	hdr = skb_pull_data(skb, sizeof(*hdr));
	if (!hdr) {
		bt_dev_err(hdev, "ISO packet too small");
		goto drop;
	}

	/* The 16-bit field packs both the flags and the handle */
	handle = __le16_to_cpu(hdr->handle);
	flags  = hci_flags(handle);
	handle = hci_handle(handle);

	bt_dev_dbg(hdev, "len %d handle 0x%4.4x flags 0x%4.4x", skb->len,
		   handle, flags);

	hci_dev_lock(hdev);
	conn = hci_conn_hash_lookup_handle(hdev, handle);
	hci_dev_unlock(hdev);

	if (!conn) {
		bt_dev_err(hdev, "ISO packet for unknown connection handle %d",
			   handle);
		goto drop;
	}

	/* Send to upper protocol */
	iso_recv(conn, skb, flags);
	return;

drop:
	kfree_skb(skb);
}

/* Return true when the current request has no more commands queued:
 * either cmd_q is empty or its head skb starts a new request
 * (HCI_REQ_START).
 */
static bool hci_req_is_complete(struct hci_dev *hdev)
{
	struct sk_buff *skb;

	skb = skb_peek(&hdev->cmd_q);
	if (!skb)
		return true;

	return (bt_cb(skb)->hci.req_flags & HCI_REQ_START);
}

/* Re-queue a clone of the last sent command at the head of cmd_q and
 * schedule cmd_work.
 *
 * Nothing is done when there is no sent command, when it was
 * HCI_OP_RESET, or when cloning fails.
 */
static void hci_resend_last(struct hci_dev *hdev)
{
	struct hci_command_hdr *sent;
	struct sk_buff *skb;
	u16 opcode;

	if (!hdev->sent_cmd)
		return;

	sent = (void *) hdev->sent_cmd->data;
	opcode = __le16_to_cpu(sent->opcode);
	if (opcode == HCI_OP_RESET)
		return;

	skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
	if (!skb)
		return;

	skb_queue_head(&hdev->cmd_q, skb);
	queue_work(hdev->workqueue, &hdev->cmd_work);
}

/* Work out which completion callback, if any, belongs to a finished
 * command.
 *
 * @opcode/@status describe the completed command. On return at most one
 * of *@req_complete and *@req_complete_skb has been written; neither is
 * written when the event does not match the last command or when the
 * request still has commands pending. Callers are presumably expected to
 * pre-initialize both - confirm at the call sites.
 */
void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status,
			  hci_req_complete_t *req_complete,
			  hci_req_complete_skb_t *req_complete_skb)
{
	struct sk_buff *skb;
	unsigned long flags;

	BT_DBG("opcode 0x%04x status 0x%02x", opcode, status);

	/* If the completed command doesn't match the last one that was
	 * sent we need to do special handling of it.
	 */
	if (!hci_sent_cmd_data(hdev, opcode)) {
		/* Some CSR based controllers generate a spontaneous
		 * reset complete event during init and any pending
		 * command will never be completed. In such a case we
		 * need to resend whatever was the last sent
		 * command.
		 */
		if (test_bit(HCI_INIT, &hdev->flags) && opcode == HCI_OP_RESET)
			hci_resend_last(hdev);

		return;
	}

	/* If we reach this point this event matches the last command sent */
	hci_dev_clear_flag(hdev, HCI_CMD_PENDING);

	/* If the command succeeded and there's still more commands in
	 * this request the request is not yet complete.
	 */
	if (!status && !hci_req_is_complete(hdev))
		return;

	skb = hdev->req_skb;

	/* If this was the last command in a request the complete
	 * callback would be found in hdev->req_skb instead of the
	 * command queue (hdev->cmd_q).
	 */
	if (skb && bt_cb(skb)->hci.req_flags & HCI_REQ_SKB) {
		*req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
		return;
	}

	if (skb && bt_cb(skb)->hci.req_complete) {
		*req_complete = bt_cb(skb)->hci.req_complete;
		return;
	}

	/* Remove all pending commands belonging to this request */
	spin_lock_irqsave(&hdev->cmd_q.lock, flags);
	while ((skb = __skb_dequeue(&hdev->cmd_q))) {
		/* The start of the next request goes back on the queue */
		if (bt_cb(skb)->hci.req_flags & HCI_REQ_START) {
			__skb_queue_head(&hdev->cmd_q, skb);
			break;
		}

		/* Each dropped command overwrites the callback */
		if (bt_cb(skb)->hci.req_flags & HCI_REQ_SKB)
			*req_complete_skb = bt_cb(skb)->hci.req_complete_skb;
		else
			*req_complete = bt_cb(skb)->hci.req_complete;
		dev_kfree_skb_irq(skb);
	}
	spin_unlock_irqrestore(&hdev->cmd_q.lock, flags);
}

/* RX work handler: drain hdev->rx_q and dispatch every frame by packet
 * type. Frames that are not dispatched are freed here.
 */
static void hci_rx_work(struct work_struct *work)
{
	struct hci_dev *hdev = container_of(work, struct hci_dev, rx_work);
	struct sk_buff *skb;

	BT_DBG("%s", hdev->name);

	/* The kcov_remote functions used for collecting packet parsing
	 * coverage information from this background thread and associate
	 * the coverage with the syscall's thread which originally injected
	 * the packet. This helps fuzzing the kernel.
	 *
	 * kcov_remote_stop() sits in the loop's increment expression so
	 * that it also runs for iterations that end with "continue".
	 */
	for (; (skb = skb_dequeue(&hdev->rx_q)); kcov_remote_stop()) {
		kcov_remote_start_common(skb_get_kcov_handle(skb));

		/* Send copy to monitor */
		hci_send_to_monitor(hdev, skb);

		if (atomic_read(&hdev->promisc)) {
			/* Send copy to the sockets */
			hci_send_to_sock(hdev, skb);
		}

		/* If the device has been opened in HCI_USER_CHANNEL,
		 * the userspace has exclusive access to device.
		 * When device is HCI_INIT, we still need to process
		 * the data packets to the driver in order
		 * to complete its setup().
		 */
		if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL) &&
		    !test_bit(HCI_INIT, &hdev->flags)) {
			kfree_skb(skb);
			continue;
		}

		if (test_bit(HCI_INIT, &hdev->flags)) {
			/* Don't process data packets in this state. */
			switch (hci_skb_pkt_type(skb)) {
			case HCI_ACLDATA_PKT:
			case HCI_SCODATA_PKT:
			case HCI_ISODATA_PKT:
				kfree_skb(skb);
				continue;
			}
		}

		/* Process frame */
		switch (hci_skb_pkt_type(skb)) {
		case HCI_EVENT_PKT:
			BT_DBG("%s Event packet", hdev->name);
			hci_event_packet(hdev, skb);
			break;

		case HCI_ACLDATA_PKT:
			BT_DBG("%s ACL data packet", hdev->name);
			hci_acldata_packet(hdev, skb);
			break;

		case HCI_SCODATA_PKT:
			BT_DBG("%s SCO data packet", hdev->name);
			hci_scodata_packet(hdev, skb);
			break;

		case HCI_ISODATA_PKT:
			BT_DBG("%s ISO data packet", hdev->name);
			hci_isodata_packet(hdev, skb);
			break;

		default:
			kfree_skb(skb);
			break;
		}
	}
}

/* Send one command taken from the command queue.
 *
 * A clone of @skb replaces hdev->sent_cmd. If the clone fails, @skb is
 * put back at the head of cmd_q and cmd_work is rescheduled. Commands
 * other than HCI_OP_NOP are passed to hci_send_frame() and consume one
 * command credit; a send failure cancels the pending sync request with
 * the positive error value.
 */
static void hci_send_cmd_sync(struct hci_dev *hdev, struct sk_buff *skb)
{
	int err;

	bt_dev_dbg(hdev, "skb %p", skb);

	kfree_skb(hdev->sent_cmd);

	hdev->sent_cmd = skb_clone(skb, GFP_KERNEL);
	if (!hdev->sent_cmd) {
		skb_queue_head(&hdev->cmd_q, skb);
		queue_work(hdev->workqueue, &hdev->cmd_work);
		return;
	}

	if (hci_skb_opcode(skb) != HCI_OP_NOP) {
		err = hci_send_frame(hdev, skb);
		if (err < 0) {
			hci_cmd_sync_cancel_sync(hdev, -err);
			return;
		}
		atomic_dec(&hdev->cmd_cnt);
	}

	/* Keep a second clone as req_skb while a sync request is pending
	 * and HCI_CMD_PENDING was not already set.
	 */
	if (hdev->req_status == HCI_REQ_PEND &&
	    !hci_dev_test_and_set_flag(hdev, HCI_CMD_PENDING)) {
		kfree_skb(hdev->req_skb);
		hdev->req_skb = skb_clone(hdev->sent_cmd, GFP_KERNEL);
	}
}

/* Command work handler.
 *
 * When a command credit is available, dequeues a single command, sends
 * it, and then either cancels the command timer (HCI_RESET set or
 * HCI_CMD_DRAIN_WORKQUEUE flagged) or arms it with HCI_CMD_TIMEOUT.
 */
static void hci_cmd_work(struct work_struct *work)
{
	struct hci_dev *hdev = container_of(work, struct hci_dev, cmd_work);
	struct sk_buff *skb;

	BT_DBG("%s cmd_cnt %d cmd queued %d", hdev->name,
	       atomic_read(&hdev->cmd_cnt), skb_queue_len(&hdev->cmd_q));

	/* Send queued commands */
	if (atomic_read(&hdev->cmd_cnt)) {
		skb = skb_dequeue(&hdev->cmd_q);
		if (!skb)
			return;

		hci_send_cmd_sync(hdev, skb);

		rcu_read_lock();
		if (test_bit(HCI_RESET, &hdev->flags) ||
		    hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE))
			cancel_delayed_work(&hdev->cmd_timer);
		else
			queue_delayed_work(hdev->workqueue, &hdev->cmd_timer,
					   HCI_CMD_TIMEOUT);
		rcu_read_unlock();
	}
}
12 12 1 2 3 4 5 6 7 8 9 10 11 12 /* SPDX-License-Identifier: GPL-2.0-or-later */
#ifndef _NET_CORE_SOCK_DESTRUCTOR_H
#define _NET_CORE_SOCK_DESTRUCTOR_H
#include <net/tcp.h>

/* Return true when the destructor of @skb is sock_wfree, __sock_wfree or,
 * when CONFIG_INET is enabled, tcp_wfree.
 */
static inline bool is_skb_wmem(const struct sk_buff *skb)
{
	return skb->destructor == sock_wfree ||
	       skb->destructor == __sock_wfree ||
	       (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree);
}
#endif
5 143 716 717 717 715 714 712 633 673 622 7 7 7 6 710 29 707 708 708 707 525 707 62 653 708 520 5 706 706 707 520 707 707 7 5 35 35 32 3 35 31 31 15 7 30 29 29 5 5 5 5 5 5 23 11 11 4 4 1 3 3 7 4 1 6 4 6 16 20 2 6 22 12 22 22 10 21 10 22 27 30 6 35 7 89 37 35 118 308 303 309 143 143 1 1 38 38 37 12 7 6 6 1 27 19 9 27 32 38 31 55 13 13 12 56 6 3 26 25 111 112 112 20 17 20 20 18 14 19 12 12 12 494 494 494 494 494 494 494 494 494 494 494 494 494 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 
416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 
916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 // SPDX-License-Identifier: 
GPL-2.0-or-later /* * PF_INET6 socket protocol family * Linux INET6 implementation * * Authors: * Pedro Roque <roque@di.fc.ul.pt> * * Adapted from linux/net/ipv4/af_inet.c * * Fixes: * piggy, Karl Knutson : Socket protocol table * Hideaki YOSHIFUJI : sin6_scope_id support * Arnaldo Melo : check proc_net_create return, cleanups */ #define pr_fmt(fmt) "IPv6: " fmt #include <linux/module.h> #include <linux/capability.h> #include <linux/errno.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kernel.h> #include <linux/timer.h> #include <linux/string.h> #include <linux/sockios.h> #include <linux/net.h> #include <linux/fcntl.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/proc_fs.h> #include <linux/stat.h> #include <linux/init.h> #include <linux/slab.h> #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/icmpv6.h> #include <linux/netfilter_ipv6.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/udp.h> #include <net/udplite.h> #include <net/tcp.h> #include <net/ping.h> #include <net/protocol.h> #include <net/inet_common.h> #include <net/route.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> #include <net/ipv6_stubs.h> #include <net/ndisc.h> #ifdef CONFIG_IPV6_TUNNEL #include <net/ip6_tunnel.h> #endif #include <net/calipso.h> #include <net/seg6.h> #include <net/rpl.h> #include <net/compat.h> #include <net/xfrm.h> #include <net/ioam6.h> #include <net/rawv6.h> #include <net/rps.h> #include <linux/uaccess.h> #include <linux/mroute6.h> #include "ip6_offload.h" MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); /* The inetsw6 table contains everything that inet6_create needs to * build a new socket. 
*/ static struct list_head inetsw6[SOCK_MAX]; static DEFINE_SPINLOCK(inetsw6_lock); struct ipv6_params ipv6_defaults = { .disable_ipv6 = 0, .autoconf = 1, }; static int disable_ipv6_mod; module_param_named(disable, disable_ipv6_mod, int, 0444); MODULE_PARM_DESC(disable, "Disable IPv6 module such that it is non-functional"); module_param_named(disable_ipv6, ipv6_defaults.disable_ipv6, int, 0444); MODULE_PARM_DESC(disable_ipv6, "Disable IPv6 on all interfaces"); module_param_named(autoconf, ipv6_defaults.autoconf, int, 0444); MODULE_PARM_DESC(autoconf, "Enable IPv6 address autoconfiguration on all interfaces"); bool ipv6_mod_enabled(void) { return disable_ipv6_mod == 0; } EXPORT_SYMBOL_GPL(ipv6_mod_enabled); static struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) { const int offset = sk->sk_prot->ipv6_pinfo_offset; return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } void inet6_sock_destruct(struct sock *sk) { inet6_cleanup_sock(sk); inet_sock_destruct(sk); } EXPORT_SYMBOL_GPL(inet6_sock_destruct); static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) { struct inet_sock *inet; struct ipv6_pinfo *np; struct sock *sk; struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; int try_loading_module = 0; int err; if (protocol < 0 || protocol >= IPPROTO_MAX) return -EINVAL; /* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); list_for_each_entry_rcu(answer, &inetsw6[sock->type], list) { err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) break; } else { /* Check for the two wild cases. */ if (IPPROTO_IP == protocol) { protocol = answer->protocol; break; } if (IPPROTO_IP == answer->protocol) break; } err = -EPROTONOSUPPORT; } if (err) { if (try_loading_module < 2) { rcu_read_unlock(); /* * Be more specific, e.g. 
net-pf-10-proto-132-type-1 * (net-pf-PF_INET6-proto-IPPROTO_SCTP-type-SOCK_STREAM) */ if (++try_loading_module == 1) request_module("net-pf-%d-proto-%d-type-%d", PF_INET6, protocol, sock->type); /* * Fall back to generic, e.g. net-pf-10-proto-132 * (net-pf-PF_INET6-proto-IPPROTO_SCTP) */ else request_module("net-pf-%d-proto-%d", PF_INET6, protocol); goto lookup_protocol; } else goto out_rcu_unlock; } err = -EPERM; if (sock->type == SOCK_RAW && !kern && !ns_capable(net->user_ns, CAP_NET_RAW)) goto out_rcu_unlock; sock->ops = answer->ops; answer_prot = answer->prot; answer_flags = answer->flags; rcu_read_unlock(); WARN_ON(!answer_prot->slab); err = -ENOBUFS; sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern); if (!sk) goto out; sock_init_data(sock, sk); err = 0; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; if (INET_PROTOSW_ICSK & answer_flags) inet_init_csk_locks(sk); inet = inet_sk(sk); inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags); if (SOCK_RAW == sock->type) { inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet_set_bit(HDRINCL, sk); } sk->sk_destruct = inet6_sock_destruct; sk->sk_family = PF_INET6; sk->sk_protocol = protocol; sk->sk_backlog_rcv = answer->prot->backlog_rcv; inet_sk(sk)->pinet6 = np = inet6_sk_generic(sk); np->hop_limit = -1; np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; inet6_set_bit(MC6_LOOP, sk); inet6_set_bit(MC6_ALL, sk); np->pmtudisc = IPV6_PMTUDISC_WANT; inet6_assign_bit(REPFLOW, sk, net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED); sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash); /* Init the ipv4 part of the socket since we can have sockets * using v6 API for ipv4. 
*/ inet->uc_ttl = -1; inet_set_bit(MC_LOOP, sk); inet->mc_ttl = 1; inet->mc_index = 0; RCU_INIT_POINTER(inet->mc_list, NULL); inet->rcv_tos = 0; if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically shares. */ inet->inet_sport = htons(inet->inet_num); err = sk->sk_prot->hash(sk); if (err) goto out_sk_release; } if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); if (err) goto out_sk_release; } if (!kern) { err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); if (err) goto out_sk_release; } out: return err; out_rcu_unlock: rcu_read_unlock(); goto out; out_sk_release: sk_common_release(sk); sock->sk = NULL; goto out; } static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, u32 flags) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); __be32 v4addr = 0; unsigned short snum; bool saved_ipv6only; int addr_type = 0; int err = 0; if (addr->sin6_family != AF_INET6) return -EAFNOSUPPORT; addr_type = ipv6_addr_type(&addr->sin6_addr); if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM) return -EINVAL; snum = ntohs(addr->sin6_port); if (!(flags & BIND_NO_CAP_NET_BIND_SERVICE) && snum && inet_port_requires_bind_service(net, snum) && !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; if (flags & BIND_WITH_LOCK) lock_sock(sk); /* Check these errors (active socket, double bind). */ if (sk->sk_state != TCP_CLOSE || inet->inet_num) { err = -EINVAL; goto out; } /* Check if the address belongs to the host. 
*/ if (addr_type == IPV6_ADDR_MAPPED) { struct net_device *dev = NULL; int chk_addr_ret; /* Binding to v4-mapped address on a v6-only socket * makes no sense */ if (ipv6_only_sock(sk)) { err = -EINVAL; goto out; } rcu_read_lock(); if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out_unlock; } } /* Reproduce AF_INET checks to make the bindings consistent */ v4addr = addr->sin6_addr.s6_addr32[3]; chk_addr_ret = inet_addr_type_dev_table(net, dev, v4addr); rcu_read_unlock(); if (!inet_addr_valid_or_nonlocal(net, inet, v4addr, chk_addr_ret)) { err = -EADDRNOTAVAIL; goto out; } } else { if (addr_type != IPV6_ADDR_ANY) { struct net_device *dev = NULL; rcu_read_lock(); if (__ipv6_addr_needs_scope_id(addr_type)) { if (addr_len >= sizeof(struct sockaddr_in6) && addr->sin6_scope_id) { /* Override any existing binding, if another one * is supplied by user. */ sk->sk_bound_dev_if = addr->sin6_scope_id; } /* Binding to link-local address requires an interface */ if (!sk->sk_bound_dev_if) { err = -EINVAL; goto out_unlock; } } if (sk->sk_bound_dev_if) { dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); if (!dev) { err = -ENODEV; goto out_unlock; } } /* ipv4 addr of the socket is invalid. Only the * unspecified and mapped address have a v4 equivalent. */ v4addr = LOOPBACK4_IPV6; if (!(addr_type & IPV6_ADDR_MULTICAST)) { if (!ipv6_can_nonlocal_bind(net, inet) && !ipv6_chk_addr(net, &addr->sin6_addr, dev, 0)) { err = -EADDRNOTAVAIL; goto out_unlock; } } rcu_read_unlock(); } } inet->inet_rcv_saddr = v4addr; inet->inet_saddr = v4addr; sk->sk_v6_rcv_saddr = addr->sin6_addr; if (!(addr_type & IPV6_ADDR_MULTICAST)) np->saddr = addr->sin6_addr; saved_ipv6only = sk->sk_ipv6only; if (addr_type != IPV6_ADDR_ANY && addr_type != IPV6_ADDR_MAPPED) sk->sk_ipv6only = 1; /* Make sure we are allowed to bind here. 
*/ if (snum || !(inet_test_bit(BIND_ADDRESS_NO_PORT, sk) || (flags & BIND_FORCE_ADDRESS_NO_PORT))) { err = sk->sk_prot->get_port(sk, snum); if (err) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); goto out; } if (!(flags & BIND_FROM_BPF)) { err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk); if (err) { sk->sk_ipv6only = saved_ipv6only; inet_reset_saddr(sk); if (sk->sk_prot->put_port) sk->sk_prot->put_port(sk); goto out; } } } if (addr_type != IPV6_ADDR_ANY) sk->sk_userlocks |= SOCK_BINDADDR_LOCK; if (snum) sk->sk_userlocks |= SOCK_BINDPORT_LOCK; inet->inet_sport = htons(inet->inet_num); inet->inet_dport = 0; inet->inet_daddr = 0; out: if (flags & BIND_WITH_LOCK) release_sock(sk); return err; out_unlock: rcu_read_unlock(); goto out; } int inet6_bind_sk(struct sock *sk, struct sockaddr *uaddr, int addr_len) { u32 flags = BIND_WITH_LOCK; const struct proto *prot; int err = 0; /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); /* If the socket has its own bind function then use it. */ if (prot->bind) return prot->bind(sk, uaddr, addr_len); if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; /* BPF prog is run before any checks are done so that if the prog * changes context in a wrong way it will be caught. 
*/ err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, &addr_len, CGROUP_INET6_BIND, &flags); if (err) return err; return __inet6_bind(sk, uaddr, addr_len, flags); } /* bind for INET6 API */ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { return inet6_bind_sk(sock->sk, uaddr, addr_len); } EXPORT_SYMBOL(inet6_bind); int inet6_release(struct socket *sock) { struct sock *sk = sock->sk; if (!sk) return -EINVAL; /* Free mc lists */ ipv6_sock_mc_close(sk); /* Free ac lists */ ipv6_sock_ac_close(sk); return inet_release(sock); } EXPORT_SYMBOL(inet6_release); void inet6_cleanup_sock(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; struct ipv6_txoptions *opt; /* Release rx options */ skb = xchg(&np->pktoptions, NULL); kfree_skb(skb); skb = xchg(&np->rxpmtu, NULL); kfree_skb(skb); /* Free flowlabels */ fl6_free_socklist(sk); /* Free tx options */ opt = unrcu_pointer(xchg(&np->opt, NULL)); if (opt) { atomic_sub(opt->tot_len, &sk->sk_omem_alloc); txopt_put(opt); } } EXPORT_SYMBOL_GPL(inet6_cleanup_sock); /* * This does both peername and sockname. 
*/ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, int peer) { struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr; int sin_addr_len = sizeof(*sin); struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_scope_id = 0; lock_sock(sk); if (peer) { if (!inet->inet_dport || (((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)) && peer == 1)) { release_sock(sk); return -ENOTCONN; } sin->sin6_port = inet->inet_dport; sin->sin6_addr = sk->sk_v6_daddr; if (inet6_test_bit(SNDFLOW, sk)) sin->sin6_flowinfo = np->flow_label; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETPEERNAME); } else { if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) sin->sin6_addr = np->saddr; else sin->sin6_addr = sk->sk_v6_rcv_saddr; sin->sin6_port = inet->inet_sport; BPF_CGROUP_RUN_SA_PROG(sk, (struct sockaddr *)sin, &sin_addr_len, CGROUP_INET6_GETSOCKNAME); } sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, sk->sk_bound_dev_if); release_sock(sk); return sin_addr_len; } EXPORT_SYMBOL(inet6_getname); int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct sock *sk = sock->sk; struct net *net = sock_net(sk); const struct proto *prot; switch (cmd) { case SIOCADDRT: case SIOCDELRT: { struct in6_rtmsg rtmsg; if (copy_from_user(&rtmsg, argp, sizeof(rtmsg))) return -EFAULT; return ipv6_route_ioctl(net, cmd, &rtmsg); } case SIOCSIFADDR: return addrconf_add_ifaddr(net, argp); case SIOCDIFADDR: return addrconf_del_ifaddr(net, argp); case SIOCSIFDSTADDR: return addrconf_set_dstaddr(net, argp); default: /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ prot = READ_ONCE(sk->sk_prot); if (!prot->ioctl) return -ENOIOCTLCMD; return sk_ioctl(sk, cmd, (void __user *)arg); } /*NOTREACHED*/ return 0; } EXPORT_SYMBOL(inet6_ioctl); #ifdef CONFIG_COMPAT struct compat_in6_rtmsg { struct in6_addr rtmsg_dst; struct in6_addr rtmsg_src; struct in6_addr rtmsg_gateway; u32 rtmsg_type; u16 rtmsg_dst_len; u16 rtmsg_src_len; u32 rtmsg_metric; u32 rtmsg_info; u32 rtmsg_flags; s32 rtmsg_ifindex; }; static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd, struct compat_in6_rtmsg __user *ur) { struct in6_rtmsg rt; if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst, 3 * sizeof(struct in6_addr)) || get_user(rt.rtmsg_type, &ur->rtmsg_type) || get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) || get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) || get_user(rt.rtmsg_metric, &ur->rtmsg_metric) || get_user(rt.rtmsg_info, &ur->rtmsg_info) || get_user(rt.rtmsg_flags, &ur->rtmsg_flags) || get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex)) return -EFAULT; return ipv6_route_ioctl(sock_net(sk), cmd, &rt); } int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { void __user *argp = compat_ptr(arg); struct sock *sk = sock->sk; switch (cmd) { case SIOCADDRT: case SIOCDELRT: return inet6_compat_routing_ioctl(sk, cmd, argp); default: return -ENOIOCTLCMD; } } EXPORT_SYMBOL_GPL(inet6_compat_ioctl); #endif /* CONFIG_COMPAT */ INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, size_t)); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; /* IPV6_ADDRFORM can change sk->sk_prot under us. 
*/ prot = READ_ONCE(sk->sk_prot); return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udpv6_sendmsg, sk, msg, size); } INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *, size_t, int, int *)); int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; const struct proto *prot; int addr_len = 0; int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, sk, msg, size, flags, &addr_len); if (err >= 0) msg->msg_namelen = addr_len; return err; } const struct proto_ops inet6_stream_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = inet_stream_connect, /* ok */ .socketpair = sock_no_socketpair, /* a do nothing */ .accept = inet_accept, /* ok */ .getname = inet6_getname, .poll = tcp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .gettstamp = sock_gettstamp, .listen = inet_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet6_sendmsg, /* retpoline's sake */ .recvmsg = inet6_recvmsg, /* retpoline's sake */ #ifdef CONFIG_MMU .mmap = tcp_mmap, #endif .splice_eof = inet_splice_eof, .sendmsg_locked = tcp_sendmsg_locked, .splice_read = tcp_splice_read, .set_peek_off = sk_set_peek_off, .read_sock = tcp_read_sock, .read_skb = tcp_read_skb, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif .set_rcvlowat = tcp_set_rcvlowat, }; EXPORT_SYMBOL_GPL(inet6_stream_ops); const struct proto_ops inet6_dgram_ops = { .family = PF_INET6, .owner = THIS_MODULE, .release = inet6_release, .bind = inet6_bind, .connect = inet_dgram_connect, /* ok */ .socketpair = sock_no_socketpair, /* a do nothing */ .accept = sock_no_accept, /* a do nothing */ .getname = 
inet6_getname, .poll = udp_poll, /* ok */ .ioctl = inet6_ioctl, /* must change */ .gettstamp = sock_gettstamp, .listen = sock_no_listen, /* ok */ .shutdown = inet_shutdown, /* ok */ .setsockopt = sock_common_setsockopt, /* ok */ .getsockopt = sock_common_getsockopt, /* ok */ .sendmsg = inet6_sendmsg, /* retpoline's sake */ .recvmsg = inet6_recvmsg, /* retpoline's sake */ .read_skb = udp_read_skb, .mmap = sock_no_mmap, .set_peek_off = udp_set_peek_off, #ifdef CONFIG_COMPAT .compat_ioctl = inet6_compat_ioctl, #endif }; static const struct net_proto_family inet6_family_ops = { .family = PF_INET6, .create = inet6_create, .owner = THIS_MODULE, }; int inet6_register_protosw(struct inet_protosw *p) { struct list_head *lh; struct inet_protosw *answer; struct list_head *last_perm; int protocol = p->protocol; int ret; spin_lock_bh(&inetsw6_lock); ret = -EINVAL; if (p->type >= SOCK_MAX) goto out_illegal; /* If we are trying to override a permanent protocol, bail. */ answer = NULL; ret = -EPERM; last_perm = &inetsw6[p->type]; list_for_each(lh, &inetsw6[p->type]) { answer = list_entry(lh, struct inet_protosw, list); /* Check only the non-wild match. */ if (INET_PROTOSW_PERMANENT & answer->flags) { if (protocol == answer->protocol) break; last_perm = lh; } answer = NULL; } if (answer) goto out_permanent; /* Add the new entry after the last permanent entry if any, so that * the new entry does not override a permanent entry when matched with * a wild-card protocol. But it is allowed to override any existing * non-permanent entry. This means that when we remove this entry, the * system automatically returns to the old behavior. 
*/ list_add_rcu(&p->list, last_perm); ret = 0; out: spin_unlock_bh(&inetsw6_lock); return ret; out_permanent: pr_err("Attempt to override permanent protocol %d\n", protocol); goto out; out_illegal: pr_err("Ignoring attempt to register invalid socket type %d\n", p->type); goto out; } EXPORT_SYMBOL(inet6_register_protosw); void inet6_unregister_protosw(struct inet_protosw *p) { if (INET_PROTOSW_PERMANENT & p->flags) { pr_err("Attempt to unregister permanent protocol %d\n", p->protocol); } else { spin_lock_bh(&inetsw6_lock); list_del_rcu(&p->list); spin_unlock_bh(&inetsw6_lock); synchronize_net(); } } EXPORT_SYMBOL(inet6_unregister_protosw); int inet6_sk_rebuild_header(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct dst_entry *dst; dst = __sk_dst_check(sk, np->dst_cookie); if (!dst) { struct inet_sock *inet = inet_sk(sk); struct in6_addr *final_p, final; struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_proto = sk->sk_protocol; fl6.daddr = sk->sk_v6_daddr; fl6.saddr = np->saddr; fl6.flowlabel = np->flow_label; fl6.flowi6_oif = sk->sk_bound_dev_if; fl6.flowi6_mark = sk->sk_mark; fl6.fl6_dport = inet->inet_dport; fl6.fl6_sport = inet->inet_sport; fl6.flowi6_uid = sk_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); rcu_read_lock(); final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); if (IS_ERR(dst)) { sk->sk_route_caps = 0; WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); return PTR_ERR(dst); } ip6_dst_store(sk, dst, false, false); } return 0; } EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header); bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb, const struct inet6_skb_parm *opt) { const struct ipv6_pinfo *np = inet6_sk(sk); if (np->rxopt.all) { if (((opt->flags & IP6SKB_HOPBYHOP) && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) && np->rxopt.bits.rxflow) || 
(opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts))) return true; } return false; } static struct packet_type ipv6_packet_type __read_mostly = { .type = cpu_to_be16(ETH_P_IPV6), .func = ipv6_rcv, .list_func = ipv6_list_rcv, }; static int __init ipv6_packet_init(void) { dev_add_pack(&ipv6_packet_type); return 0; } static void ipv6_packet_cleanup(void) { dev_remove_pack(&ipv6_packet_type); } static int __net_init ipv6_init_mibs(struct net *net) { int i; net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib); if (!net->mib.udp_stats_in6) return -ENOMEM; net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib); if (!net->mib.udplite_stats_in6) goto err_udplite_mib; net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib); if (!net->mib.ipv6_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet6_stats; af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i); u64_stats_init(&af_inet6_stats->syncp); } net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib); if (!net->mib.icmpv6_statistics) goto err_icmp_mib; net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib), GFP_KERNEL); if (!net->mib.icmpv6msg_statistics) goto err_icmpmsg_mib; return 0; err_icmpmsg_mib: free_percpu(net->mib.icmpv6_statistics); err_icmp_mib: free_percpu(net->mib.ipv6_statistics); err_ip_mib: free_percpu(net->mib.udplite_stats_in6); err_udplite_mib: free_percpu(net->mib.udp_stats_in6); return -ENOMEM; } static void ipv6_cleanup_mibs(struct net *net) { free_percpu(net->mib.udp_stats_in6); free_percpu(net->mib.udplite_stats_in6); free_percpu(net->mib.ipv6_statistics); free_percpu(net->mib.icmpv6_statistics); kfree(net->mib.icmpv6msg_statistics); } static int __net_init inet6_net_init(struct net *net) { int err = 0; net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.icmpv6_echo_ignore_all = 0; 
net->ipv6.sysctl.icmpv6_echo_ignore_multicast = 0; net->ipv6.sysctl.icmpv6_echo_ignore_anycast = 0; net->ipv6.sysctl.icmpv6_error_anycast_as_unicast = 0; /* By default, rate limit error messages. * Except for pmtu discovery, it would break it. * proc_do_large_bitmap needs pointer to the bitmap. */ bitmap_set(net->ipv6.sysctl.icmpv6_ratemask, 0, ICMPV6_ERRMSG_MAX + 1); bitmap_clear(net->ipv6.sysctl.icmpv6_ratemask, ICMPV6_PKT_TOOBIG, 1); net->ipv6.sysctl.icmpv6_ratemask_ptr = net->ipv6.sysctl.icmpv6_ratemask; net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = IP6_DEFAULT_AUTO_FLOW_LABELS; net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; net->ipv6.sysctl.flowlabel_state_ranges = 0; net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT; net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT; net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN; net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN; net->ipv6.sysctl.fib_notify_on_flag_change = 0; atomic_set(&net->ipv6.fib6_sernum, 1); net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID; net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE; err = ipv6_init_mibs(net); if (err) return err; #ifdef CONFIG_PROC_FS err = udp6_proc_init(net); if (err) goto out; err = tcp6_proc_init(net); if (err) goto proc_tcp6_fail; err = ac6_proc_init(net); if (err) goto proc_ac6_fail; #endif return err; #ifdef CONFIG_PROC_FS proc_ac6_fail: tcp6_proc_exit(net); proc_tcp6_fail: udp6_proc_exit(net); out: ipv6_cleanup_mibs(net); return err; #endif } static void __net_exit inet6_net_exit(struct net *net) { #ifdef CONFIG_PROC_FS udp6_proc_exit(net); tcp6_proc_exit(net); ac6_proc_exit(net); #endif ipv6_cleanup_mibs(net); } static struct pernet_operations inet6_net_ops = { .init = inet6_net_init, .exit = inet6_net_exit, }; static int ipv6_route_input(struct sk_buff *skb) { ip6_route_input(skb); return skb_dst(skb)->error; } static const struct ipv6_stub 
ipv6_stub_impl = { .ipv6_sock_mc_join = ipv6_sock_mc_join, .ipv6_sock_mc_drop = ipv6_sock_mc_drop, .ipv6_dst_lookup_flow = ip6_dst_lookup_flow, .ipv6_route_input = ipv6_route_input, .fib6_get_table = fib6_get_table, .fib6_table_lookup = fib6_table_lookup, .fib6_lookup = fib6_lookup, .fib6_select_path = fib6_select_path, .ip6_mtu_from_fib6 = ip6_mtu_from_fib6, .fib6_nh_init = fib6_nh_init, .fib6_nh_release = fib6_nh_release, .fib6_nh_release_dsts = fib6_nh_release_dsts, .fib6_update_sernum = fib6_update_sernum_stub, .fib6_rt_update = fib6_rt_update, .ip6_del_rt = ip6_del_rt, .udpv6_encap_enable = udpv6_encap_enable, .ndisc_send_na = ndisc_send_na, #if IS_ENABLED(CONFIG_XFRM) .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv, .xfrm6_rcv_encap = xfrm6_rcv_encap, #endif .nd_tbl = &nd_tbl, .ipv6_fragment = ip6_fragment, .ipv6_dev_find = ipv6_dev_find, .ip6_xmit = ip6_xmit, }; static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { .inet6_bind = __inet6_bind, .udp6_lib_lookup = __udp6_lib_lookup, .ipv6_setsockopt = do_ipv6_setsockopt, .ipv6_getsockopt = do_ipv6_getsockopt, .ipv6_dev_get_saddr = ipv6_dev_get_saddr, }; static int __init inet6_init(void) { struct list_head *r; int err = 0; sock_skb_cb_check_size(sizeof(struct inet6_skb_parm)); /* Register the socket-side information for inet6_create. 
*/ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); raw_hashinfo_init(&raw_v6_hashinfo); if (disable_ipv6_mod) { pr_info("Loaded, but administratively disabled, reboot required to enable\n"); goto out; } err = proto_register(&tcpv6_prot, 1); if (err) goto out; err = proto_register(&udpv6_prot, 1); if (err) goto out_unregister_tcp_proto; err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; err = proto_register(&rawv6_prot, 1); if (err) goto out_unregister_udplite_proto; err = proto_register(&pingv6_prot, 1); if (err) goto out_unregister_raw_proto; /* We MUST register RAW sockets before we create the ICMP6, * IGMP6, or NDISC control sockets. */ err = rawv6_init(); if (err) goto out_unregister_ping_proto; /* Register the family here so that the init calls below will * be able to create sockets. (?? is this dangerous ??) */ err = sock_register(&inet6_family_ops); if (err) goto out_sock_register_fail; /* * ipngwg API draft makes clear that the correct semantics * for TCP and UDP is to consider one TCP and UDP instance * in a host available by both INET and INET6 APIs and * able to communicate via both network protocols. */ err = register_pernet_subsys(&inet6_net_ops); if (err) goto register_pernet_fail; err = ip6_mr_init(); if (err) goto ipmr_fail; err = icmpv6_init(); if (err) goto icmp_fail; err = ndisc_init(); if (err) goto ndisc_fail; err = igmp6_init(); if (err) goto igmp_fail; err = ipv6_netfilter_init(); if (err) goto netfilter_fail; /* Create /proc/foo6 entries. 
*/ #ifdef CONFIG_PROC_FS err = -ENOMEM; if (raw6_proc_init()) goto proc_raw6_fail; if (udplite6_proc_init()) goto proc_udplite6_fail; if (ipv6_misc_proc_init()) goto proc_misc6_fail; if (if6_proc_init()) goto proc_if6_fail; #endif err = ip6_route_init(); if (err) goto ip6_route_fail; err = ndisc_late_init(); if (err) goto ndisc_late_fail; err = ip6_flowlabel_init(); if (err) goto ip6_flowlabel_fail; err = ipv6_anycast_init(); if (err) goto ipv6_anycast_fail; err = addrconf_init(); if (err) goto addrconf_fail; /* Init v6 extension headers. */ err = ipv6_exthdrs_init(); if (err) goto ipv6_exthdrs_fail; err = ipv6_frag_init(); if (err) goto ipv6_frag_fail; /* Init v6 transport protocols. */ err = udpv6_init(); if (err) goto udpv6_fail; err = udplitev6_init(); if (err) goto udplitev6_fail; err = udpv6_offload_init(); if (err) goto udpv6_offload_fail; err = tcpv6_init(); if (err) goto tcpv6_fail; err = ipv6_packet_init(); if (err) goto ipv6_packet_fail; err = pingv6_init(); if (err) goto pingv6_fail; err = calipso_init(); if (err) goto calipso_fail; err = seg6_init(); if (err) goto seg6_fail; err = rpl_init(); if (err) goto rpl_fail; err = ioam6_init(); if (err) goto ioam6_fail; err = igmp6_late_init(); if (err) goto igmp6_late_err; #ifdef CONFIG_SYSCTL err = ipv6_sysctl_register(); if (err) goto sysctl_fail; #endif /* ensure that ipv6 stubs are visible only after ipv6 is ready */ wmb(); ipv6_stub = &ipv6_stub_impl; ipv6_bpf_stub = &ipv6_bpf_stub_impl; out: return err; #ifdef CONFIG_SYSCTL sysctl_fail: igmp6_late_cleanup(); #endif igmp6_late_err: ioam6_exit(); ioam6_fail: rpl_exit(); rpl_fail: seg6_exit(); seg6_fail: calipso_exit(); calipso_fail: pingv6_exit(); pingv6_fail: ipv6_packet_cleanup(); ipv6_packet_fail: tcpv6_exit(); tcpv6_fail: udpv6_offload_exit(); udpv6_offload_fail: udplitev6_exit(); udplitev6_fail: udpv6_exit(); udpv6_fail: ipv6_frag_exit(); ipv6_frag_fail: ipv6_exthdrs_exit(); ipv6_exthdrs_fail: addrconf_cleanup(); addrconf_fail: ipv6_anycast_cleanup(); 
ipv6_anycast_fail: ip6_flowlabel_cleanup(); ip6_flowlabel_fail: ndisc_late_cleanup(); ndisc_late_fail: ip6_route_cleanup(); ip6_route_fail: #ifdef CONFIG_PROC_FS if6_proc_exit(); proc_if6_fail: ipv6_misc_proc_exit(); proc_misc6_fail: udplite6_proc_exit(); proc_udplite6_fail: raw6_proc_exit(); proc_raw6_fail: #endif ipv6_netfilter_fini(); netfilter_fail: igmp6_cleanup(); igmp_fail: ndisc_cleanup(); ndisc_fail: icmpv6_cleanup(); icmp_fail: ip6_mr_cleanup(); ipmr_fail: unregister_pernet_subsys(&inet6_net_ops); register_pernet_fail: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); out_sock_register_fail: rawv6_exit(); out_unregister_ping_proto: proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: proto_unregister(&udplitev6_prot); out_unregister_udp_proto: proto_unregister(&udpv6_prot); out_unregister_tcp_proto: proto_unregister(&tcpv6_prot); goto out; } module_init(inet6_init); MODULE_ALIAS_NETPROTO(PF_INET6);
10 9 12 5 2 18 12 1 1 14 9 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206
/*
 * Tracepoint definitions for the "rtc" trace system.
 *
 * The header is guarded by _TRACE_RTC_H but may be read again when
 * TRACE_HEADER_MULTI_READ is defined.
 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM rtc

#if !defined(_TRACE_RTC_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_RTC_H

#include <linux/rtc.h>
#include <linux/tracepoint.h>

/*
 * Event class shared by the time and alarm tracepoints: records a time64_t
 * seconds value together with an error code.
 */
DECLARE_EVENT_CLASS(rtc_time_alarm_class,

	TP_PROTO(time64_t secs, int err),

	TP_ARGS(secs, err),

	TP_STRUCT__entry(
		__field(time64_t, secs)
		__field(int, err)
	),

	TP_fast_assign(
		__entry->secs = secs;
		__entry->err = err;
	),

	TP_printk("UTC (%lld) (%d)",
		  __entry->secs, __entry->err
	)
);

/* Events of rtc_time_alarm_class: set/read time and set/read alarm. */
DEFINE_EVENT(rtc_time_alarm_class, rtc_set_time,

	TP_PROTO(time64_t secs, int err),

	TP_ARGS(secs, err)
);

DEFINE_EVENT(rtc_time_alarm_class, rtc_read_time,

	TP_PROTO(time64_t secs, int err),

	TP_ARGS(secs, err)
);

DEFINE_EVENT(rtc_time_alarm_class, rtc_set_alarm,

	TP_PROTO(time64_t secs, int err),

	TP_ARGS(secs, err)
);

DEFINE_EVENT(rtc_time_alarm_class, rtc_read_alarm,

	TP_PROTO(time64_t secs, int err),

	TP_ARGS(secs, err)
);

/*
 * Records a periodic IRQ frequency and an error code.
 * NOTE(review): freq is stored as int but printed with %u - confirm whether
 * negative values can reach this tracepoint.
 */
TRACE_EVENT(rtc_irq_set_freq,

	TP_PROTO(int freq, int err),

	TP_ARGS(freq, err),

	TP_STRUCT__entry(
		__field(int, freq)
		__field(int, err)
	),

	TP_fast_assign(
		__entry->freq = freq;
		__entry->err = err;
	),

	TP_printk("set RTC periodic IRQ frequency:%u (%d)",
		  __entry->freq, __entry->err
	)
);

/*
 * Records a periodic IRQ enable/disable request and an error code; any
 * non-zero 'enabled' value is printed as "enable".
 */
TRACE_EVENT(rtc_irq_set_state,

	TP_PROTO(int enabled, int err),

	TP_ARGS(enabled, err),

	TP_STRUCT__entry(
		__field(int, enabled)
		__field(int, err)
	),

	TP_fast_assign(
		__entry->enabled = enabled;
		__entry->err = err;
	),

	TP_printk("%s RTC 2^N Hz periodic IRQs (%d)",
		  __entry->enabled ? "enable" : "disable",
		  __entry->err
	)
);

/*
 * Records an alarm IRQ enable/disable request and an error code; any
 * non-zero 'enabled' value is printed as "enable".
 */
TRACE_EVENT(rtc_alarm_irq_enable,

	TP_PROTO(unsigned int enabled, int err),

	TP_ARGS(enabled, err),

	TP_STRUCT__entry(
		__field(unsigned int, enabled)
		__field(int, err)
	),

	TP_fast_assign(
		__entry->enabled = enabled;
		__entry->err = err;
	),

	TP_printk("%s RTC alarm IRQ (%d)",
		  __entry->enabled ? "enable" : "disable",
		  __entry->err
	)
);

/*
 * Event class shared by the offset tracepoints: records a long offset and
 * an error code.
 */
DECLARE_EVENT_CLASS(rtc_offset_class,

	TP_PROTO(long offset, int err),

	TP_ARGS(offset, err),

	TP_STRUCT__entry(
		__field(long, offset)
		__field(int, err)
	),

	TP_fast_assign(
		__entry->offset = offset;
		__entry->err = err;
	),

	TP_printk("RTC offset: %ld (%d)",
		  __entry->offset, __entry->err
	)
);

/* Events of rtc_offset_class: set and read offset. */
DEFINE_EVENT(rtc_offset_class, rtc_set_offset,

	TP_PROTO(long offset, int err),

	TP_ARGS(offset, err)
);

DEFINE_EVENT(rtc_offset_class, rtc_read_offset,

	TP_PROTO(long offset, int err),

	TP_ARGS(offset, err)
);

/*
 * Event class shared by the timer tracepoints: records the rtc_timer
 * pointer plus the timer's node.expires and period values, copied at the
 * time the event fires.
 */
DECLARE_EVENT_CLASS(rtc_timer_class,

	TP_PROTO(struct rtc_timer *timer),

	TP_ARGS(timer),

	TP_STRUCT__entry(
		__field(struct rtc_timer *, timer)
		__field(ktime_t, expires)
		__field(ktime_t, period)
	),

	TP_fast_assign(
		__entry->timer = timer;
		__entry->expires = timer->node.expires;
		__entry->period = timer->period;
	),

	TP_printk("RTC timer:(%p) expires:%lld period:%lld",
		  __entry->timer, __entry->expires, __entry->period
	)
);

/* Events of rtc_timer_class: timer enqueued, dequeued and fired. */
DEFINE_EVENT(rtc_timer_class, rtc_timer_enqueue,

	TP_PROTO(struct rtc_timer *timer),

	TP_ARGS(timer)
);

DEFINE_EVENT(rtc_timer_class, rtc_timer_dequeue,

	TP_PROTO(struct rtc_timer *timer),

	TP_ARGS(timer)
);

DEFINE_EVENT(rtc_timer_class, rtc_timer_fired,

	TP_PROTO(struct rtc_timer *timer),

	TP_ARGS(timer)
);

#endif /* _TRACE_RTC_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
1 1 1 1 2 2 2 16 16 20 20 20 20 20 20 8 20 2 2 2 17 13 13 13 13 11 13 13 1 13 13 9 9 9 9 14 14 6 9 9 9 9 14 2 2 2 10 1 10 1 2 1 14 14 17 17 2 2 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 
490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 
990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 // SPDX-License-Identifier: GPL-2.0-only /* * Fence mechanism for dma-buf and to allow for asynchronous dma access * * Copyright (C) 2012 Canonical Ltd * Copyright (C) 2012 Texas Instruments * * Authors: * Rob Clark <robdclark@gmail.com> * Maarten Lankhorst <maarten.lankhorst@canonical.com> */ #include <linux/slab.h> #include <linux/export.h> #include <linux/atomic.h> #include <linux/dma-fence.h> #include <linux/sched/signal.h> #include <linux/seq_file.h> #define CREATE_TRACE_POINTS #include <trace/events/dma_fence.h> EXPORT_TRACEPOINT_SYMBOL(dma_fence_emit); EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal); EXPORT_TRACEPOINT_SYMBOL(dma_fence_signaled); static DEFINE_SPINLOCK(dma_fence_stub_lock); static struct dma_fence dma_fence_stub; /* * fence context counter: each execution context should have its own * fence context, this allows checking if fences belong to the same * context or not. One device can have multiple separate contexts, * and they're used if some engine can run independently of another. 
*/ static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(1); /** * DOC: DMA fences overview * * DMA fences, represented by &struct dma_fence, are the kernel internal * synchronization primitive for DMA operations like GPU rendering, video * encoding/decoding, or displaying buffers on a screen. * * A fence is initialized using dma_fence_init() and completed using * dma_fence_signal(). Fences are associated with a context, allocated through * dma_fence_context_alloc(), and all fences on the same context are * fully ordered. * * Since the purpose of fences is to facilitate cross-device and * cross-application synchronization, there are multiple ways to use one: * * - Individual fences can be exposed as a &sync_file, accessed as a file * descriptor from userspace, created by calling sync_file_create(). This is * called explicit fencing, since userspace passes around explicit * synchronization points. * * - Some subsystems also have their own explicit fencing primitives, like * &drm_syncobj. Compared to &sync_file, a &drm_syncobj allows the underlying * fence to be updated. * * - Then there's also implicit fencing, where the synchronization points are * implicitly passed around as part of shared &dma_buf instances. Such * implicit fences are stored in &struct dma_resv through the * &dma_buf.resv pointer. */ /** * DOC: fence cross-driver contract * * Since &dma_fence provides a cross driver contract, all drivers must follow the * same rules: * * * Fences must complete in a reasonable time. Fences which represent kernels * and shaders submitted by userspace, which could run forever, must be backed * up by timeout and gpu hang recovery code. Minimally that code must prevent * further command submission and force complete all in-flight fences, e.g. * when the driver or hardware do not support gpu reset, or if the gpu reset * failed for some reason.
Ideally the driver supports gpu recovery which only * affects the offending userspace context, and no other userspace * submissions. * * * Drivers may have different ideas of what completion within a reasonable * time means. Some hang recovery code uses a fixed timeout, others a mix * between observing forward progress and increasingly strict timeouts. * Drivers should not try to second guess timeout handling of fences from * other drivers. * * * To ensure there's no deadlocks of dma_fence_wait() against other locks * drivers should annotate all code required to reach dma_fence_signal(), * which completes the fences, with dma_fence_begin_signalling() and * dma_fence_end_signalling(). * * * Drivers are allowed to call dma_fence_wait() while holding dma_resv_lock(). * This means any code required for fence completion cannot acquire a * &dma_resv lock. Note that this also pulls in the entire established * locking hierarchy around dma_resv_lock() and dma_resv_unlock(). * * * Drivers are allowed to call dma_fence_wait() from their &shrinker * callbacks. This means any code required for fence completion cannot * allocate memory with GFP_KERNEL. * * * Drivers are allowed to call dma_fence_wait() from their &mmu_notifier * respectively &mmu_interval_notifier callbacks. This means any code required * for fence completion cannot allocate memory with GFP_NOFS or GFP_NOIO. * Only GFP_ATOMIC is permissible, which might fail. * * Note that only GPU drivers have a reasonable excuse for both requiring * &mmu_interval_notifier and &shrinker callbacks at the same time as having to * track asynchronous compute work using &dma_fence. No driver outside of * drivers/gpu should ever call dma_fence_wait() in such contexts. 
*/ static const char *dma_fence_stub_get_name(struct dma_fence *fence) { return "stub"; } static const struct dma_fence_ops dma_fence_stub_ops = { .get_driver_name = dma_fence_stub_get_name, .get_timeline_name = dma_fence_stub_get_name, }; /** * dma_fence_get_stub - return a signaled fence * * Return a stub fence which is already signaled. The fence's * timestamp corresponds to the first time after boot this * function is called. */ struct dma_fence *dma_fence_get_stub(void) { spin_lock(&dma_fence_stub_lock); if (!dma_fence_stub.ops) { dma_fence_init(&dma_fence_stub, &dma_fence_stub_ops, &dma_fence_stub_lock, 0, 0); set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &dma_fence_stub.flags); dma_fence_signal_locked(&dma_fence_stub); } spin_unlock(&dma_fence_stub_lock); return dma_fence_get(&dma_fence_stub); } EXPORT_SYMBOL(dma_fence_get_stub); /** * dma_fence_allocate_private_stub - return a private, signaled fence * @timestamp: timestamp when the fence was signaled * * Return a newly allocated and signaled stub fence. */ struct dma_fence *dma_fence_allocate_private_stub(ktime_t timestamp) { struct dma_fence *fence; fence = kzalloc(sizeof(*fence), GFP_KERNEL); if (fence == NULL) return NULL; dma_fence_init(fence, &dma_fence_stub_ops, &dma_fence_stub_lock, 0, 0); set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); dma_fence_signal_timestamp(fence, timestamp); return fence; } EXPORT_SYMBOL(dma_fence_allocate_private_stub); /** * dma_fence_context_alloc - allocate an array of fence contexts * @num: amount of contexts to allocate * * This function will return the first index of the number of fence contexts * allocated. The fence context is used for setting &dma_fence.context to a * unique number by passing the context to dma_fence_init(). 
*/
u64 dma_fence_context_alloc(unsigned num)
{
	/* Asking for zero contexts only triggers a warning; it is not rejected. */
	WARN_ON(num == 0);

	/* fetch_add hands back the counter value from before the addition. */
	return atomic64_fetch_add(num, &dma_fence_context_counter);
}
EXPORT_SYMBOL(dma_fence_context_alloc);

/**
 * DOC: fence signalling annotation
 *
 * Proving correctness of all the kernel code around &dma_fence through code
 * review and testing is tricky for a few reasons:
 *
 * * It is a cross-driver contract, and therefore all drivers must follow the
 *   same rules for lock nesting order, calling contexts for various functions
 *   and anything else significant for in-kernel interfaces. But it is also
 *   impossible to test all drivers in a single machine, hence brute-force N vs.
 *   N testing of all combinations is impossible. Even just limiting to the
 *   possible combinations is infeasible.
 *
 * * There is an enormous amount of driver code involved. For render drivers
 *   there's the tail of command submission, after fences are published,
 *   scheduler code, interrupt and workers to process job completion,
 *   and timeout, gpu reset and gpu hang recovery code. Plus for integration
 *   with core mm we have &mmu_notifier, respectively &mmu_interval_notifier,
 *   and &shrinker. For modesetting drivers there's the commit tail functions
 *   between when fences for an atomic modeset are published, and when the
 *   corresponding vblank completes, including any interrupt processing and
 *   related workers. Auditing all that code, across all drivers, is not
 *   feasible.
 *
 * * Due to how many other subsystems are involved and the locking hierarchies
 *   this pulls in there is extremely thin wiggle-room for driver-specific
 *   differences. &dma_fence interacts with almost all of the core memory
 *   handling through page fault handlers via &dma_resv, dma_resv_lock() and
 *   dma_resv_unlock(). On the other side it also interacts through all
 *   allocation sites through &mmu_notifier and &shrinker.
* * Furthermore lockdep does not handle cross-release dependencies, which means * any deadlocks between dma_fence_wait() and dma_fence_signal() can't be caught * at runtime with some quick testing. The simplest example is one thread * waiting on a &dma_fence while holding a lock:: * * lock(A); * dma_fence_wait(B); * unlock(A); * * while the other thread is stuck trying to acquire the same lock, which * prevents it from signalling the fence the previous thread is stuck waiting * on:: * * lock(A); * unlock(A); * dma_fence_signal(B); * * By manually annotating all code relevant to signalling a &dma_fence we can * teach lockdep about these dependencies, which also helps with the validation * headache since now lockdep can check all the rules for us:: * * cookie = dma_fence_begin_signalling(); * lock(A); * unlock(A); * dma_fence_signal(B); * dma_fence_end_signalling(cookie); * * For using dma_fence_begin_signalling() and dma_fence_end_signalling() to * annotate critical sections the following rules need to be observed: * * * All code necessary to complete a &dma_fence must be annotated, from the * point where a fence is accessible to other threads, to the point where * dma_fence_signal() is called. Un-annotated code can contain deadlock issues, * and due to the very strict rules and many corner cases it is infeasible to * catch these just with review or normal stress testing. * * * &struct dma_resv deserves a special note, since the readers are only * protected by rcu. This means the signalling critical section starts as soon * as the new fences are installed, even before dma_resv_unlock() is called. * * * The only exception are fast paths and opportunistic signalling code, which * calls dma_fence_signal() purely as an optimization, but is not required to * guarantee completion of a &dma_fence. The usual example is a wait IOCTL * which calls dma_fence_signal(), while the mandatory completion path goes * through a hardware interrupt and possible job completion worker. 
* * * To aid composability of code, the annotations can be freely nested, as long * as the overall locking hierarchy is consistent. The annotations also work * both in interrupt and process context. Due to implementation details this * requires that callers pass an opaque cookie from * dma_fence_begin_signalling() to dma_fence_end_signalling(). * * * Validation against the cross driver contract is implemented by priming * lockdep with the relevant hierarchy at boot-up. This means even just * testing with a single device is enough to validate a driver, at least as * far as deadlocks with dma_fence_wait() against dma_fence_signal() are * concerned. */ #ifdef CONFIG_LOCKDEP static struct lockdep_map dma_fence_lockdep_map = { .name = "dma_fence_map" }; /** * dma_fence_begin_signalling - begin a critical DMA fence signalling section * * Drivers should use this to annotate the beginning of any code section * required to eventually complete &dma_fence by calling dma_fence_signal(). * * The end of these critical sections are annotated with * dma_fence_end_signalling(). * * Returns: * * Opaque cookie needed by the implementation, which needs to be passed to * dma_fence_end_signalling(). */ bool dma_fence_begin_signalling(void) { /* explicitly nesting ... */ if (lock_is_held_type(&dma_fence_lockdep_map, 1)) return true; /* rely on might_sleep check for soft/hardirq locks */ if (in_atomic()) return true; /* ... and non-recursive successful read_trylock */ lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _RET_IP_); return false; } EXPORT_SYMBOL(dma_fence_begin_signalling); /** * dma_fence_end_signalling - end a critical DMA fence signalling section * @cookie: opaque cookie from dma_fence_begin_signalling() * * Closes a critical section annotation opened by dma_fence_begin_signalling(). 
*/ void dma_fence_end_signalling(bool cookie) { if (cookie) return; lock_release(&dma_fence_lockdep_map, _RET_IP_); } EXPORT_SYMBOL(dma_fence_end_signalling); void __dma_fence_might_wait(void) { bool tmp; tmp = lock_is_held_type(&dma_fence_lockdep_map, 1); if (tmp) lock_release(&dma_fence_lockdep_map, _THIS_IP_); lock_map_acquire(&dma_fence_lockdep_map); lock_map_release(&dma_fence_lockdep_map); if (tmp) lock_acquire(&dma_fence_lockdep_map, 0, 1, 1, 1, NULL, _THIS_IP_); } #endif /** * dma_fence_signal_timestamp_locked - signal completion of a fence * @fence: the fence to signal * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. Set the timestamp provided as the fence * signal timestamp. * * Unlike dma_fence_signal_timestamp(), this function must be called with * &dma_fence.lock held. * * Returns 0 on success and a negative error value when @fence has been * signalled already. 
*/ int dma_fence_signal_timestamp_locked(struct dma_fence *fence, ktime_t timestamp) { struct dma_fence_cb *cur, *tmp; struct list_head cb_list; lockdep_assert_held(fence->lock); if (unlikely(test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))) return -EINVAL; /* Stash the cb_list before replacing it with the timestamp */ list_replace(&fence->cb_list, &cb_list); fence->timestamp = timestamp; set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags); trace_dma_fence_signaled(fence); list_for_each_entry_safe(cur, tmp, &cb_list, node) { INIT_LIST_HEAD(&cur->node); cur->func(fence, cur); } return 0; } EXPORT_SYMBOL(dma_fence_signal_timestamp_locked); /** * dma_fence_signal_timestamp - signal completion of a fence * @fence: the fence to signal * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. Set the timestamp provided as the fence * signal timestamp. * * Returns 0 on success and a negative error value when @fence has been * signalled already. */ int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp) { unsigned long flags; int ret; if (WARN_ON(!fence)) return -EINVAL; spin_lock_irqsave(fence->lock, flags); ret = dma_fence_signal_timestamp_locked(fence, timestamp); spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_signal_timestamp); /** * dma_fence_signal_locked - signal completion of a fence * @fence: the fence to signal * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). 
Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. * * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock * held. * * Returns 0 on success and a negative error value when @fence has been * signalled already. */ int dma_fence_signal_locked(struct dma_fence *fence) { return dma_fence_signal_timestamp_locked(fence, ktime_get()); } EXPORT_SYMBOL(dma_fence_signal_locked); /** * dma_fence_signal - signal completion of a fence * @fence: the fence to signal * * Signal completion for software callbacks on a fence, this will unblock * dma_fence_wait() calls and run all the callbacks added with * dma_fence_add_callback(). Can be called multiple times, but since a fence * can only go from the unsignaled to the signaled state and not back, it will * only be effective the first time. * * Returns 0 on success and a negative error value when @fence has been * signalled already. */ int dma_fence_signal(struct dma_fence *fence) { unsigned long flags; int ret; bool tmp; if (WARN_ON(!fence)) return -EINVAL; tmp = dma_fence_begin_signalling(); spin_lock_irqsave(fence->lock, flags); ret = dma_fence_signal_timestamp_locked(fence, ktime_get()); spin_unlock_irqrestore(fence->lock, flags); dma_fence_end_signalling(tmp); return ret; } EXPORT_SYMBOL(dma_fence_signal); /** * dma_fence_wait_timeout - sleep until the fence gets signaled * or until timeout elapses * @fence: the fence to wait on * @intr: if true, do an interruptible wait * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the * remaining timeout in jiffies on success. Other error values may be * returned on custom implementations. * * Performs a synchronous wait on this fence. 
It is assumed the caller * directly or indirectly (buf-mgr between reservation and committing) * holds a reference to the fence, otherwise the fence might be * freed before return, resulting in undefined behavior. * * See also dma_fence_wait() and dma_fence_wait_any_timeout(). */ signed long dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout) { signed long ret; if (WARN_ON(timeout < 0)) return -EINVAL; might_sleep(); __dma_fence_might_wait(); dma_fence_enable_sw_signaling(fence); if (trace_dma_fence_wait_start_enabled()) { rcu_read_lock(); trace_dma_fence_wait_start(fence); rcu_read_unlock(); } if (fence->ops->wait) ret = fence->ops->wait(fence, intr, timeout); else ret = dma_fence_default_wait(fence, intr, timeout); if (trace_dma_fence_wait_end_enabled()) { rcu_read_lock(); trace_dma_fence_wait_end(fence); rcu_read_unlock(); } return ret; } EXPORT_SYMBOL(dma_fence_wait_timeout); /** * dma_fence_release - default release function for fences * @kref: &dma_fence.recfount * * This is the default release functions for &dma_fence. Drivers shouldn't call * this directly, but instead call dma_fence_put(). */ void dma_fence_release(struct kref *kref) { struct dma_fence *fence = container_of(kref, struct dma_fence, refcount); rcu_read_lock(); trace_dma_fence_destroy(fence); if (!list_empty(&fence->cb_list) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { const char __rcu *timeline; const char __rcu *driver; unsigned long flags; driver = dma_fence_driver_name(fence); timeline = dma_fence_timeline_name(fence); WARN(1, "Fence %s:%s:%llx:%llx released with pending signals!\n", rcu_dereference(driver), rcu_dereference(timeline), fence->context, fence->seqno); /* * Failed to signal before release, likely a refcounting issue. * * This should never happen, but if it does make sure that we * don't leave chains dangling. We set the error flag first * so that the callbacks know this signal is due to an error. 
*/ spin_lock_irqsave(fence->lock, flags); fence->error = -EDEADLK; dma_fence_signal_locked(fence); spin_unlock_irqrestore(fence->lock, flags); } rcu_read_unlock(); if (fence->ops->release) fence->ops->release(fence); else dma_fence_free(fence); } EXPORT_SYMBOL(dma_fence_release); /** * dma_fence_free - default release function for &dma_fence. * @fence: fence to release * * This is the default implementation for &dma_fence_ops.release. It calls * kfree_rcu() on @fence. */ void dma_fence_free(struct dma_fence *fence) { kfree_rcu(fence, rcu); } EXPORT_SYMBOL(dma_fence_free); static bool __dma_fence_enable_signaling(struct dma_fence *fence) { bool was_set; lockdep_assert_held(fence->lock); was_set = test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &fence->flags); if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return false; if (!was_set && fence->ops->enable_signaling) { trace_dma_fence_enable_signal(fence); if (!fence->ops->enable_signaling(fence)) { dma_fence_signal_locked(fence); return false; } } return true; } /** * dma_fence_enable_sw_signaling - enable signaling on fence * @fence: the fence to enable * * This will request for sw signaling to be enabled, to make the fence * complete as soon as possible. This calls &dma_fence_ops.enable_signaling * internally. */ void dma_fence_enable_sw_signaling(struct dma_fence *fence) { unsigned long flags; spin_lock_irqsave(fence->lock, flags); __dma_fence_enable_signaling(fence); spin_unlock_irqrestore(fence->lock, flags); } EXPORT_SYMBOL(dma_fence_enable_sw_signaling); /** * dma_fence_add_callback - add a callback to be called when the fence * is signaled * @fence: the fence to wait on * @cb: the callback to register * @func: the function to call * * Add a software callback to the fence. The caller should keep a reference to * the fence. * * @cb will be initialized by dma_fence_add_callback(), no initialization * by the caller is required. 
Any number of callbacks can be registered * to a fence, but a callback can only be registered to one fence at a time. * * If fence is already signaled, this function will return -ENOENT (and * *not* call the callback). * * Note that the callback can be called from an atomic context or irq context. * * Returns 0 in case of success, -ENOENT if the fence is already signaled * and -EINVAL in case of error. */ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb, dma_fence_func_t func) { unsigned long flags; int ret = 0; if (WARN_ON(!fence || !func)) return -EINVAL; if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { INIT_LIST_HEAD(&cb->node); return -ENOENT; } spin_lock_irqsave(fence->lock, flags); if (__dma_fence_enable_signaling(fence)) { cb->func = func; list_add_tail(&cb->node, &fence->cb_list); } else { INIT_LIST_HEAD(&cb->node); ret = -ENOENT; } spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_add_callback); /** * dma_fence_get_status - returns the status upon completion * @fence: the dma_fence to query * * This wraps dma_fence_get_status_locked() to return the error status * condition on a signaled fence. See dma_fence_get_status_locked() for more * details. * * Returns 0 if the fence has not yet been signaled, 1 if the fence has * been signaled without an error condition, or a negative error code * if the fence has been completed in err. */ int dma_fence_get_status(struct dma_fence *fence) { unsigned long flags; int status; spin_lock_irqsave(fence->lock, flags); status = dma_fence_get_status_locked(fence); spin_unlock_irqrestore(fence->lock, flags); return status; } EXPORT_SYMBOL(dma_fence_get_status); /** * dma_fence_remove_callback - remove a callback from the signaling list * @fence: the fence to wait on * @cb: the callback to remove * * Remove a previously queued callback from the fence. 
This function returns * true if the callback is successfully removed, or false if the fence has * already been signaled. * * *WARNING*: * Cancelling a callback should only be done if you really know what you're * doing, since deadlocks and race conditions could occur all too easily. For * this reason, it should only ever be done on hardware lockup recovery, * with a reference held to the fence. * * Behaviour is undefined if @cb has not been added to @fence using * dma_fence_add_callback() beforehand. */ bool dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb) { unsigned long flags; bool ret; spin_lock_irqsave(fence->lock, flags); ret = !list_empty(&cb->node); if (ret) list_del_init(&cb->node); spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_remove_callback); struct default_wait_cb { struct dma_fence_cb base; struct task_struct *task; }; static void dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { struct default_wait_cb *wait = container_of(cb, struct default_wait_cb, base); wake_up_state(wait->task, TASK_NORMAL); } /** * dma_fence_default_wait - default sleep until the fence gets signaled * or until timeout elapses * @fence: the fence to wait on * @intr: if true, do an interruptible wait * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * * Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the * remaining timeout in jiffies on success. If timeout is zero the value one is * returned if the fence is already signaled for consistency with other * functions taking a jiffies timeout. */ signed long dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout) { struct default_wait_cb cb; unsigned long flags; signed long ret = timeout ? 
timeout : 1; spin_lock_irqsave(fence->lock, flags); if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) goto out; if (intr && signal_pending(current)) { ret = -ERESTARTSYS; goto out; } if (!timeout) { ret = 0; goto out; } cb.base.func = dma_fence_default_wait_cb; cb.task = current; list_add(&cb.base.node, &fence->cb_list); while (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) && ret > 0) { if (intr) __set_current_state(TASK_INTERRUPTIBLE); else __set_current_state(TASK_UNINTERRUPTIBLE); spin_unlock_irqrestore(fence->lock, flags); ret = schedule_timeout(ret); spin_lock_irqsave(fence->lock, flags); if (ret > 0 && intr && signal_pending(current)) ret = -ERESTARTSYS; } if (!list_empty(&cb.base.node)) list_del(&cb.base.node); __set_current_state(TASK_RUNNING); out: spin_unlock_irqrestore(fence->lock, flags); return ret; } EXPORT_SYMBOL(dma_fence_default_wait); static bool dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count, uint32_t *idx) { int i; for (i = 0; i < count; ++i) { struct dma_fence *fence = fences[i]; if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { if (idx) *idx = i; return true; } } return false; } /** * dma_fence_wait_any_timeout - sleep until any fence gets signaled * or until timeout elapses * @fences: array of fences to wait on * @count: number of fences to wait on * @intr: if true, do an interruptible wait * @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT * @idx: used to store the first signaled fence index, meaningful only on * positive return * * Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if * interrupted, 0 if the wait timed out, or the remaining timeout in jiffies * on success. * * Synchronous waits for the first fence in the array to be signaled. The * caller needs to hold a reference to all fences in the array, otherwise a * fence might be freed before return, resulting in undefined behavior. * * See also dma_fence_wait() and dma_fence_wait_timeout(). 
*/
signed long
dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
			   bool intr, signed long timeout, uint32_t *idx)
{
	struct default_wait_cb *waiters;
	signed long remaining = timeout;
	unsigned int n;

	if (WARN_ON(!fences || !count || timeout < 0))
		return -EINVAL;

	/* Zero timeout: poll every fence once, without sleeping or allocating. */
	if (!timeout) {
		for (n = 0; n < count; n++) {
			if (!dma_fence_is_signaled(fences[n]))
				continue;

			if (idx)
				*idx = n;
			return 1;
		}

		return 0;
	}

	waiters = kcalloc(count, sizeof(*waiters), GFP_KERNEL);
	if (!waiters)
		return -ENOMEM;

	for (n = 0; n < count; n++) {
		waiters[n].task = current;
		if (dma_fence_add_callback(fences[n], &waiters[n].base,
					   dma_fence_default_wait_cb)) {
			/* This fence is already signaled */
			if (idx)
				*idx = n;
			goto remove_callbacks;
		}
	}

	while (remaining > 0) {
		set_current_state(intr ? TASK_INTERRUPTIBLE :
					 TASK_UNINTERRUPTIBLE);

		if (dma_fence_test_signaled_any(fences, count, idx))
			break;

		remaining = schedule_timeout(remaining);

		if (remaining > 0 && intr && signal_pending(current))
			remaining = -ERESTARTSYS;
	}

	__set_current_state(TASK_RUNNING);

remove_callbacks:
	/* Only entries [0, n) were passed to dma_fence_add_callback() successfully. */
	while (n > 0) {
		n--;
		dma_fence_remove_callback(fences[n], &waiters[n].base);
	}

	kfree(waiters);

	return remaining;
}
EXPORT_SYMBOL(dma_fence_wait_any_timeout);

/**
 * DOC: deadline hints
 *
 * In an ideal world, it would be possible to pipeline a workload sufficiently
 * that a utilization based device frequency governor could arrive at a minimum
 * frequency that meets the requirements of the use-case, in order to minimize
 * power consumption.  But in the real world there are many workloads which
 * defy this ideal.  For example, but not limited to:
 *
 * * Workloads that ping-pong between device and CPU, with alternating periods
 *   of CPU waiting for device, and device waiting on CPU.  This can result in
 *   devfreq and cpufreq seeing idle time in their respective domains and in
 *   result reduce frequency.
 *
 * * Workloads that interact with a periodic time based deadline, such as double
 *   buffered GPU rendering vs vblank sync'd page flipping.  In this scenario,
 *   missing a vblank deadline results in an *increase* in idle time on the GPU
 *   (since it has to wait an additional vblank period), sending a signal to
 *   the GPU's devfreq to reduce frequency, when in fact the opposite is what is
 *   needed.
 *
 * To this end, deadline hint(s) can be set on a &dma_fence via &dma_fence_set_deadline
 * (or indirectly via userspace facing ioctls like &sync_set_deadline).
 * The deadline hint provides a way for the waiting driver, or userspace, to
 * convey an appropriate sense of urgency to the signaling driver.
 *
 * A deadline hint is given in absolute ktime (CLOCK_MONOTONIC for userspace
 * facing APIs).  The time could either be some point in the future (such as
 * the vblank based deadline for page-flipping, or the start of a compositor's
 * composition cycle), or the current time to indicate an immediate deadline
 * hint (i.e. forward progress cannot be made until this fence is signaled).
 *
 * Multiple deadlines may be set on a given fence, even in parallel.  See the
 * documentation for &dma_fence_ops.set_deadline.
 *
 * The deadline hint is just that, a hint.  The driver that created the fence
 * may react by increasing frequency, making different scheduling choices, etc.
 * Or doing nothing at all.
 */

/**
 * dma_fence_set_deadline - set desired fence-wait deadline hint
 * @fence:    the fence that is to be waited on
 * @deadline: the time by which the waiter hopes for the fence to be
 *            signaled
 *
 * Give the fence signaler a hint about an upcoming deadline, such as
 * vblank, by which point the waiter would prefer the fence to be
 * signaled by.  This is intended to give feedback to the fence signaler
 * to aid in power management decisions, such as boosting GPU frequency
 * if a periodic vblank deadline is approaching but the fence is not
 * yet signaled.
*/ void dma_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) { if (fence->ops->set_deadline && !dma_fence_is_signaled(fence)) fence->ops->set_deadline(fence, deadline); } EXPORT_SYMBOL(dma_fence_set_deadline); /** * dma_fence_describe - Dump fence description into seq_file * @fence: the fence to describe * @seq: the seq_file to put the textual description into * * Dump a textual description of the fence and it's state into the seq_file. */ void dma_fence_describe(struct dma_fence *fence, struct seq_file *seq) { const char __rcu *timeline; const char __rcu *driver; rcu_read_lock(); timeline = dma_fence_timeline_name(fence); driver = dma_fence_driver_name(fence); seq_printf(seq, "%s %s seq %llu %ssignalled\n", rcu_dereference(driver), rcu_dereference(timeline), fence->seqno, dma_fence_is_signaled(fence) ? "" : "un"); rcu_read_unlock(); } EXPORT_SYMBOL(dma_fence_describe); static void __dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, spinlock_t *lock, u64 context, u64 seqno, unsigned long flags) { BUG_ON(!lock); BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name); kref_init(&fence->refcount); fence->ops = ops; INIT_LIST_HEAD(&fence->cb_list); fence->lock = lock; fence->context = context; fence->seqno = seqno; fence->flags = flags; fence->error = 0; trace_dma_fence_init(fence); } /** * dma_fence_init - Initialize a custom fence. * @fence: the fence to initialize * @ops: the dma_fence_ops for operations on this fence * @lock: the irqsafe spinlock to use for locking this fence * @context: the execution context this fence is run on * @seqno: a linear increasing sequence number for this context * * Initializes an allocated fence, the caller doesn't have to keep its * refcount after committing with this fence, but it will need to hold a * refcount again if &dma_fence_ops.enable_signaling gets called. 
* * context and seqno are used for easy comparison between fences, allowing * to check which fence is later by simply using dma_fence_later(). */ void dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops, spinlock_t *lock, u64 context, u64 seqno) { __dma_fence_init(fence, ops, lock, context, seqno, 0UL); } EXPORT_SYMBOL(dma_fence_init); /** * dma_fence_init64 - Initialize a custom fence with 64-bit seqno support. * @fence: the fence to initialize * @ops: the dma_fence_ops for operations on this fence * @lock: the irqsafe spinlock to use for locking this fence * @context: the execution context this fence is run on * @seqno: a linear increasing sequence number for this context * * Initializes an allocated fence, the caller doesn't have to keep its * refcount after committing with this fence, but it will need to hold a * refcount again if &dma_fence_ops.enable_signaling gets called. * * Context and seqno are used for easy comparison between fences, allowing * to check which fence is later by simply using dma_fence_later(). */ void dma_fence_init64(struct dma_fence *fence, const struct dma_fence_ops *ops, spinlock_t *lock, u64 context, u64 seqno) { __dma_fence_init(fence, ops, lock, context, seqno, BIT(DMA_FENCE_FLAG_SEQNO64_BIT)); } EXPORT_SYMBOL(dma_fence_init64); /** * dma_fence_driver_name - Access the driver name * @fence: the fence to query * * Returns a driver name backing the dma-fence implementation. * * IMPORTANT CONSIDERATION: * Dma-fence contract stipulates that access to driver provided data (data not * directly embedded into the object itself), such as the &dma_fence.lock and * memory potentially accessed by the &dma_fence.ops functions, is forbidden * after the fence has been signalled. Drivers are allowed to free that data, * and some do. * * To allow safe access drivers are mandated to guarantee a RCU grace period * between signalling the fence and freeing said data. 
*
 * As such access to the driver name is only valid inside a RCU locked section.
 * The pointer MUST be both queried and USED ONLY WITHIN a SINGLE block guarded
 * by the &rcu_read_lock and &rcu_read_unlock pair.
 */
const char __rcu *dma_fence_driver_name(struct dma_fence *fence)
{
	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
			 "RCU protection is required for safe access to returned string");

	/* Driver callbacks are only consulted while the fence is unsignaled. */
	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return fence->ops->get_driver_name(fence);
	else
		return "detached-driver";
}
EXPORT_SYMBOL(dma_fence_driver_name);

/**
 * dma_fence_timeline_name - Access the timeline name
 * @fence: the fence to query
 *
 * Returns a timeline name provided by the dma-fence implementation, or a
 * fixed placeholder string once the fence has been signalled.
 *
 * IMPORTANT CONSIDERATION:
 * Dma-fence contract stipulates that access to driver provided data (data not
 * directly embedded into the object itself), such as the &dma_fence.lock and
 * memory potentially accessed by the &dma_fence.ops functions, is forbidden
 * after the fence has been signalled. Drivers are allowed to free that data,
 * and some do.
 *
 * To allow safe access drivers are mandated to guarantee a RCU grace period
 * between signalling the fence and freeing said data.
 *
 * As such access to the timeline name is only valid inside a RCU locked
 * section. The pointer MUST be both queried and USED ONLY WITHIN a SINGLE
 * block guarded by the &rcu_read_lock and &rcu_read_unlock pair.
 */
const char __rcu *dma_fence_timeline_name(struct dma_fence *fence)
{
	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
			 "RCU protection is required for safe access to returned string");

	/*
	 * Ask the timeline callback, not the driver-name one: previously this
	 * wrapper returned the driver name for every unsignaled fence.
	 */
	if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
		return fence->ops->get_timeline_name(fence);
	else
		return "signaled-timeline";
}
EXPORT_SYMBOL(dma_fence_timeline_name);
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 
528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 
1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 
1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 
1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 
2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 /* SPDX-License-Identifier: LGPL-2.1 */ /* * * Copyright (C) International Business Machines Corp., 2002,2008 * Author(s): Steve French (sfrench@us.ibm.com) * Jeremy Allison (jra@samba.org) * */ #ifndef _CIFS_GLOB_H #define _CIFS_GLOB_H #include <linux/in.h> #include <linux/in6.h> #include <linux/inet.h> #include <linux/slab.h> #include <linux/scatterlist.h> #include <linux/mm.h> #include <linux/mempool.h> #include <linux/workqueue.h> #include <linux/utsname.h> #include <linux/sched/mm.h> #include <linux/netfs.h> #include "cifs_fs_sb.h" #include "cifsacl.h" #include <crypto/internal/hash.h> #include <uapi/linux/cifs/cifs_mount.h> #include "../common/cifsglob.h" #include "../common/smb2pdu.h" #include "smb2pdu.h" #include <linux/filelock.h> #define SMB_PATH_MAX 260 #define CIFS_PORT 445 #define RFC1001_PORT 139 /* * The sizes of various internal tables and strings */ #define MAX_UID_INFO 16 #define MAX_SES_INFO 2 #define MAX_TCON_INFO 4 #define MAX_TREE_SIZE (2 + CIFS_NI_MAXHOST + 1 + CIFS_MAX_SHARE_LEN + 1) #define CIFS_MIN_RCV_POOL 4 #define MAX_REOPEN_ATT 5 /* these many maximum 
attempts to reopen a file */ /* * default attribute cache timeout (jiffies) */ #define CIFS_DEF_ACTIMEO (1 * HZ) /* * max sleep time before retry to server */ #define CIFS_MAX_SLEEP 2000 /* * max attribute cache timeout (jiffies) - 2^30 */ #define CIFS_MAX_ACTIMEO (1 << 30) /* * Max persistent and resilient handle timeout (milliseconds). * Windows durable max was 960000 (16 minutes) */ #define SMB3_MAX_HANDLE_TIMEOUT 960000 /* * MAX_REQ is the maximum number of requests that WE will send * on one socket concurrently. */ #define CIFS_MAX_REQ 32767 #define RFC1001_NAME_LEN 15 #define RFC1001_NAME_LEN_WITH_NULL (RFC1001_NAME_LEN + 1) /* maximum length of ip addr as a string (including ipv6 and sctp) */ #define SERVER_NAME_LENGTH 80 #define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1) /* echo interval in seconds */ #define SMB_ECHO_INTERVAL_MIN 1 #define SMB_ECHO_INTERVAL_MAX 600 #define SMB_ECHO_INTERVAL_DEFAULT 60 /* smb multichannel query server interfaces interval in seconds */ #define SMB_INTERFACE_POLL_INTERVAL 600 /* maximum number of PDUs in one compound */ #define MAX_COMPOUND 10 /* * Default number of credits to keep available for SMB3. * This value is chosen somewhat arbitrarily. The Windows client * defaults to 128 credits, the Windows server allows clients up to * 512 credits (or 8K for later versions), and the NetApp server * does not limit clients at all. Choose a high enough default value * such that the client shouldn't limit performance, but allow mount * to override (until you approach 64K, where we limit credits to 65000 * to reduce possibility of seeing more server credit overflow bugs. */ #define SMB2_MAX_CREDITS_AVAILABLE 32000 #include "cifspdu.h" #ifndef XATTR_DOS_ATTRIB #define XATTR_DOS_ATTRIB "user.DOSATTRIB" #endif #define CIFS_MAX_WORKSTATION_LEN (__NEW_UTS_LEN + 1) /* reasonable max for client */ #define CIFS_DFS_ROOT_SES(ses) ((ses)->dfs_root_ses ?: (ses)) /* * CIFS vfs client Status information (based on what we know.) 
*/ /* associated with each connection */ enum statusEnum { CifsNew = 0, CifsGood, CifsExiting, CifsNeedReconnect, CifsNeedNegotiate, CifsInNegotiate, }; /* associated with each smb session */ enum ses_status_enum { SES_NEW = 0, SES_GOOD, SES_EXITING, SES_NEED_RECON, SES_IN_SETUP }; /* associated with each tree connection to the server */ enum tid_status_enum { TID_NEW = 0, TID_GOOD, TID_EXITING, TID_NEED_RECON, TID_NEED_TCON, TID_IN_TCON, TID_NEED_FILES_INVALIDATE, /* currently unused */ TID_IN_FILES_INVALIDATE }; enum securityEnum { Unspecified = 0, /* not specified */ NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ RawNTLMSSP, /* NTLMSSP without SPNEGO, NTLMv2 hash */ Kerberos, /* Kerberos via SPNEGO */ IAKerb, /* Kerberos proxy */ }; enum upcall_target_enum { UPTARGET_UNSPECIFIED, /* not specified, defaults to app */ UPTARGET_MOUNT, /* upcall to the mount namespace */ UPTARGET_APP, /* upcall to the application namespace which did the mount */ }; enum cifs_reparse_type { CIFS_REPARSE_TYPE_NONE, CIFS_REPARSE_TYPE_NFS, CIFS_REPARSE_TYPE_WSL, CIFS_REPARSE_TYPE_DEFAULT = CIFS_REPARSE_TYPE_NFS, }; static inline const char *cifs_reparse_type_str(enum cifs_reparse_type type) { switch (type) { case CIFS_REPARSE_TYPE_NONE: return "none"; case CIFS_REPARSE_TYPE_NFS: return "nfs"; case CIFS_REPARSE_TYPE_WSL: return "wsl"; default: return "unknown"; } } enum cifs_symlink_type { CIFS_SYMLINK_TYPE_DEFAULT, CIFS_SYMLINK_TYPE_NONE, CIFS_SYMLINK_TYPE_NATIVE, CIFS_SYMLINK_TYPE_UNIX, CIFS_SYMLINK_TYPE_MFSYMLINKS, CIFS_SYMLINK_TYPE_SFU, CIFS_SYMLINK_TYPE_NFS, CIFS_SYMLINK_TYPE_WSL, }; static inline const char *cifs_symlink_type_str(enum cifs_symlink_type type) { switch (type) { case CIFS_SYMLINK_TYPE_NONE: return "none"; case CIFS_SYMLINK_TYPE_NATIVE: return "native"; case CIFS_SYMLINK_TYPE_UNIX: return "unix"; case CIFS_SYMLINK_TYPE_MFSYMLINKS: return "mfsymlinks"; case CIFS_SYMLINK_TYPE_SFU: return "sfu"; case CIFS_SYMLINK_TYPE_NFS: return "nfs"; case CIFS_SYMLINK_TYPE_WSL: return 
"wsl"; default: return "unknown"; } } struct session_key { unsigned int len; char *response; }; /* crypto hashing related structure/fields, not specific to a sec mech */ struct cifs_secmech { struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */ struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */ struct crypto_aead *dec; /* smb3 decryption AEAD TFM (AES-CCM and AES-GCM) */ }; /* per smb session structure/fields */ struct ntlmssp_auth { bool sesskey_per_smbsess; /* whether session key is per smb session */ __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */ __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */ unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */ char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlmssp */ }; struct cifs_cred { int uid; int gid; int mode; int cecount; struct smb_sid osid; struct smb_sid gsid; struct cifs_ntace *ntaces; struct smb_ace *aces; }; struct cifs_open_info_data { bool adjust_tz; bool reparse_point; bool contains_posix_file_info; struct { /* ioctl response buffer */ struct { int buftype; struct kvec iov; } io; __u32 tag; struct reparse_data_buffer *buf; } reparse; struct { __u8 eas[SMB2_WSL_MAX_QUERY_EA_RESP_SIZE]; unsigned int eas_len; } wsl; char *symlink_target; struct smb_sid posix_owner; struct smb_sid posix_group; union { struct smb2_file_all_info fi; struct smb311_posix_qinfo posix_fi; }; }; /* ***************************************************************** * Except the CIFS PDUs themselves all the * globally interesting structs should go here ***************************************************************** */ /* * A smb_rqst represents a complete request to be issued to a server. It's * formed by a kvec array, followed by an array of pages. Page data is assumed * to start at the beginning of the first page. 
*/ struct smb_rqst { struct kvec *rq_iov; /* array of kvecs */ unsigned int rq_nvec; /* number of kvecs in array */ struct iov_iter rq_iter; /* Data iterator */ struct folio_queue *rq_buffer; /* Buffer for encryption */ }; struct mid_q_entry; struct TCP_Server_Info; struct cifsFileInfo; struct cifs_ses; struct cifs_tcon; struct dfs_info3_param; struct cifs_fattr; struct smb3_fs_context; struct cifs_fid; struct cifs_io_subrequest; struct cifs_io_parms; struct cifs_search_info; struct cifsInodeInfo; struct cifs_open_parms; struct cifs_credits; struct smb_version_operations { int (*send_cancel)(struct TCP_Server_Info *, struct smb_rqst *, struct mid_q_entry *); bool (*compare_fids)(struct cifsFileInfo *, struct cifsFileInfo *); /* setup request: allocate mid, sign message */ struct mid_q_entry *(*setup_request)(struct cifs_ses *, struct TCP_Server_Info *, struct smb_rqst *); /* setup async request: allocate mid, sign message */ struct mid_q_entry *(*setup_async_request)(struct TCP_Server_Info *, struct smb_rqst *); /* check response: verify signature, map error */ int (*check_receive)(struct mid_q_entry *, struct TCP_Server_Info *, bool); void (*add_credits)(struct TCP_Server_Info *server, struct cifs_credits *credits, const int optype); void (*set_credits)(struct TCP_Server_Info *, const int); int * (*get_credits_field)(struct TCP_Server_Info *, const int); unsigned int (*get_credits)(struct mid_q_entry *); __u64 (*get_next_mid)(struct TCP_Server_Info *); void (*revert_current_mid)(struct TCP_Server_Info *server, const unsigned int val); /* data offset from read response message */ unsigned int (*read_data_offset)(char *); /* * Data length from read response message * When in_remaining is true, the returned data length is in * message field DataRemaining for out-of-band data read (e.g through * Memory Registration RDMA write in SMBD). * Otherwise, the returned data length is in message field DataLength. 
*/ unsigned int (*read_data_length)(char *, bool in_remaining); /* map smb to linux error */ int (*map_error)(char *, bool); /* find mid corresponding to the response message */ struct mid_q_entry * (*find_mid)(struct TCP_Server_Info *, char *); void (*dump_detail)(void *buf, struct TCP_Server_Info *ptcp_info); void (*clear_stats)(struct cifs_tcon *); void (*print_stats)(struct seq_file *m, struct cifs_tcon *); void (*dump_share_caps)(struct seq_file *, struct cifs_tcon *); /* verify the message */ int (*check_message)(char *, unsigned int, struct TCP_Server_Info *); bool (*is_oplock_break)(char *, struct TCP_Server_Info *); int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *); void (*downgrade_oplock)(struct TCP_Server_Info *server, struct cifsInodeInfo *cinode, __u32 oplock, __u16 epoch, bool *purge_cache); /* process transaction2 response */ bool (*check_trans2)(struct mid_q_entry *, struct TCP_Server_Info *, char *, int); /* check if we need to negotiate */ bool (*need_neg)(struct TCP_Server_Info *); /* negotiate to the server */ int (*negotiate)(const unsigned int xid, struct cifs_ses *ses, struct TCP_Server_Info *server); /* set negotiated write size */ unsigned int (*negotiate_wsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx); /* set negotiated read size */ unsigned int (*negotiate_rsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx); /* setup smb sessionn */ int (*sess_setup)(const unsigned int, struct cifs_ses *, struct TCP_Server_Info *server, const struct nls_table *); /* close smb session */ int (*logoff)(const unsigned int, struct cifs_ses *); /* connect to a server share */ int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *, struct cifs_tcon *, const struct nls_table *); /* close tree connection */ int (*tree_disconnect)(const unsigned int, struct cifs_tcon *); /* get DFS referrals */ int (*get_dfs_refer)(const unsigned int, struct cifs_ses *, const char *, struct dfs_info3_param **, 
unsigned int *, const struct nls_table *, int); /* informational QFS call */ void (*qfs_tcon)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *); /* query for server interfaces */ int (*query_server_interfaces)(const unsigned int, struct cifs_tcon *, bool); /* check if a path is accessible or not */ int (*is_path_accessible)(const unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const char *); /* query path data from the server */ int (*query_path_info)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, struct cifs_open_info_data *data); /* query file data from the server */ int (*query_file_info)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *cfile, struct cifs_open_info_data *data); /* query reparse point to determine which type of special file */ int (*query_reparse_point)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, u32 *tag, struct kvec *rsp, int *rsp_buftype); /* get server index number */ int (*get_srv_inum)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, u64 *uniqueid, struct cifs_open_info_data *data); /* set size by path */ int (*set_path_size)(const unsigned int, struct cifs_tcon *, const char *, __u64, struct cifs_sb_info *, bool, struct dentry *); /* set size by file handle */ int (*set_file_size)(const unsigned int, struct cifs_tcon *, struct cifsFileInfo *, __u64, bool); /* set attributes */ int (*set_file_info)(struct inode *, const char *, FILE_BASIC_INFO *, const unsigned int); int (*set_compression)(const unsigned int, struct cifs_tcon *, struct cifsFileInfo *); /* check if we can send an echo or nor */ bool (*can_echo)(struct TCP_Server_Info *); /* send echo request */ int (*echo)(struct TCP_Server_Info *); /* create directory */ int (*posix_mkdir)(const unsigned int xid, struct inode *inode, umode_t mode, struct cifs_tcon *tcon, const char 
*full_path, struct cifs_sb_info *cifs_sb); int (*mkdir)(const unsigned int xid, struct inode *inode, umode_t mode, struct cifs_tcon *tcon, const char *name, struct cifs_sb_info *sb); /* set info on created directory */ void (*mkdir_setinfo)(struct inode *, const char *, struct cifs_sb_info *, struct cifs_tcon *, const unsigned int); /* remove directory */ int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *); /* unlink file */ int (*unlink)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *, struct dentry *); /* open, rename and delete file */ int (*rename_pending_delete)(const char *, struct dentry *, const unsigned int); /* send rename request */ int (*rename)(const unsigned int xid, struct cifs_tcon *tcon, struct dentry *source_dentry, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); /* send create hardlink request */ int (*create_hardlink)(const unsigned int xid, struct cifs_tcon *tcon, struct dentry *source_dentry, const char *from_name, const char *to_name, struct cifs_sb_info *cifs_sb); /* query symlink target */ int (*query_symlink)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, const char *full_path, char **target_path); /* open a file for non-posix mounts */ int (*open)(const unsigned int xid, struct cifs_open_parms *oparms, __u32 *oplock, void *buf); /* set fid protocol-specific info */ void (*set_fid)(struct cifsFileInfo *, struct cifs_fid *, __u32); /* close a file */ int (*close)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* close a file, returning file attributes and timestamps */ int (*close_getattr)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *pfile_info); /* send a flush request to the server */ int (*flush)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* async read from the server */ int (*async_readv)(struct cifs_io_subrequest *); /* async write to the server */ void 
(*async_writev)(struct cifs_io_subrequest *); /* sync read from the server */ int (*sync_read)(const unsigned int, struct cifs_fid *, struct cifs_io_parms *, unsigned int *, char **, int *); /* sync write to the server */ int (*sync_write)(const unsigned int, struct cifs_fid *, struct cifs_io_parms *, unsigned int *, struct kvec *, unsigned long); /* open dir, start readdir */ int (*query_dir_first)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *, struct cifs_fid *, __u16, struct cifs_search_info *); /* continue readdir */ int (*query_dir_next)(const unsigned int, struct cifs_tcon *, struct cifs_fid *, __u16, struct cifs_search_info *srch_inf); /* close dir */ int (*close_dir)(const unsigned int, struct cifs_tcon *, struct cifs_fid *); /* calculate a size of SMB message */ unsigned int (*calc_smb_size)(void *buf); /* check for STATUS_PENDING and process the response if yes */ bool (*is_status_pending)(char *buf, struct TCP_Server_Info *server); /* check for STATUS_NETWORK_SESSION_EXPIRED */ bool (*is_session_expired)(char *); /* send oplock break response */ int (*oplock_response)(struct cifs_tcon *tcon, __u64 persistent_fid, __u64 volatile_fid, __u16 net_fid, struct cifsInodeInfo *cifs_inode); /* query remote filesystem */ int (*queryfs)(const unsigned int, struct cifs_tcon *, const char *, struct cifs_sb_info *, struct kstatfs *); /* send mandatory brlock to the server */ int (*mand_lock)(const unsigned int, struct cifsFileInfo *, __u64, __u64, __u32, int, int, bool); /* unlock range of mandatory locks */ int (*mand_unlock_range)(struct cifsFileInfo *, struct file_lock *, const unsigned int); /* push brlocks from the cache to the server */ int (*push_mand_locks)(struct cifsFileInfo *); /* get lease key of the inode */ void (*get_lease_key)(struct inode *, struct cifs_fid *); /* set lease key of the inode */ void (*set_lease_key)(struct inode *, struct cifs_fid *); /* generate new lease key */ void (*new_lease_key)(struct cifs_fid *); 
int (*generate_signingkey)(struct cifs_ses *ses, struct TCP_Server_Info *server); int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *, bool allocate_crypto); int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon, struct cifsFileInfo *src_file); int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon, struct cifsFileInfo *src_file, void __user *); int (*notify)(const unsigned int xid, struct file *pfile, void __user *pbuf, bool return_changes); int (*query_mf_symlink)(unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const unsigned char *, char *, unsigned int *); int (*create_mf_symlink)(unsigned int, struct cifs_tcon *, struct cifs_sb_info *, const unsigned char *, char *, unsigned int *); /* if we can do cache read operations */ bool (*is_read_op)(__u32); /* set oplock level for the inode */ void (*set_oplock_level)(struct cifsInodeInfo *cinode, __u32 oplock, __u16 epoch, bool *purge_cache); /* create lease context buffer for CREATE request */ char * (*create_lease_buf)(u8 *lease_key, u8 oplock, u8 *parent_lease_key, __le32 le_flags); /* parse lease context buffer and return oplock/epoch info */ __u8 (*parse_lease_buf)(void *buf, __u16 *epoch, char *lkey); ssize_t (*copychunk_range)(const unsigned int, struct cifsFileInfo *src_file, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); int (*duplicate_extents)(const unsigned int, struct cifsFileInfo *src, struct cifsFileInfo *target_file, u64 src_off, u64 len, u64 dest_off); int (*validate_negotiate)(const unsigned int, struct cifs_tcon *); ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *, const unsigned char *, const unsigned char *, char *, size_t, struct cifs_sb_info *); int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *, const char *, const void *, const __u16, const struct nls_table *, struct cifs_sb_info *); struct smb_ntsd * (*get_acl)(struct cifs_sb_info *cifssb, struct inode *ino, const char *patch, u32 *plen, u32 
info); struct smb_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *cifssmb, const struct cifs_fid *pfid, u32 *plen, u32 info); int (*set_acl)(struct smb_ntsd *pntsd, __u32 len, struct inode *ino, const char *path, int flag); /* writepages retry size */ unsigned int (*wp_retry_size)(struct inode *); /* get mtu credits */ int (*wait_mtu_credits)(struct TCP_Server_Info *, size_t, size_t *, struct cifs_credits *); /* adjust previously taken mtu credits to request size */ int (*adjust_credits)(struct TCP_Server_Info *server, struct cifs_io_subrequest *subreq, unsigned int /*enum smb3_rw_credits_trace*/ trace); /* check if we need to issue closedir */ bool (*dir_needs_close)(struct cifsFileInfo *); long (*fallocate)(struct file *, struct cifs_tcon *, int, loff_t, loff_t); /* init transform (compress/encrypt) request */ int (*init_transform_rq)(struct TCP_Server_Info *, int num_rqst, struct smb_rqst *, struct smb_rqst *); int (*is_transform_hdr)(void *buf); int (*receive_transform)(struct TCP_Server_Info *, struct mid_q_entry **, char **, int *); enum securityEnum (*select_sectype)(struct TCP_Server_Info *, enum securityEnum); int (*next_header)(struct TCP_Server_Info *server, char *buf, unsigned int *noff); /* ioctl passthrough for query_info */ int (*ioctl_query_info)(const unsigned int xid, struct cifs_tcon *tcon, struct cifs_sb_info *cifs_sb, __le16 *path, int is_dir, unsigned long p); /* make unix special files (block, char, fifo, socket) */ int (*make_node)(unsigned int xid, struct inode *inode, struct dentry *dentry, struct cifs_tcon *tcon, const char *full_path, umode_t mode, dev_t device_number); /* version specific fiemap implementation */ int (*fiemap)(struct cifs_tcon *tcon, struct cifsFileInfo *, struct fiemap_extent_info *, u64, u64); /* version specific llseek implementation */ loff_t (*llseek)(struct file *, struct cifs_tcon *, loff_t, int); /* Check for STATUS_IO_TIMEOUT */ bool (*is_status_io_timeout)(char *buf); /* Check for STATUS_NETWORK_NAME_DELETED */ 
bool (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv); struct reparse_data_buffer * (*get_reparse_point_buffer)(const struct kvec *rsp_iov, u32 *plen); struct inode * (*create_reparse_inode)(struct cifs_open_info_data *data, struct super_block *sb, const unsigned int xid, struct cifs_tcon *tcon, const char *full_path, bool directory, struct kvec *reparse_iov, struct kvec *xattr_iov); }; struct smb_version_values { char *version_string; __u16 protocol_id; __u32 req_capabilities; __u32 large_lock_type; __u32 exclusive_lock_type; __u32 shared_lock_type; __u32 unlock_lock_type; size_t header_preamble_size; size_t header_size; size_t max_header_size; size_t read_rsp_size; __le16 lock_cmd; unsigned int cap_unix; unsigned int cap_nt_find; unsigned int cap_large_files; unsigned int cap_unicode; __u16 signing_enabled; __u16 signing_required; size_t create_lease_size; }; #define HEADER_SIZE(server) (server->vals->header_size) #define MAX_HEADER_SIZE(server) (server->vals->max_header_size) #define HEADER_PREAMBLE_SIZE(server) (server->vals->header_preamble_size) #define MID_HEADER_SIZE(server) (HEADER_SIZE(server) - 1 - HEADER_PREAMBLE_SIZE(server)) /** * CIFS superblock mount flags (mnt_cifs_flags) to consider when * trying to reuse existing superblock for a new mount */ #define CIFS_MOUNT_MASK (CIFS_MOUNT_NO_PERM | CIFS_MOUNT_SET_UID | \ CIFS_MOUNT_SERVER_INUM | CIFS_MOUNT_DIRECT_IO | \ CIFS_MOUNT_NO_XATTR | CIFS_MOUNT_MAP_SPECIAL_CHR | \ CIFS_MOUNT_MAP_SFM_CHR | \ CIFS_MOUNT_UNX_EMUL | CIFS_MOUNT_NO_BRL | \ CIFS_MOUNT_CIFS_ACL | CIFS_MOUNT_OVERR_UID | \ CIFS_MOUNT_OVERR_GID | CIFS_MOUNT_DYNPERM | \ CIFS_MOUNT_NOPOSIXBRL | CIFS_MOUNT_NOSSYNC | \ CIFS_MOUNT_FSCACHE | CIFS_MOUNT_MF_SYMLINKS | \ CIFS_MOUNT_MULTIUSER | CIFS_MOUNT_STRICT_IO | \ CIFS_MOUNT_CIFS_BACKUPUID | CIFS_MOUNT_CIFS_BACKUPGID | \ CIFS_MOUNT_UID_FROM_ACL | CIFS_MOUNT_NO_HANDLE_CACHE | \ CIFS_MOUNT_NO_DFS | CIFS_MOUNT_MODE_FROM_SID | \ CIFS_MOUNT_RO_CACHE | CIFS_MOUNT_RW_CACHE) /** * Generic 
VFS superblock mount flags (s_flags) to consider when
 * trying to reuse existing superblock for a new mount
 */
#define CIFS_MS_MASK (SB_RDONLY | SB_MANDLOCK | SB_NOEXEC | SB_NOSUID | \
		      SB_NODEV | SB_SYNCHRONOUS)

struct cifs_mnt_data {
	struct cifs_sb_info *cifs_sb;
	struct smb3_fs_context *ctx;
	int flags;
};

/*
 * Read the big-endian 32-bit word at the start of @buf and return its low
 * 24 bits (the top byte is masked off).
 */
static inline unsigned int
get_rfc1002_length(void *buf)
{
	return be32_to_cpu(*((__be32 *)buf)) & 0xffffff;
}

struct TCP_Server_Info {
	struct list_head tcp_ses_list;
	struct list_head smb_ses_list;
	struct list_head rlist; /* reconnect list */
	spinlock_t srv_lock;  /* protect anything here that is not protected */
	__u64 conn_id; /* connection identifier (useful for debugging) */
	int srv_count; /* reference counter */
	int rfc1001_sessinit; /* whether to establish netbios session */
	bool with_rfc1001; /* if netbios session is used */
	/* 15 character server name + 0x20 16th byte indicating type = srv */
	char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
	struct smb_version_operations	*ops;
	struct smb_version_values	*vals;
	/* updates to tcpStatus protected by cifs_tcp_ses_lock */
	enum statusEnum tcpStatus; /* what we think the status is */
	char *hostname; /* hostname portion of UNC string */
	struct socket *ssocket;
	struct sockaddr_storage dstaddr;
	struct sockaddr_storage srcaddr; /* locally bind to this IP */
#ifdef CONFIG_NET_NS
	struct net *net;
#endif
	wait_queue_head_t response_q;
	wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/
	spinlock_t mid_queue_lock;  /* protect mid queue */
	spinlock_t mid_counter_lock;
	struct list_head pending_mid_q;
	/*
	 * NOTE(review): the name reads as "no blocking send" while the comment
	 * says "use blocking sendmsg" (compare noblockcnt below) - confirm
	 * which polarity is intended.
	 */
	bool noblocksnd;		/* use blocking sendmsg */
	bool noautotune;		/* do not autotune send buf sizes */
	bool nosharesock;
	bool tcp_nodelay;
	bool terminate;
	unsigned int credits;  /* send no more requests at once */
	unsigned int max_credits; /* can override large 32000 default at mnt */
	unsigned int in_flight;  /* number of requests on the wire to server */
	unsigned int max_in_flight; /* max number of requests that were on wire */
	spinlock_t req_lock;  /* protect the two values above */
	/* taken and released through cifs_server_lock()/cifs_server_unlock() */
	struct mutex _srv_mutex;
	/* memalloc_nofs_save() cookie stored by cifs_server_lock() */
	unsigned int nofs_flag;
	struct task_struct *tsk;
	char server_GUID[16];
	__u16 sec_mode;
	bool sign; /* is signing enabled on this connection? */
	bool ignore_signature:1; /* skip validation of signatures in SMB2/3 rsp */
	bool session_estab; /* mark when very first sess is established */
	int echo_credits;  /* echo reserved slots */
	int oplock_credits;  /* oplock break reserved slots */
	bool echoes:1; /* enable echoes */
	__u8 client_guid[SMB2_CLIENT_GUID_SIZE]; /* Client GUID */
	u16 dialect; /* dialect index that server chose */
	bool oplocks:1; /* enable oplocks */
	unsigned int maxReq;	/* Clients should submit no more */
	/* than maxReq distinct unanswered SMBs to the server when using  */
	/* multiplexed reads or writes (for SMB1/CIFS only, not SMB2/SMB3) */
	unsigned int maxBuf;	/* maxBuf specifies the maximum */
	/* message size the server can send or receive for non-raw SMBs */
	/* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
	/* when socket is setup (and during reconnect) before NegProt sent */
	unsigned int max_rw;	/* maxRw specifies the maximum */
	/* message size the server can send or receive for */
	/* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
	unsigned int capabilities; /* selective disabling of caps by smb sess */
	int timeAdj;  /* Adjust for difference in server time zone in sec */
	__u64 current_mid;	/* multiplex id - rotating counter, protected by mid_counter_lock */
	char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
	/* 16th byte of RFC1001 workstation name is always null */
	char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
	__u32 sequence_number; /* for signing, protected by srv_mutex */
	__u32 reconnect_instance; /* incremented on each reconnect */
	__le32 session_key_id; /* retrieved from negotiate response and send in session setup request */
	struct session_key session_key;
	unsigned long lstrp; /* when we got last response from this server */
	unsigned long neg_start; /* when negotiate started (jiffies) */
	struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
#define	CIFS_NEGFLAVOR_UNENCAP	1	/* wct == 17, but no ext_sec */
#define	CIFS_NEGFLAVOR_EXTENDED	2	/* wct == 17, ext_sec bit set */
	char	negflavor;	/* NEGOTIATE response flavor */
	/* extended security flavors that server supports */
	bool	sec_ntlmssp;		/* supports NTLMSSP */
	bool	sec_kerberosu2u;	/* supports U2U Kerberos */
	bool	sec_kerberos;		/* supports plain Kerberos */
	bool	sec_mskerberos;		/* supports legacy MS Kerberos */
	bool	sec_iakerb;		/* supports pass-through auth for Kerberos (krb5 proxy) */
	bool	large_buf;		/* is current buffer large? */
	/* use SMBD connection instead of socket */
	bool	rdma;
	/* point to the SMBD connection if RDMA is used instead of socket */
	struct smbd_connection *smbd_conn;
	struct delayed_work	echo; /* echo ping workqueue job */
	char	*smallbuf;	/* pointer to current "small" buffer */
	char	*bigbuf;	/* pointer to current "big" buffer */
	/* Total size of this PDU. Only valid from cifs_demultiplex_thread */
	unsigned int pdu_size;
	unsigned int total_read; /* total amount of data read in this pass */
	atomic_t in_send; /* requests trying to send */
	atomic_t num_waiters;   /* blocked waiting to get in sendrecv */
#ifdef CONFIG_CIFS_STATS2
	atomic_t num_cmds[NUMBER_OF_SMB2_COMMANDS]; /* total requests by cmd */
	atomic_t smb2slowcmd[NUMBER_OF_SMB2_COMMANDS]; /* count resps > 1 sec */
	__u64 time_per_cmd[NUMBER_OF_SMB2_COMMANDS]; /* total time per cmd */
	__u32 slowest_cmd[NUMBER_OF_SMB2_COMMANDS];
	__u32 fastest_cmd[NUMBER_OF_SMB2_COMMANDS];
#endif /* STATS2 */
	unsigned int	max_read;
	unsigned int	max_write;
	unsigned int	min_offload;
	/*
	 * If payload is less than or equal to the threshold,
	 * use RDMA send/recv to send upper layer I/O.
	 * If payload is more than the threshold,
	 * use RDMA read/write through memory registration for I/O.
	 */
	unsigned int rdma_readwrite_threshold;
	unsigned int	retrans;
	struct {
		bool requested; /* "compress" mount option set*/
		bool enabled; /* actually negotiated with server */
		__le16 alg; /* preferred alg negotiated with server */
	} compression;
	__u16	signing_algorithm;
	__le16	cipher_type;
	 /* save initial negprot hash */
	__u8	preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
	bool	signing_negotiated; /* true if valid signing context rcvd from server */
	bool	posix_ext_supported;
	struct delayed_work reconnect; /* reconnect workqueue job */
	struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
	unsigned long echo_interval;

	/*
	 * Number of targets available for reconnect. The more targets
	 * the more tasks have to wait to let the demultiplex thread
	 * reconnect.
	 */
	int nr_targets;
	bool noblockcnt; /* use non-blocking connect() */

	/*
	 * If this is a session channel,
	 * primary_server holds the ref-counted
	 * pointer to primary channel connection for the session.
	 */
#define SERVER_IS_CHAN(server)	(!!(server)->primary_server)
	struct TCP_Server_Info *primary_server;
	__u16 channel_sequence_num;  /* incremented on primary channel on each chan reconnect */

#ifdef CONFIG_CIFS_SWN_UPCALL
	bool use_swn_dstaddr;
	struct sockaddr_storage swn_dstaddr;
#endif
	/*
	 * Canonical DFS referral path used in cifs_reconnect() for failover as
	 * well as in DFS cache refresher.
	 *
	 * format: \\HOST\SHARE[\OPTIONAL PATH]
	 */
	char *leaf_fullpath;
	bool dfs_conn:1;
	char dns_dom[CIFS_MAX_DOMAINNAME_LEN + 1];
};

/*
 * Return true when the server's dialect values declare a non-zero header
 * preamble size.  (That only SMB1 has such a preamble is implied by the
 * name; it is not established in this header.)
 */
static inline bool is_smb1(struct TCP_Server_Info *server)
{
	return HEADER_PREAMBLE_SIZE(server) != 0;
}

/*
 * Take server->_srv_mutex.  The memalloc_nofs_save() cookie is obtained
 * before blocking on the mutex and only stored in the server once the mutex
 * is held, so the shared nofs_flag field is written by the lock owner only.
 */
static inline void cifs_server_lock(struct TCP_Server_Info *server)
{
	unsigned int nofs_flag = memalloc_nofs_save();

	mutex_lock(&server->_srv_mutex);
	server->nofs_flag = nofs_flag;
}

/*
 * Release server->_srv_mutex.  The saved cookie is copied out while the
 * mutex is still held and restored after the unlock, mirroring
 * cifs_server_lock().
 */
static inline void cifs_server_unlock(struct TCP_Server_Info *server)
{
	unsigned int nofs_flag = server->nofs_flag;

	mutex_unlock(&server->_srv_mutex);
	memalloc_nofs_restore(nofs_flag);
}

/*
 * Credit bookkeeping handed to the add_credits/adjust_credits callbacks.
 * add_credits_and_wake_if() treats ->value == 0 as "nothing to return" and
 * zeroes it after handing the credits back.
 */
struct cifs_credits {
	unsigned int value;
	unsigned int instance;
	unsigned int in_flight_check;
	unsigned int rreq_debug_id;
	unsigned int rreq_debug_index;
};

/* Return a snapshot of server->in_flight, read under req_lock. */
static inline unsigned int
in_flight(struct TCP_Server_Info *server)
{
	unsigned int num;

	spin_lock(&server->req_lock);
	num = server->in_flight;
	spin_unlock(&server->req_lock);
	return num;
}

/*
 * Return true when the counter pointed to by @credits, read under
 * server->req_lock, is at least @num_credits.
 */
static inline bool
has_credits(struct TCP_Server_Info *server, int *credits, int num_credits)
{
	int num;

	spin_lock(&server->req_lock);
	num = *credits;
	spin_unlock(&server->req_lock);
	return num >= num_credits;
}

/* Forward to the ops->add_credits callback (called unconditionally). */
static inline void
add_credits(struct TCP_Server_Info *server, struct cifs_credits *credits,
	    const int optype)
{
	server->ops->add_credits(server, credits, optype);
}

/*
 * If @credits holds a non-zero value: hand it to ops->add_credits, wake
 * waiters on server->request_q, then clear credits->value so a second call
 * does nothing.
 */
static inline void
add_credits_and_wake_if(struct TCP_Server_Info *server,
			struct cifs_credits *credits, const int optype)
{
	if (credits->value) {
		server->ops->add_credits(server, credits, optype);
		wake_up(&server->request_q);
		credits->value = 0;
	}
}

static inline void
set_credits(struct TCP_Server_Info *server, const int val) { server->ops->set_credits(server, val); } static inline int adjust_credits(struct TCP_Server_Info *server, struct cifs_io_subrequest *subreq, unsigned int /* enum smb3_rw_credits_trace */ trace) { return server->ops->adjust_credits ? server->ops->adjust_credits(server, subreq, trace) : 0; } static inline __le64 get_next_mid64(struct TCP_Server_Info *server) { return cpu_to_le64(server->ops->get_next_mid(server)); } static inline __le16 get_next_mid(struct TCP_Server_Info *server) { __u16 mid = server->ops->get_next_mid(server); /* * The value in the SMB header should be little endian for easy * on-the-wire decoding. */ return cpu_to_le16(mid); } static inline void revert_current_mid(struct TCP_Server_Info *server, const unsigned int val) { if (server->ops->revert_current_mid) server->ops->revert_current_mid(server, val); } static inline void revert_current_mid_from_hdr(struct TCP_Server_Info *server, const struct smb2_hdr *shdr) { unsigned int num = le16_to_cpu(shdr->CreditCharge); return revert_current_mid(server, num > 0 ? num : 1); } static inline __u16 get_mid(const struct smb_hdr *smb) { return le16_to_cpu(smb->Mid); } static inline bool compare_mid(__u16 mid, const struct smb_hdr *smb) { return mid == le16_to_cpu(smb->Mid); } /* * When the server supports very large reads and writes via POSIX extensions, * we can allow up to 2^24-1, minus the size of a READ/WRITE_AND_X header, not * including the RFC1001 length. * * Note that this might make for "interesting" allocation problems during * writeback however as we have to allocate an array of pointers for the * pages. A 16M write means ~32kb page array with PAGE_SIZE == 4096. * * For reads, there is a similar problem as we need to allocate an array * of kvecs to handle the receive, though that should only need to be done * once. 
*/
/*
 * The "+ 4" presumably gives back the 4-byte RFC1001 length field that
 * sizeof(WRITE_REQ)/sizeof(READ_RSP) would otherwise count - TODO confirm
 * against the structure definitions.
 *
 * NOTE(review): CIFS_MAX_RSIZE is the only one of the four maxima below
 * without the "- 1" term (2^24 rather than 2^24-1); confirm whether that is
 * intentional.
 */
#define CIFS_MAX_WSIZE ((1<<24) - 1 - sizeof(WRITE_REQ) + 4)
#define CIFS_MAX_RSIZE ((1<<24) - sizeof(READ_RSP) + 4)

/*
 * When the server doesn't allow large posix writes, only allow a rsize/wsize
 * of 2^17-1 minus the size of the call header. That allows for a read or
 * write up to the maximum size described by RFC1002.
 */
#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4)
#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4)

/*
 * Windows only supports a max of 60kb reads and 65535 byte writes. Default to
 * those values when posix extensions aren't in force. In actuality here, we
 * use 65536 to allow for a write that is a multiple of 4k. Most servers seem
 * to be ok with the extra byte even though Windows doesn't send writes that
 * are that large.
 *
 * Citation:
 *
 * https://blogs.msdn.com/b/openspecification/archive/2009/04/10/smb-maximum-transmit-buffer-size-and-performance-tuning.aspx
 */
#define CIFS_DEFAULT_NON_POSIX_RSIZE (60 * 1024)
#define CIFS_DEFAULT_NON_POSIX_WSIZE (65536)

/*
 * Macros to allow the TCP_Server_Info->net field and related code to drop out
 * when CONFIG_NET_NS isn't set.
*/ #ifdef CONFIG_NET_NS static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) { return srv->net; } static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) { srv->net = net; } #else static inline struct net *cifs_net_ns(struct TCP_Server_Info *srv) { return &init_net; } static inline void cifs_set_net_ns(struct TCP_Server_Info *srv, struct net *net) { } #endif struct cifs_server_iface { struct list_head iface_head; struct kref refcount; size_t speed; size_t weight_fulfilled; unsigned int num_channels; unsigned int rdma_capable : 1; unsigned int rss_capable : 1; unsigned int is_active : 1; /* unset if non existent */ struct sockaddr_storage sockaddr; }; /* release iface when last ref is dropped */ static inline void release_iface(struct kref *ref) { struct cifs_server_iface *iface = container_of(ref, struct cifs_server_iface, refcount); kfree(iface); } struct cifs_chan { unsigned int in_reconnect : 1; /* if session setup in progress for this channel */ struct TCP_Server_Info *server; struct cifs_server_iface *iface; /* interface in use */ __u8 signkey[SMB3_SIGN_KEY_SIZE]; }; #define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) #define CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES (0x2) /* * Session structure. 
One of these for each uid session with a particular host
 */
struct cifs_ses {
	struct list_head smb_ses_list;
	struct list_head rlist; /* reconnect list */
	struct list_head tcon_list;
	struct list_head dlist; /* dfs list */
	struct cifs_tcon *tcon_ipc;
	spinlock_t ses_lock;  /* protect anything here that is not protected */
	struct mutex session_mutex;
	struct TCP_Server_Info *server;	/* pointer to server info */
	int ses_count;		/* reference counter */
	enum ses_status_enum ses_status;  /* updates protected by cifs_tcp_ses_lock */
	unsigned int overrideSecFlg; /* if non-zero override global sec flags */
	char *serverOS;		/* name of operating system underlying server */
	char *serverNOS;	/* name of network operating system of server */
	char *serverDomain;	/* security realm of server */
	__u64 Suid;		/* remote smb uid  */
	kuid_t linux_uid;	/* overriding owner of files on the mount */
	kuid_t cred_uid;	/* owner of credentials */
	unsigned int capabilities;
	char ip_addr[INET6_ADDRSTRLEN + 1]; /* Max ipv6 (or v4) addr string len */
	char *user_name;	/* must not be null except during init of sess
				   and after mount option parsing we fill it */
	char *domainName;
	char *password;
	char *password2; /* When key rotation used, new password may be set before it expires */
	char workstation_name[CIFS_MAX_WORKSTATION_LEN];
	struct session_key auth_key;
	struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
	enum securityEnum sectype; /* what security flavor was specified? */
	enum upcall_target_enum upcall_target; /* what upcall target was specified? */
	bool sign;		/* is signing required? */
	bool domainAuto:1;
	bool expired_pwd;  /* track if access denied or expired pwd so can know if need to update */
	int unicode;
	unsigned int flags;
	__u16 session_flags;
	__u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
	__u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE];
	__u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE];
	__u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];

	/*
	 * Network interfaces available on the server this session is
	 * connected to.
	 *
	 * Other channels can be opened by connecting and binding this
	 * session to interfaces from this list.
	 *
	 * iface_lock should be taken when accessing any of these fields
	 */
	spinlock_t iface_lock;
	/* ========= begin: protected by iface_lock ======== */
	struct list_head iface_list;
	size_t iface_count;
	unsigned long iface_last_update; /* jiffies */
	/* ========= end: protected by iface_lock ======== */

	spinlock_t chan_lock;
	/* ========= begin: protected by chan_lock ======== */
#define CIFS_MAX_CHANNELS 16
#define CIFS_INVAL_CHAN_INDEX (-1)
/* bitmask with the low chan_count bits set */
#define CIFS_ALL_CHANNELS_SET(ses)	\
	((1UL << (ses)->chan_count) - 1)
/* true when no bit is set in chans_need_reconnect */
#define CIFS_ALL_CHANS_GOOD(ses)		\
	(!(ses)->chans_need_reconnect)
/* true when exactly the low chan_count bits are set */
#define CIFS_ALL_CHANS_NEED_RECONNECT(ses)	\
	((ses)->chans_need_reconnect == CIFS_ALL_CHANNELS_SET(ses))
/* assignment: marks every channel in [0, chan_count) as needing reconnect */
#define CIFS_SET_ALL_CHANS_NEED_RECONNECT(ses)	\
	((ses)->chans_need_reconnect = CIFS_ALL_CHANNELS_SET(ses))
#define CIFS_CHAN_NEEDS_RECONNECT(ses, index)	\
	test_bit((index), &(ses)->chans_need_reconnect)
#define CIFS_CHAN_IN_RECONNECT(ses, index)	\
	((ses)->chans[(index)].in_reconnect)

	struct cifs_chan chans[CIFS_MAX_CHANNELS];
	size_t chan_count;
	size_t chan_max;
	atomic_t chan_seq; /* round robin state */

	/*
	 * chans_need_reconnect is a bitmap indicating which of the channels
	 * under this smb session needs to be reconnected.
	 * If not multichannel session, only one bit will be used.
	 *
	 * We will ask for sess and tcon reconnection only if all the
	 * channels are marked for needing reconnection. This will
	 * enable the sessions on top to continue to live till any
	 * of the channels below are active.
	 */
	unsigned long chans_need_reconnect;
	/* ========= end: protected by chan_lock ======== */
	struct cifs_ses *dfs_root_ses;
	struct nls_table *local_nls;
	char *dns_dom; /* FQDN of the domain */
};

/*
 * Return true when the session's capabilities share at least one bit with
 * the cap_unix value of the server's dialect.
 */
static inline bool
cap_unix(struct cifs_ses *ses)
{
	return ses->server->vals->cap_unix & ses->capabilities;
}

/*
 * common struct for holding inode info when searching for or updating an
 * inode with new info
 */

/* flag bits - presumably stored in cifs_fattr::cf_flags; verify against users */
#define CIFS_FATTR_JUNCTION		0x1
#define CIFS_FATTR_DELETE_PENDING	0x2
#define CIFS_FATTR_NEED_REVAL		0x4
#define CIFS_FATTR_INO_COLLISION	0x8
#define CIFS_FATTR_UNKNOWN_NLINK	0x10
#define CIFS_FATTR_FAKE_ROOT_INO	0x20

struct cifs_fattr {
	u32		cf_flags;
	u32		cf_cifsattrs;
	u64		cf_uniqueid;
	u64		cf_eof;
	u64		cf_bytes;
	u64		cf_createtime;
	kuid_t		cf_uid;
	kgid_t		cf_gid;
	umode_t		cf_mode;
	dev_t		cf_rdev;
	unsigned int	cf_nlink;
	unsigned int	cf_dtype;
	struct timespec64 cf_atime;
	struct timespec64 cf_mtime;
	struct timespec64 cf_ctime;
	u32             cf_cifstag;
	char            *cf_symlink_target;
};

/*
 * there is one of these for each connection to a resource on a particular
 * session
 */
struct cifs_tcon {
	struct list_head tcon_list;
	int debug_id;		/* Debugging for tracing */
	int tc_count;
	struct list_head rlist; /* reconnect list */
	spinlock_t tc_lock;  /* protect anything here that is not protected */
	atomic_t num_local_opens;  /* num of all opens including disconnected */
	atomic_t num_remote_opens; /* num of all network opens on server */
	struct list_head openFileList;
	spinlock_t open_file_lock; /* protects list above */
	struct cifs_ses *ses;	/* pointer to session associated with */
	char tree_name[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
	char *nativeFileSystem;
	char *password;		/* for share-level security */
	__u32 tid;		/* The 4 byte tree id */
	__u16 Flags;		/* optional support bits */
	enum tid_status_enum status;
	atomic_t num_smbs_sent;
	/* the two counter sets share storage, so only one is meaningful per tcon */
	union {
		struct {
			atomic_t num_writes;
			atomic_t num_reads;
			atomic_t num_flushes;
			atomic_t num_oplock_brks;
			atomic_t num_opens;
			atomic_t num_closes;
			atomic_t num_deletes;
			atomic_t num_mkdirs;
			atomic_t num_posixopens;
			atomic_t num_posixmkdirs;
			atomic_t num_rmdirs;
			atomic_t num_renames;
			atomic_t num_t2renames;
			atomic_t num_ffirst;
			atomic_t num_fnext;
			atomic_t num_fclose;
			atomic_t num_hardlinks;
			atomic_t num_symlinks;
			atomic_t num_locks;
			atomic_t num_acl_get;
			atomic_t num_acl_set;
		} cifs_stats;
		struct {
			atomic_t smb2_com_sent[NUMBER_OF_SMB2_COMMANDS];
			atomic_t smb2_com_failed[NUMBER_OF_SMB2_COMMANDS];
		} smb2_stats;
	} stats;
	__u64    bytes_read;
	__u64    bytes_written;
	spinlock_t stat_lock;  /* protects the two fields above */
	time64_t stats_from_time;
	FILE_SYSTEM_DEVICE_INFO fsDevInfo;
	FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
	FILE_SYSTEM_UNIX_INFO fsUnixInfo;
	bool ipc:1;   /* set if connection to IPC$ share (always also pipe) */
	bool pipe:1;  /* set if connection to pipe share */
	bool print:1; /* set if connection to printer share */
	bool retry:1;
	bool nocase:1;
	bool nohandlecache:1; /* if strange server resource prob can turn off */
	bool nodelete:1;
	bool seal:1;      /* transport encryption for this mounted share */
	bool unix_ext:1;  /* if false disable Linux extensions to CIFS protocol
				for this mount even if server would support */
	bool posix_extensions; /* if true SMB3.11 posix extensions enabled */
	bool local_lease:1; /* check leases (only) on local system not remote */
	bool broken_posix_open; /* e.g. Samba server versions < 3.3.2, 3.2.9 */
	bool broken_sparse_sup; /* if server or share does not support sparse */
	bool need_reconnect:1; /* connection reset, tid now invalid */
	bool need_reopen_files:1; /* need to reopen tcon file handles */
	bool use_resilient:1; /* use resilient instead of durable handles */
	bool use_persistent:1; /* use persistent instead of durable handles */
	bool no_lease:1;    /* Do not request leases on files or directories */
	bool use_witness:1; /* use witness protocol */
	bool dummy:1; /* dummy tcon used for reconnecting channels */
	__le32 capabilities;
	__u32 share_flags;
	__u32 maximal_access;
	__u32 vol_serial_number;
	__le64 vol_create_time;
	__u64 snapshot_time; /* for timewarp tokens - timestamp of snapshot */
	__u32 handle_timeout; /* persistent and durable handle timeout in ms */
	__u32 ss_flags;		/* sector size flags */
	__u32 perf_sector_size; /* best sector size for perf */
	__u32 max_chunks;
	__u32 max_bytes_chunk;
	__u32 max_bytes_copy;
	__u32 max_cached_dirs;
#ifdef CONFIG_CIFS_FSCACHE
	u64 resource_id;		/* server resource id */
	bool fscache_acquired;		/* T if we've tried acquiring a cookie */
	struct fscache_volume *fscache;	/* cookie for share */
	struct mutex fscache_lock;	/* Prevent regetting a cookie */
#endif
	struct list_head pending_opens;	/* list of incomplete opens */
	struct cached_fids *cfids;
	struct list_head cifs_sb_list;
	spinlock_t sb_list_lock;
#ifdef CONFIG_CIFS_DFS_UPCALL
	struct delayed_work dfs_cache_work;
	struct list_head dfs_ses_list;
#endif
	struct delayed_work	query_interfaces; /* query interfaces workqueue job */
	char *origin_fullpath; /* canonical copy of smb3_fs_context::source */
};

/*
 * This is a refcounted and timestamped container for a tcon pointer. The
 * container holds a tcon reference. It is considered safe to free one of
 * these when the tl_count goes to 0. The tl_time is the time of the last
 * "get" on the container.
*/
struct tcon_link {
	struct rb_node		tl_rbnode;
	kuid_t			tl_uid;
	unsigned long		tl_flags;
/* NOTE(review): values look like bit numbers within tl_flags - confirm against users */
#define TCON_LINK_MASTER	0
#define TCON_LINK_PENDING	1
#define TCON_LINK_IN_TREE	2
	unsigned long		tl_time;
	atomic_t		tl_count;
	struct cifs_tcon	*tl_tcon;
};

extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb);
extern void smb3_free_compound_rqst(int num_rqst, struct smb_rqst *rqst);

/* Return the tcon pointer stored in @tlink; @tlink is dereferenced unchecked. */
static inline struct cifs_tcon *
tlink_tcon(struct tcon_link *tlink)
{
	return tlink->tl_tcon;
}

/* Return the master_tlink pointer stored in @cifs_sb. */
static inline struct tcon_link *
cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
{
	return cifs_sb->master_tlink;
}

extern void cifs_put_tlink(struct tcon_link *tlink);

/*
 * Bump tl_count when @tlink is a usable pointer (neither NULL nor an
 * ERR_PTR value). @tlink is returned unchanged in every case, so an
 * error pointer passed in is handed straight back to the caller.
 */
static inline struct tcon_link *
cifs_get_tlink(struct tcon_link *tlink)
{
	if (tlink && !IS_ERR(tlink))
		atomic_inc(&tlink->tl_count);
	return tlink;
}

/* This function is always expected to succeed */
extern struct cifs_tcon *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb);

#define CIFS_OPLOCK_NO_CHANGE 0xfe

/* Entry linked via olist; carries the tlink, lease key and oplock of one open. */
struct cifs_pending_open {
	struct list_head olist;
	struct tcon_link *tlink;
	__u8 lease_key[16];
	__u32 oplock;
};

/* Entry linked via dlist; identifies one handle by netfid or persistent/volatile fid. */
struct cifs_deferred_close {
	struct list_head dlist;
	struct tcon_link *tlink;
	__u16  netfid;
	__u64  persistent_fid;
	__u64  volatile_fid;
};

/*
 * This info hangs off the cifsFileInfo structure, pointed to by llist.
* This is used to track byte stream locks on the file
 */
struct cifsLockInfo {
	struct list_head llist;	/* pointer to next cifsLockInfo */
	struct list_head blist; /* pointer to locks blocked on this */
	wait_queue_head_t block_q;
	__u64 offset;
	__u64 length;
	__u32 pid;
	__u16 type;
	__u16 flags;
};

/*
 * One of these for each open instance of a file
 * (embedded in struct cifsFileInfo as srch_inf).
 */
struct cifs_search_info {
	loff_t index_of_last_entry;
	__u16 entries_in_buffer;
	__u16 info_level;
	__u32 resume_key;
	char *ntwrk_buf_start;
	char *srch_entries_start;
	char *last_entry;
	const char *presume_name;
	unsigned int resume_name_len;
	bool endOfSearch:1;
	bool emptyDir:1;
	bool unicode:1;
	bool smallBuf:1; /* so we know which buf_release function to call */
};

/* All-ones umode_t value; NOTE(review): presumably "no mode supplied" - confirm with users */
#define ACL_NO_MODE	((umode_t)(-1))

/* Argument bundle describing one open request (see CIFS_OPARMS() for a typical initializer). */
struct cifs_open_parms {
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	int disposition;
	int desired_access;
	int create_options;
	const char *path;
	struct cifs_fid *fid;
	umode_t mode;
	bool reconnect:1;
	bool replay:1; /* indicates that this open is for a replay */
	struct kvec *ea_cctx;
	__le32 lease_flags;
};

/* Server-side identity of an open handle plus its lease information. */
struct cifs_fid {
	__u16 netfid;
	__u64 persistent_fid;	/* persist file id for smb2 */
	__u64 volatile_fid;	/* volatile file id for smb2 */
	__u8 lease_key[SMB2_LEASE_KEY_SIZE];	/* lease key for smb2 */
	__u8 parent_lease_key[SMB2_LEASE_KEY_SIZE];
	__u8 create_guid[16];
	__u32 access;
	struct cifs_pending_open *pending_open;
	__u16 epoch;
#ifdef CONFIG_CIFS_DEBUG2
	__u64 mid;
#endif /* CIFS_DEBUG2 */
	bool purge_cache;
};

/* Associates one open file (cfile) with the list of locks it holds. */
struct cifs_fid_locks {
	struct list_head llist;
	struct cifsFileInfo *cfile;	/* fid that owns locks */
	struct list_head locks;		/* locks held by fid above */
};

/* Per-open-file state: list linkage, remote fid, oplock data and deferred work items. */
struct cifsFileInfo {
	/* following two lists are protected by tcon->open_file_lock */
	struct list_head tlist;	/* pointer to next fid owned by tcon */
	struct list_head flist;	/* next fid (file instance) for this inode */
	/* lock list below protected by cifsi->lock_sem */
	struct cifs_fid_locks *llist;	/* brlocks held by this fid */
	kuid_t uid;		/* allows finding which FileInfo structure */
	__u32 pid;		/* process id who opened file */
	struct cifs_fid fid;	/* file id from remote */
	struct list_head rlist; /* reconnect list */
	/* BB add lock scope info here if needed */
	/* lock scope id (0 if none) */
	struct dentry *dentry;
	struct tcon_link *tlink;
	unsigned int f_flags;
	bool invalidHandle:1;	/* file closed via session abend */
	bool swapfile:1;
	bool oplock_break_cancelled:1;
	bool status_file_deleted:1; /* file has been deleted */
	bool offload:1; /* offload final part of _put to a wq */
	__u16 oplock_epoch; /* epoch from the lease break */
	__u32 oplock_level; /* oplock/lease level from the lease break */
	int count;	/* reference count; incremented by cifsFileInfo_get_locked() */
	spinlock_t file_info_lock; /* protects four flag/count fields above */
	struct mutex fh_mutex; /* prevents reopen race after dead ses*/
	struct cifs_search_info srch_inf;
	struct work_struct oplock_break; /* work for oplock breaks */
	struct work_struct put; /* work for the final part of _put */
	struct work_struct serverclose; /* work for serverclose */
	struct delayed_work deferred;
	bool deferred_close_scheduled; /* Flag to indicate close is scheduled */
	char *symlink_target;
};

/* Handle identity, byte range (offset/length) and target tcon/server for one I/O call. */
struct cifs_io_parms {
	__u16 netfid;
	__u64 persistent_fid;	/* persist file id for smb2 */
	__u64 volatile_fid;	/* volatile file id for smb2 */
	__u32 pid;
	__u64 offset;
	unsigned int length;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
};

/* netfs I/O request (rreq is the first member) extended with the cifs file and pid. */
struct cifs_io_request {
	struct netfs_io_request		rreq;
	struct cifsFileInfo		*cfile;
	pid_t				pid;
};

/* asynchronous read support */
struct cifs_io_subrequest {
	/* The three union members alias the same storage at offset 0. */
	union {
		struct netfs_io_subrequest subreq;
		struct netfs_io_request *rreq;
		struct cifs_io_request *req;
	};
	ssize_t				got_bytes;
	unsigned int			xid;
	int				result;
	bool				have_xid;
	bool				replay;
	struct kvec			iov[2];
	struct TCP_Server_Info		*server;
#ifdef CONFIG_CIFS_SMB_DIRECT
	struct smbdirect_mr_io		*mr;
#endif
	struct cifs_credits		credits;
};

/*
 * Take a reference on the file private data. Must be called with
 * cfile->file_info_lock held.
*/ static inline void cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file) { ++cifs_file->count; } struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr, bool offload); void cifsFileInfo_put(struct cifsFileInfo *cifs_file); int cifs_file_flush(const unsigned int xid, struct inode *inode, struct cifsFileInfo *cfile); int cifs_file_set_size(const unsigned int xid, struct dentry *dentry, const char *full_path, struct cifsFileInfo *open_file, loff_t size); #define CIFS_CACHE_READ_FLG 1 #define CIFS_CACHE_HANDLE_FLG 2 #define CIFS_CACHE_RH_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_HANDLE_FLG) #define CIFS_CACHE_WRITE_FLG 4 #define CIFS_CACHE_RW_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_WRITE_FLG) #define CIFS_CACHE_RHW_FLG (CIFS_CACHE_RW_FLG | CIFS_CACHE_HANDLE_FLG) #define CIFS_CACHE_READ(cinode) ((cinode->oplock & CIFS_CACHE_READ_FLG) || (CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE)) #define CIFS_CACHE_HANDLE(cinode) (cinode->oplock & CIFS_CACHE_HANDLE_FLG) #define CIFS_CACHE_WRITE(cinode) ((cinode->oplock & CIFS_CACHE_WRITE_FLG) || (CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE)) /* * One of these for each file inode */ struct cifsInodeInfo { struct netfs_inode netfs; /* Netfslib context and vfs inode */ bool can_cache_brlcks; struct list_head llist; /* locks helb by this inode */ /* * NOTE: Some code paths call down_read(lock_sem) twice, so * we must always use cifs_down_write() instead of down_write() * for this semaphore to avoid deadlocks. */ struct rw_semaphore lock_sem; /* protect the fields above */ /* BB add in lists for dirty pages i.e. write caching info for oplock */ struct list_head openFileList; spinlock_t open_file_lock; /* protects openFileList */ __u32 cifsAttrs; /* e.g. 
DOS archive bit, sparse, compressed, system */ unsigned int oplock; /* oplock/lease level we have */ __u16 epoch; /* used to track lease state changes */ #define CIFS_INODE_PENDING_OPLOCK_BREAK (0) /* oplock break in progress */ #define CIFS_INODE_PENDING_WRITERS (1) /* Writes in progress */ #define CIFS_INODE_FLAG_UNUSED (2) /* Unused flag */ #define CIFS_INO_DELETE_PENDING (3) /* delete pending on server */ #define CIFS_INO_INVALID_MAPPING (4) /* pagecache is invalid */ #define CIFS_INO_LOCK (5) /* lock bit for synchronization */ #define CIFS_INO_CLOSE_ON_LOCK (7) /* Not to defer the close when lock is set */ unsigned long flags; spinlock_t writers_lock; unsigned int writers; /* Number of writers on this inode */ unsigned long time; /* jiffies of last update of inode */ u64 uniqueid; /* server inode number */ u64 createtime; /* creation time on server */ __u8 lease_key[SMB2_LEASE_KEY_SIZE]; /* lease key for this inode */ struct list_head deferred_closes; /* list of deferred closes */ spinlock_t deferred_lock; /* protection on deferred list */ bool lease_granted; /* Flag to indicate whether lease or oplock is granted. 
*/ char *symlink_target; __u32 reparse_tag; }; static inline struct cifsInodeInfo * CIFS_I(struct inode *inode) { return container_of(inode, struct cifsInodeInfo, netfs.inode); } static inline struct cifs_sb_info * CIFS_SB(struct super_block *sb) { return sb->s_fs_info; } static inline struct cifs_sb_info * CIFS_FILE_SB(struct file *file) { return CIFS_SB(file_inode(file)->i_sb); } static inline char CIFS_DIR_SEP(const struct cifs_sb_info *cifs_sb) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) return '/'; else return '\\'; } static inline void convert_delimiter(char *path, char delim) { char old_delim, *pos; if (delim == '/') old_delim = '\\'; else old_delim = '/'; pos = path; while ((pos = strchr(pos, old_delim))) *pos = delim; } #define cifs_stats_inc atomic_inc static inline void cifs_stats_bytes_written(struct cifs_tcon *tcon, unsigned int bytes) { if (bytes) { spin_lock(&tcon->stat_lock); tcon->bytes_written += bytes; spin_unlock(&tcon->stat_lock); } } static inline void cifs_stats_bytes_read(struct cifs_tcon *tcon, unsigned int bytes) { spin_lock(&tcon->stat_lock); tcon->bytes_read += bytes; spin_unlock(&tcon->stat_lock); } /* * This is the prototype for the mid receive function. This function is for * receiving the rest of the SMB frame, starting with the WordCount (which is * just after the MID in struct smb_hdr). Note: * * - This will be called by cifsd, with no locks held. * - The mid will still be on the pending_mid_q. * - mid->resp_buf will point to the current buffer. * * Returns zero on a successful receive, or an error. The receive state in * the TCP_Server_Info will also be updated. */ typedef int (mid_receive_t)(struct TCP_Server_Info *server, struct mid_q_entry *mid); /* * This is the prototype for the mid callback function. This is called once the * mid has been received off of the socket. When creating one, take special * care to avoid deadlocks. 
Things to bear in mind:
 *
 * - it will be called by cifsd, with no locks held
 * - the mid will be removed from any lists
 */
typedef void (mid_callback_t)(struct mid_q_entry *mid);

/*
 * This is the prototype for mid handle function. This is called once the mid
 * has been recognized after decryption of the message.
 */
typedef int (mid_handle_t)(struct TCP_Server_Info *server,
			    struct mid_q_entry *mid);

/* one of these for every pending CIFS request to the server */
struct mid_q_entry {
	struct list_head qhead;	/* mids waiting on reply from this server */
	struct kref refcount;
	struct TCP_Server_Info *server;	/* server corresponding to this mid */
	__u64 mid;		/* multiplex id */
	__u16 credits;		/* number of credits consumed by this mid */
	__u16 credits_received;	/* number of credits from the response */
	__u32 pid;		/* process id */
	__u32 sequence_number;  /* for CIFS signing */
	unsigned long when_alloc;  /* when mid was created */
#ifdef CONFIG_CIFS_STATS2
	unsigned long when_sent; /* time when smb send finished */
	unsigned long when_received; /* when demux complete (taken off wire) */
#endif
	mid_receive_t *receive; /* call receive callback */
	mid_callback_t *callback; /* call completion callback */
	mid_handle_t *handle; /* call handle mid callback */
	void *callback_data;	  /* general purpose pointer for callback */
	struct task_struct *creator;
	void *resp_buf;		/* pointer to received SMB header */
	unsigned int resp_buf_size;
	int mid_state;	/* wish this were enum but can not pass to wait_event */
	int mid_rc;		/* rc for MID_RC */
	__le16 command;		/* smb command code */
	unsigned int optype;	/* operation type */
	spinlock_t mid_lock;	/* held by mid_execute_callback() while it reads and clears ->callback */
	bool wait_cancelled:1;  /* Cancelled while waiting for response */
	bool deleted_from_q:1;  /* Whether Mid has been dequeued from pending_mid_q */
	bool large_buf:1;	/* if valid response, is pointer to large buf */
	bool multiRsp:1;	/* multiple trans2 responses for one request  */
	bool multiEnd:1;	/* both received */
	bool decrypted:1;	/* decrypted entry */
};

struct close_cancelled_open { struct cifs_fid fid; struct cifs_tcon *tcon; struct work_struct work; __u64 mid; __u16 cmd; }; /* Make code in transport.c a little cleaner by moving update of optional stats into function below */ static inline void cifs_in_send_inc(struct TCP_Server_Info *server) { atomic_inc(&server->in_send); } static inline void cifs_in_send_dec(struct TCP_Server_Info *server) { atomic_dec(&server->in_send); } static inline void cifs_num_waiters_inc(struct TCP_Server_Info *server) { atomic_inc(&server->num_waiters); } static inline void cifs_num_waiters_dec(struct TCP_Server_Info *server) { atomic_dec(&server->num_waiters); } #ifdef CONFIG_CIFS_STATS2 static inline void cifs_save_when_sent(struct mid_q_entry *mid) { mid->when_sent = jiffies; } #else static inline void cifs_save_when_sent(struct mid_q_entry *mid) { } #endif /* for pending dnotify requests */ struct dir_notify_req { struct list_head lhead; __le16 Pid; __le16 PidHigh; __u16 Mid; __u16 Tid; __u16 Uid; __u16 netfid; __u32 filter; /* CompletionFilter (for multishot) */ int multishot; struct file *pfile; }; struct dfs_info3_param { int flags; /* DFSREF_REFERRAL_SERVER, DFSREF_STORAGE_SERVER*/ int path_consumed; int server_type; int ref_flag; char *path_name; char *node_name; int ttl; }; struct file_list { struct list_head list; struct cifsFileInfo *cfile; }; struct cifs_mount_ctx { struct cifs_sb_info *cifs_sb; struct smb3_fs_context *fs_ctx; unsigned int xid; struct TCP_Server_Info *server; struct cifs_ses *ses; struct cifs_tcon *tcon; }; static inline void __free_dfs_info_param(struct dfs_info3_param *param) { kfree(param->path_name); kfree(param->node_name); } static inline void free_dfs_info_param(struct dfs_info3_param *param) { if (param) __free_dfs_info_param(param); } static inline void zfree_dfs_info_param(struct dfs_info3_param *param) { if (param) { __free_dfs_info_param(param); memset(param, 0, sizeof(*param)); } } static inline void free_dfs_info_array(struct dfs_info3_param 
*param, int number_of_items) { int i; if ((number_of_items == 0) || (param == NULL)) return; for (i = 0; i < number_of_items; i++) { kfree(param[i].path_name); kfree(param[i].node_name); } kfree(param); } static inline bool is_interrupt_error(int error) { switch (error) { case -EINTR: case -ERESTARTSYS: case -ERESTARTNOHAND: case -ERESTARTNOINTR: return true; } return false; } static inline bool is_retryable_error(int error) { if (is_interrupt_error(error) || error == -EAGAIN) return true; return false; } static inline bool is_replayable_error(int error) { if (error == -EAGAIN || error == -ECONNABORTED) return true; return false; } /* cifs_get_writable_file() flags */ enum cifs_writable_file_flags { FIND_WR_ANY = 0U, FIND_WR_FSUID_ONLY = (1U << 0), FIND_WR_WITH_DELETE = (1U << 1), FIND_WR_NO_PENDING_DELETE = (1U << 2), }; #define MID_FREE 0 #define MID_REQUEST_ALLOCATED 1 #define MID_REQUEST_SUBMITTED 2 #define MID_RESPONSE_RECEIVED 4 #define MID_RETRY_NEEDED 8 /* session closed while this request out */ #define MID_RESPONSE_MALFORMED 0x10 #define MID_SHUTDOWN 0x20 #define MID_RESPONSE_READY 0x40 /* ready for other process handle the rsp */ #define MID_RC 0x80 /* mid_rc contains custom rc */ /* Types of response buffer returned from SendReceive2 */ #define CIFS_NO_BUFFER 0 /* Response buffer not returned */ #define CIFS_SMALL_BUFFER 1 #define CIFS_LARGE_BUFFER 2 #define CIFS_IOVEC 4 /* array of response buffers */ /* Type of Request to SendReceive2 */ #define CIFS_BLOCKING_OP 1 /* operation can block */ #define CIFS_NON_BLOCKING 2 /* do not block waiting for credits */ #define CIFS_TIMEOUT_MASK 0x003 /* only one of above set in req */ #define CIFS_LOG_ERROR 0x010 /* log NT STATUS if non-zero */ #define CIFS_LARGE_BUF_OP 0x020 /* large request buffer */ #define CIFS_NO_RSP_BUF 0x040 /* no response buffer required */ /* Type of request operation */ #define CIFS_ECHO_OP 0x080 /* echo request */ #define CIFS_OBREAK_OP 0x0100 /* oplock break request */ #define 
CIFS_NEG_OP 0x0200 /* negotiate request */ #define CIFS_CP_CREATE_CLOSE_OP 0x0400 /* compound create+close request */ /* Lower bitmask values are reserved by others below. */ #define CIFS_SESS_OP 0x2000 /* session setup request */ #define CIFS_OP_MASK 0x2780 /* mask request type */ #define CIFS_HAS_CREDITS 0x0400 /* already has credits */ #define CIFS_TRANSFORM_REQ 0x0800 /* transform request before sending */ #define CIFS_NO_SRV_RSP 0x1000 /* there is no server response */ #define CIFS_COMPRESS_REQ 0x4000 /* compress request before sending */ /* Security Flags: indicate type of session setup needed */ #define CIFSSEC_MAY_SIGN 0x00001 #define CIFSSEC_MAY_NTLMV2 0x00004 #define CIFSSEC_MAY_KRB5 0x00008 #define CIFSSEC_MAY_SEAL 0x00040 #define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_MUST_SIGN 0x01001 /* note that only one of the following can be set so the result of setting MUST flags more than once will be to require use of the stronger protocol */ #define CIFSSEC_MUST_NTLMV2 0x04004 #define CIFSSEC_MUST_KRB5 0x08008 #ifdef CONFIG_CIFS_UPCALL #define CIFSSEC_MASK 0xCF0CF /* flags supported if no weak allowed */ #else #define CIFSSEC_MASK 0xC70C7 /* flags supported if no weak allowed */ #endif /* UPCALL */ #define CIFSSEC_MUST_SEAL 0x40040 #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL) #define CIFSSEC_MAX (CIFSSEC_MAY_SIGN | CIFSSEC_MUST_KRB5 | CIFSSEC_MAY_SEAL) #define CIFSSEC_AUTH_MASK (CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_KRB5 | CIFSSEC_MAY_NTLMSSP) /* ***************************************************************** * All constants go here ***************************************************************** */ #define UID_HASH (16) /* * Note that ONE module should define _DECLARE_GLOBALS_HERE to cause the * following to be declared. 
*/ /**************************************************************************** * Here are all the locks (spinlock, mutex, semaphore) in cifs.ko, arranged according * to the locking order. i.e. if two locks are to be held together, the lock that * appears higher in this list needs to be taken before the other. * * If you hold a lock that is lower in this list, and you need to take a higher lock * (or if you think that one of the functions that you're calling may need to), first * drop the lock you hold, pick up the higher lock, then the lower one. This will * ensure that locks are picked up only in one direction in the below table * (top to bottom). * * Also, if you expect a function to be called with a lock held, explicitly document * this in the comments on top of your function definition. * * And also, try to keep the critical sections (lock hold time) to be as minimal as * possible. Blocking / calling other functions with a lock held always increase * the risk of a possible deadlock. * * Following this rule will avoid unnecessary deadlocks, which can get really hard to * debug. Also, any new lock that you introduce, please add to this list in the correct * order. * * Please populate this list whenever you introduce new locks in your changes. Or in * case I've missed some existing locks. Please ensure that it's added in the list * based on the locking order expected. 
* * ===================================================================================== * Lock Protects Initialization fn * ===================================================================================== * vol_list_lock * vol_info->ctx_lock vol_info->ctx * cifs_sb_info->tlink_tree_lock cifs_sb_info->tlink_tree cifs_setup_cifs_sb * TCP_Server_Info-> TCP_Server_Info cifs_get_tcp_session * reconnect_mutex * TCP_Server_Info->srv_mutex TCP_Server_Info cifs_get_tcp_session * cifs_ses->session_mutex cifs_ses sesInfoAlloc * cifs_tcon->open_file_lock cifs_tcon->openFileList tconInfoAlloc * cifs_tcon->pending_opens * cifs_tcon->stat_lock cifs_tcon->bytes_read tconInfoAlloc * cifs_tcon->bytes_written * cifs_tcp_ses_lock cifs_tcp_ses_list sesInfoAlloc * GlobalMid_Lock GlobalMaxActiveXid init_cifs * GlobalCurrentXid * GlobalTotalActiveXid * TCP_Server_Info->srv_lock (anything in struct not protected by another lock and can change) * TCP_Server_Info->mid_queue_lock TCP_Server_Info->pending_mid_q cifs_get_tcp_session * mid_q_entry->deleted_from_q * TCP_Server_Info->mid_counter_lock TCP_Server_Info->current_mid cifs_get_tcp_session * TCP_Server_Info->req_lock TCP_Server_Info->in_flight cifs_get_tcp_session * ->credits * ->echo_credits * ->oplock_credits * ->reconnect_instance * cifs_ses->ses_lock (anything that is not protected by another lock and can change) * sesInfoAlloc * cifs_ses->iface_lock cifs_ses->iface_list sesInfoAlloc * ->iface_count * ->iface_last_update * cifs_ses->chan_lock cifs_ses->chans sesInfoAlloc * ->chans_need_reconnect * ->chans_in_reconnect * cifs_tcon->tc_lock (anything that is not protected by another lock and can change) * tcon_info_alloc * inode->i_rwsem, taken by fs/netfs/locking.c e.g. 
should be taken before cifsInodeInfo locks
 * cifsInodeInfo->open_file_lock    cifsInodeInfo->openFileList     cifs_alloc_inode
 * cifsInodeInfo->writers_lock      cifsInodeInfo->writers          cifsInodeInfo_alloc
 * cifsInodeInfo->lock_sem          cifsInodeInfo->llist            cifs_init_once
 *                                  ->can_cache_brlcks
 * cifsInodeInfo->deferred_lock     cifsInodeInfo->deferred_closes  cifsInodeInfo_alloc
 * cached_fids->cfid_list_lock      cifs_tcon->cfids->entries       init_cached_dirs
 * cached_fid->fid_lock             (anything that is not protected by another lock and can change)
 *                                                                  init_cached_dir
 * cifsFileInfo->fh_mutex           cifsFileInfo                    cifs_new_fileinfo
 * cifsFileInfo->file_info_lock     cifsFileInfo->count             cifs_new_fileinfo
 *                                  ->invalidHandle                 initiate_cifs_search
 *                                  ->oplock_break_cancelled
 * mid_q_entry->mid_lock            mid_q_entry->callback           alloc_mid
 *                                                                  smb2_mid_entry_alloc
 *                                  (Any fields of mid_q_entry that will need protection)
 ****************************************************************************/

/*
 * GLOBAL_EXTERN expands to nothing when DECLARE_GLOBALS_HERE is defined
 * before this header is included, and to "extern" otherwise.
 */
#ifdef DECLARE_GLOBALS_HERE
#define GLOBAL_EXTERN
#else
#define GLOBAL_EXTERN extern
#endif

/*
 * the list of TCP_Server_Info structures, ie each of the sockets
 * connecting our client to a distinct server (ip address), is
 * chained together by cifs_tcp_ses_list. The list of all our SMB
 * sessions (and from that the tree connections) can be found
 * by iterating over cifs_tcp_ses_list
 */
extern struct list_head		cifs_tcp_ses_list;

/*
 * This lock protects the cifs_tcp_ses_list, the list of smb sessions per
 * tcp session, and the list of tcon's per smb session. It also protects
 * the reference counters for the server, smb session, and tcon.
* generally the locks should be taken in order tcp_ses_lock before * tcon->open_file_lock and that before file->file_info_lock since the * structure order is cifs_socket-->cifs_ses-->cifs_tcon-->cifs_file */ extern spinlock_t cifs_tcp_ses_lock; /* * Global transaction id (XID) information */ extern unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */ extern unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */ extern unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */ extern spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ /* * Global counters, updated atomically */ extern atomic_t sesInfoAllocCount; extern atomic_t tconInfoAllocCount; extern atomic_t tcpSesNextId; extern atomic_t tcpSesAllocCount; extern atomic_t tcpSesReconnectCount; extern atomic_t tconInfoReconnectCount; /* Various Debug counters */ extern atomic_t buf_alloc_count; /* current number allocated */ extern atomic_t small_buf_alloc_count; #ifdef CONFIG_CIFS_STATS2 extern atomic_t total_buf_alloc_count; /* total allocated over all time */ extern atomic_t total_small_buf_alloc_count; extern unsigned int slow_rsp_threshold; /* number of secs before logging */ #endif /* Misc globals */ extern bool enable_oplocks; /* enable or disable oplocks */ extern bool lookupCacheEnabled; extern unsigned int global_secflags; /* if on, session setup sent with more secure ntlmssp2 challenge/resp */ extern unsigned int sign_CIFS_PDUs; /* enable smb packet signing */ extern bool enable_gcm_256; /* allow optional negotiate of strongest signing (aes-gcm-256) */ extern bool require_gcm_256; /* require use of strongest signing (aes-gcm-256) */ extern bool enable_negotiate_signing; /* request use of faster (GMAC) signing if available */ extern bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/ extern unsigned int CIFSMaxBufSize; /* max size not including hdr */ extern unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */ extern unsigned int 
cifs_min_small; /* min size of small buf pool */ extern unsigned int cifs_max_pending; /* MAX requests at once to server*/ extern unsigned int dir_cache_timeout; /* max time for directory lease caching of dir */ extern bool disable_legacy_dialects; /* forbid vers=1.0 and vers=2.0 mounts */ extern atomic_t mid_count; void cifs_oplock_break(struct work_struct *work); void cifs_queue_oplock_break(struct cifsFileInfo *cfile); void smb2_deferred_work_close(struct work_struct *work); extern const struct slow_work_ops cifs_oplock_break_ops; extern struct workqueue_struct *cifsiod_wq; extern struct workqueue_struct *decrypt_wq; extern struct workqueue_struct *fileinfo_put_wq; extern struct workqueue_struct *cifsoplockd_wq; extern struct workqueue_struct *deferredclose_wq; extern struct workqueue_struct *serverclose_wq; extern struct workqueue_struct *cfid_put_wq; extern __u32 cifs_lock_secret; extern mempool_t *cifs_sm_req_poolp; extern mempool_t *cifs_req_poolp; extern mempool_t *cifs_mid_poolp; extern mempool_t cifs_io_request_pool; extern mempool_t cifs_io_subrequest_pool; /* Operations for different SMB versions */ #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY extern struct smb_version_operations smb1_operations; extern struct smb_version_values smb1_values; extern struct smb_version_operations smb20_operations; extern struct smb_version_values smb20_values; #endif /* CIFS_ALLOW_INSECURE_LEGACY */ extern struct smb_version_operations smb21_operations; extern struct smb_version_values smb21_values; extern struct smb_version_values smbdefault_values; extern struct smb_version_values smb3any_values; extern struct smb_version_operations smb30_operations; extern struct smb_version_values smb30_values; /*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */ extern struct smb_version_values smb302_values; extern struct smb_version_operations smb311_operations; extern struct smb_version_values smb311_values; static inline char *get_security_type_str(enum 
securityEnum sectype) { switch (sectype) { case RawNTLMSSP: return "RawNTLMSSP"; case Kerberos: return "Kerberos"; case NTLMv2: return "NTLMv2"; case IAKerb: return "IAKerb"; default: return "Unknown"; } } static inline bool is_smb1_server(struct TCP_Server_Info *server) { return strcmp(server->vals->version_string, SMB1_VERSION_STRING) == 0; } static inline bool is_tcon_dfs(struct cifs_tcon *tcon) { /* * For SMB1, see MS-CIFS 2.4.55 SMB_COM_TREE_CONNECT_ANDX (0x75) and MS-CIFS 3.3.4.4 DFS * Subsystem Notifies That a Share Is a DFS Share. * * For SMB2+, see MS-SMB2 2.2.10 SMB2 TREE_CONNECT Response and MS-SMB2 3.3.4.14 Server * Application Updates a Share. */ if (!tcon || !tcon->ses || !tcon->ses->server) return false; return is_smb1_server(tcon->ses->server) ? tcon->Flags & SMB_SHARE_IS_IN_DFS : tcon->share_flags & (SHI1005_FLAGS_DFS | SHI1005_FLAGS_DFS_ROOT); } static inline bool cifs_is_referral_server(struct cifs_tcon *tcon, const struct dfs_info3_param *ref) { /* * Check if all targets are capable of handling DFS referrals as per * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL. */ return is_tcon_dfs(tcon) || (ref && (ref->flags & DFSREF_REFERRAL_SERVER)); } static inline u64 cifs_flock_len(const struct file_lock *fl) { return (u64)fl->fl_end - fl->fl_start + 1; } static inline size_t ntlmssp_workstation_name_size(const struct cifs_ses *ses) { if (WARN_ON_ONCE(!ses || !ses->server)) return 0; /* * Make workstation name no more than 15 chars when using insecure dialects as some legacy * servers do require it during NTLMSSP. 
*/
	if (ses->server->dialect <= SMB20_PROT_ID)
		return min_t(size_t, sizeof(ses->workstation_name), RFC1001_NAME_LEN_WITH_NULL);
	return sizeof(ses->workstation_name);
}

/*
 * Populate an smb2_file_all_info from a CIFS FILE_ALL_INFO.
 *
 * All bytes of @src that precede its EASize member are copied to @dst with a
 * single memcpy(); EASize and FileNameLength are then copied individually and
 * the remaining SMB2-only members are zeroed.
 *
 * NOTE(review): the memcpy() presumes both structures share the same layout up
 * to EASize - confirm against the structure definitions.
 */
static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const FILE_ALL_INFO *src)
{
	memcpy(dst, src, (size_t)((u8 *)&src->EASize - (u8 *)src));
	dst->IndexNumber = 0;
	dst->EASize = src->EASize;
	dst->AccessFlags = 0;
	dst->CurrentByteOffset = 0;
	dst->Mode = 0;
	dst->AlignmentRequirement = 0;
	dst->FileNameLength = src->FileNameLength;
}

/*
 * Count the scatterlist entries needed to describe a set of requests plus the
 * signature buffer.
 *
 * @rqst:     array of requests
 * @num_rqst: number of entries in @rqst
 * @sig:      signature buffer; SMB2_SIGNATURE_SIZE bytes starting at @sig are
 *            accounted for
 *
 * Returns the number of entries, or -EIO if a request carries data in an
 * iterator that is user-backed or whose extraction would pin pages.
 */
static inline int cifs_get_num_sgs(const struct smb_rqst *rqst,
				   int num_rqst,
				   const u8 *sig)
{
	unsigned int len, skip;
	unsigned int nents = 0;
	unsigned long addr;
	size_t data_size;
	int i, j;

	/*
	 * The first rqst has a transform header where the first 20 bytes are
	 * not part of the encrypted blob.
	 */
	skip = 20;

	/* Assumes the first rqst has a transform header as the first iov.
	 * I.e.
	 * rqst[0].rq_iov[0]  is transform header
	 * rqst[0].rq_iov[1+] data to be encrypted/decrypted
	 * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
	 */
	for (i = 0; i < num_rqst; i++) {
		data_size = iov_iter_count(&rqst[i].rq_iter);

		/* We really don't want a mixture of pinned and unpinned pages
		 * in the sglist.  It's hard to keep track of which is what.
		 * Instead, we convert to a BVEC-type iterator higher up.
		 */
		if (data_size &&
		    WARN_ON_ONCE(user_backed_iter(&rqst[i].rq_iter)))
			return -EIO;

		/* We also don't want to have any extra refs or pins to clean
		 * up in the sglist.
		 */
		if (data_size &&
		    WARN_ON_ONCE(iov_iter_extract_will_pin(&rqst[i].rq_iter)))
			return -EIO;

		for (j = 0; j < rqst[i].rq_nvec; j++) {
			struct kvec *iov = &rqst[i].rq_iov[j];

			addr = (unsigned long)iov->iov_base + skip;
			if (is_vmalloc_or_module_addr((void *)addr)) {
				/* One entry per page the buffer touches. */
				len = iov->iov_len - skip;
				nents += DIV_ROUND_UP(offset_in_page(addr) + len,
						      PAGE_SIZE);
			} else {
				/* Any other buffer is counted as a single entry. */
				nents++;
			}
			/* Only the very first kvec has a skipped prefix. */
			skip = 0;
		}
		if (data_size)
			nents += iov_iter_npages(&rqst[i].rq_iter, INT_MAX);
	}
	/* Finally account for the pages spanned by the signature. */
	nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE);
	return nents;
}

/* We can not use the normal sg_set_buf() as we will sometimes pass a
 * stack object as buf.
 *
 * Appends @buflen bytes starting at @buf to @sgtable, advancing
 * sgtable->nents for every entry written.  A vmalloc/module address is added
 * page by page via vmalloc_to_page(); any other address is added as one entry
 * via virt_to_page().
 */
static inline void cifs_sg_set_buf(struct sg_table *sgtable,
				   const void *buf,
				   unsigned int buflen)
{
	unsigned long addr = (unsigned long)buf;
	unsigned int off = offset_in_page(addr);

	/* Work with the page-aligned address; "off" keeps the in-page offset. */
	addr &= PAGE_MASK;
	if (is_vmalloc_or_module_addr((void *)addr)) {
		do {
			unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off);

			sg_set_page(&sgtable->sgl[sgtable->nents++],
				    vmalloc_to_page((void *)addr), len, off);

			/* Every page after the first starts at offset 0. */
			off = 0;
			addr += PAGE_SIZE;
			buflen -= len;
		} while (buflen);
	} else {
		sg_set_page(&sgtable->sgl[sgtable->nents++],
			    virt_to_page((void *)addr), buflen, off);
	}
}

/*
 * Build a struct cifs_open_parms compound literal.  The create options are
 * passed through cifs_create_options() together with @_cifs_sb; all other
 * arguments are stored as given.
 */
#define CIFS_OPARMS(_cifs_sb, _tcon, _path, _da, _cd, _co, _mode) \
	((struct cifs_open_parms) { \
		.tcon = _tcon, \
		.path = _path, \
		.desired_access = (_da), \
		.disposition = (_cd), \
		.create_options = cifs_create_options(_cifs_sb, (_co)), \
		.mode = (_mode), \
		.cifs_sb = _cifs_sb, \
	})

/*
 * Scratch storage for building a compound request: open parameters, the
 * request and response arrays (MAX_COMPOUND entries each) and the kvec
 * arrays for the individual operations.
 */
struct smb2_compound_vars {
	struct cifs_open_parms oparms;
	struct kvec rsp_iov[MAX_COMPOUND];
	struct smb_rqst rqst[MAX_COMPOUND];
	struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
	struct kvec qi_iov;
	struct kvec io_iov[SMB2_IOCTL_IOV_SIZE];
	struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE];
	struct kvec unlink_iov[SMB2_SET_INFO_IOV_SIZE];
	struct kvec rename_iov[SMB2_SET_INFO_IOV_SIZE];
	struct kvec close_iov;
	struct smb2_file_rename_info_hdr rename_info;
	struct smb2_file_link_info_hdr link_info;
	struct kvec ea_iov;
};

/*
 * Return true if the session status is SES_EXITING.  The status is sampled
 * under ses->ses_lock; the result may be stale once the lock is dropped.
 */
static inline bool cifs_ses_exiting(struct cifs_ses *ses)
{
	bool ret;

	spin_lock(&ses->ses_lock);
	ret = ses->ses_status == SES_EXITING;
	spin_unlock(&ses->ses_lock);
	return ret;
}

/*
 * Decide whether @name (of @namelen bytes) is accepted as a NetBIOS name.
 *
 * Returns true only if the length is between 1 and RFC1001_NAME_LEN, none of
 * the characters \ / : * ? " < > | . occurs, and at least one character is
 * alphabetic.
 */
static inline bool cifs_netbios_name(const char *name, size_t namelen)
{
	bool ret = false;
	size_t i;

	if (namelen >= 1 && namelen <= RFC1001_NAME_LEN) {
		for (i = 0; i < namelen; i++) {
			const unsigned char c = name[i];

			if (c == '\\' || c == '/' || c == ':' || c == '*' ||
			    c == '?' || c == '"' || c == '<' || c == '>' ||
			    c == '|' || c == '.')
				return false;
			/* Remember that we saw at least one letter. */
			if (!ret && isalpha(c))
				ret = true;
		}
	}
	return ret;
}

/*
 * Execute mid callback atomically - ensures callback runs exactly once
 * and prevents sleeping in atomic context.
 *
 * The callback pointer is fetched and cleared under mid->mid_lock, and the
 * callback itself is invoked only after the lock has been released.
 */
static inline void mid_execute_callback(struct mid_q_entry *mid)
{
	void (*callback)(struct mid_q_entry *mid);

	spin_lock(&mid->mid_lock);
	callback = mid->callback;
	mid->callback = NULL;  /* Mark as executed, */
	spin_unlock(&mid->mid_lock);

	if (callback)
		callback(mid);
}

/*
 * True if @tcon has posix_extensions set or its filesystem attributes
 * advertise FILE_SUPPORTS_REPARSE_POINTS.
 */
#define CIFS_REPARSE_SUPPORT(tcon) \
	((tcon)->posix_extensions || \
	 (le32_to_cpu((tcon)->fsAttrInfo.Attributes) & \
	  FILE_SUPPORTS_REPARSE_POINTS))

#endif	/* _CIFS_GLOB_H */
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2016-present, Facebook, Inc.
 * All rights reserved.
 *
 */

#include <linux/bio.h>
#include <linux/bitmap.h>
#include <linux/err.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/pagemap.h>
#include <linux/refcount.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/zstd.h>
#include "misc.h"
#include "fs.h"
#include "btrfs_inode.h"
#include "compression.h"
#include "super.h"

/* Upper bound applied to the zstd window log; also defines the max input. */
#define ZSTD_BTRFS_MAX_WINDOWLOG 17
#define ZSTD_BTRFS_MAX_INPUT (1U << ZSTD_BTRFS_MAX_WINDOWLOG)
#define ZSTD_BTRFS_DEFAULT_LEVEL 3
#define ZSTD_BTRFS_MIN_LEVEL -15
#define ZSTD_BTRFS_MAX_LEVEL 15
/* 307s to avoid pathologically clashing with transaction commit */
#define ZSTD_BTRFS_RECLAIM_JIFFIES (307 * HZ)

/*
 * Get the zstd parameters for @level and @src_len, with the window log capped
 * at ZSTD_BTRFS_MAX_WINDOWLOG.  Warns if @src_len exceeds
 * ZSTD_BTRFS_MAX_INPUT.
 */
static zstd_parameters zstd_get_btrfs_parameters(int level, size_t src_len)
{
	zstd_parameters params = zstd_get_params(level, src_len);

	if (params.cParams.windowLog > ZSTD_BTRFS_MAX_WINDOWLOG)
		params.cParams.windowLog = ZSTD_BTRFS_MAX_WINDOWLOG;
	WARN_ON(src_len > ZSTD_BTRFS_MAX_INPUT);
	return params;
}

struct workspace {
	/* Memory handed to the zstd stream init functions, @size bytes. */
	void *mem;
	size_t size;
	/* Bounce buffer used as the output buffer when decompressing. */
	char *buf;
	/* Index into the per-level arrays, i.e. clip_level() of the level. */
	int level;
	/* Level requested by the current user; 0 while the workspace is idle. */
	int req_level;
	unsigned long last_used; /* jiffies */
	/* Linkage on zstd_workspace_manager::idle_ws[level]. */
	struct list_head list;
	/* Linkage on zstd_workspace_manager::lru_list. */
	struct list_head lru_list;
	zstd_in_buffer in_buf;
	zstd_out_buffer out_buf;
	zstd_parameters params;
};

/*
 * Zstd Workspace Management
 *
 * Zstd workspaces have different memory requirements depending on the level.
 * The zstd workspaces are managed by having individual lists for each level
 * and a global lru.  Forward progress is maintained by protecting a max level
 * workspace.
 *
 * Getting a workspace is done by using the bitmap to identify the levels that
 * have available workspaces and scans up.  This lets us recycle higher level
 * workspaces because of the monotonic memory guarantee.  A workspace's
 * last_used is only updated if it is being used by the corresponding memory
 * level.  Putting a workspace involves adding it back to the appropriate places
 * and adding it back to the lru if necessary.
 *
 * A timer is used to reclaim workspaces if they have not been used for
 * ZSTD_BTRFS_RECLAIM_JIFFIES.  This helps keep only active workspaces around.
 * The upper bound is provided by the workqueue limit which is 2 (percpu limit).
 */

struct zstd_workspace_manager {
	/* Protects the lists and active_map below. */
	spinlock_t lock;
	struct list_head lru_list;
	/* Idle workspaces, one list per clipped level. */
	struct list_head idle_ws[ZSTD_BTRFS_MAX_LEVEL];
	/* Bit N is set while idle_ws[N] has at least one workspace. */
	unsigned long active_map;
	/* Tasks that failed to allocate a workspace sleep here. */
	wait_queue_head_t wait;
	/* Reclaim timer, see zstd_reclaim_timer_fn(). */
	struct timer_list timer;
};

/* Workspace memory size per clipped level, see zstd_calc_ws_mem_sizes(). */
static size_t zstd_ws_mem_sizes[ZSTD_BTRFS_MAX_LEVEL];

/* Convert a workspace's idle-list linkage back to the workspace. */
static inline struct workspace *list_to_workspace(struct list_head *list)
{
	return container_of(list, struct workspace, list);
}

/*
 * Map a compression level to an index into the per-level arrays: level - 1,
 * with everything below level 1 (including the negative levels) mapped to 0.
 */
static inline int clip_level(int level)
{
	return max(0, level - 1);
}

/*
 * Timer callback to free unused workspaces.
 *
 * @timer: the reclaim timer embedded in struct zstd_workspace_manager
 *
 * This scans the lru_list and attempts to reclaim any workspace that hasn't
 * been used for ZSTD_BTRFS_RECLAIM_JIFFIES.
 *
 * The context is softirq and does not need the _bh locking primitives.
*/
static void zstd_reclaim_timer_fn(struct timer_list *timer)
{
	struct zstd_workspace_manager *zwsm =
		container_of(timer, struct zstd_workspace_manager, timer);
	/* Workspaces last used after this point in time are kept. */
	unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
	struct list_head *pos, *next;

	spin_lock(&zwsm->lock);

	if (list_empty(&zwsm->lru_list)) {
		spin_unlock(&zwsm->lock);
		return;
	}

	/* Walk from the tail of the lru towards the head. */
	list_for_each_prev_safe(pos, next, &zwsm->lru_list) {
		struct workspace *victim = container_of(pos, struct workspace,
							lru_list);
		int level;

		/* Stop at the first workspace used recently enough. */
		if (time_after(victim->last_used, reclaim_threshold))
			break;

		/* workspace is in use */
		if (victim->req_level)
			continue;

		/* Remember the level: victim is gone after the free below. */
		level = victim->level;
		list_del(&victim->lru_list);
		list_del(&victim->list);
		zstd_free_workspace(&victim->list);

		if (list_empty(&zwsm->idle_ws[level]))
			clear_bit(level, &zwsm->active_map);

	}

	/* Re-arm the timer while there is still something left to reclaim. */
	if (!list_empty(&zwsm->lru_list))
		mod_timer(&zwsm->timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);

	spin_unlock(&zwsm->lock);
}

/*
 * Calculate monotonic memory bounds.
 *
 * It is possible based on the level configurations that a higher level
 * workspace uses less memory than a lower level workspace.  In order to reuse
 * workspaces, this must be made a monotonic relationship.  This precomputes
 * the required memory for each level and enforces the monotonicity between
 * level and memory required.
 */
static void zstd_calc_ws_mem_sizes(void)
{
	size_t max_size = 0;
	int level;

	for (level = ZSTD_BTRFS_MIN_LEVEL; level <= ZSTD_BTRFS_MAX_LEVEL; level++) {
		/* Level 0 is skipped. */
		if (level == 0)
			continue;
		zstd_parameters params =
			zstd_get_btrfs_parameters(level, ZSTD_BTRFS_MAX_INPUT);
		/* A workspace must be able to serve both directions. */
		size_t level_size =
			max_t(size_t,
			      zstd_cstream_workspace_bound(&params.cParams),
			      zstd_dstream_workspace_bound(ZSTD_BTRFS_MAX_INPUT));

		/* Running maximum: a level never needs less than a lower one. */
		max_size = max_t(size_t, max_size, level_size);
		/* Use level 1 workspace size for all the fast mode negative levels. */
		zstd_ws_mem_sizes[clip_level(level)] = max_size;
	}
}

/*
 * Allocate and initialize the zstd workspace manager of @fs_info and try to
 * preallocate one max level workspace.
 *
 * Returns 0 on success (a failed preallocation only produces a warning) or
 * -ENOMEM if the manager itself cannot be allocated.
 */
int zstd_alloc_workspace_manager(struct btrfs_fs_info *fs_info)
{
	struct zstd_workspace_manager *zwsm;
	struct list_head *ws;

	ASSERT(fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] == NULL);
	zwsm = kzalloc(sizeof(*zwsm), GFP_KERNEL);
	if (!zwsm)
		return -ENOMEM;
	zstd_calc_ws_mem_sizes();
	spin_lock_init(&zwsm->lock);
	init_waitqueue_head(&zwsm->wait);
	timer_setup(&zwsm->timer, zstd_reclaim_timer_fn, 0);

	INIT_LIST_HEAD(&zwsm->lru_list);
	for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++)
		INIT_LIST_HEAD(&zwsm->idle_ws[i]);
	fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = zwsm;

	ws = zstd_alloc_workspace(fs_info, ZSTD_BTRFS_MAX_LEVEL);
	if (IS_ERR(ws)) {
		btrfs_warn(NULL, "cannot preallocate zstd compression workspace");
	} else {
		/* Only on the idle list, not the lru, so reclaim never sees it. */
		set_bit(ZSTD_BTRFS_MAX_LEVEL - 1, &zwsm->active_map);
		list_add(ws, &zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1]);
	}
	return 0;
}

/*
 * Tear down the zstd workspace manager of @fs_info: free every idle
 * workspace, stop the reclaim timer and free the manager.  Does nothing if no
 * manager is installed.
 */
void zstd_free_workspace_manager(struct btrfs_fs_info *fs_info)
{
	struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
	struct workspace *workspace;

	if (!zwsm)
		return;
	fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD] = NULL;
	spin_lock_bh(&zwsm->lock);
	for (int i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
		while (!list_empty(&zwsm->idle_ws[i])) {
			workspace = container_of(zwsm->idle_ws[i].next,
						 struct workspace, list);
			list_del(&workspace->list);
			list_del(&workspace->lru_list);
			zstd_free_workspace(&workspace->list);
		}
	}
	spin_unlock_bh(&zwsm->lock);
	/* Wait for a running timer callback before freeing the manager. */
	timer_delete_sync(&zwsm->timer);
	kfree(zwsm);
}

/*
 * Find workspace for given level.
 *
 * @fs_info: filesystem whose zstd workspace manager is searched
 * @level: compression level
 *
 * This iterates over the set bits in the active_map beginning at the requested
 * compression level.  This lets us utilize already allocated workspaces before
 * allocating a new one.  If the workspace is of a larger size, it is used, but
 * the place in the lru_list and last_used times are not updated.  This is to
 * offer the opportunity to reclaim the workspace in favor of allocating an
 * appropriately sized one in the future.
*/
static struct list_head *zstd_find_workspace(struct btrfs_fs_info *fs_info, int level)
{
	struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
	struct list_head *ws;
	struct workspace *workspace;
	/* Start scanning at the slot that matches the requested level. */
	int i = clip_level(level);

	ASSERT(zwsm);

	spin_lock_bh(&zwsm->lock);
	for_each_set_bit_from(i, &zwsm->active_map, ZSTD_BTRFS_MAX_LEVEL) {
		if (!list_empty(&zwsm->idle_ws[i])) {
			ws = zwsm->idle_ws[i].next;
			workspace = list_to_workspace(ws);
			list_del_init(ws);
			/* keep its place if it's a lower level using this */
			workspace->req_level = level;
			if (clip_level(level) == workspace->level)
				list_del(&workspace->lru_list);
			/* Last idle workspace of this level was just taken. */
			if (list_empty(&zwsm->idle_ws[i]))
				clear_bit(i, &zwsm->active_map);
			spin_unlock_bh(&zwsm->lock);
			return ws;
		}
	}
	spin_unlock_bh(&zwsm->lock);

	/* No idle workspace at or above the requested level. */
	return NULL;
}

/*
 * Zstd get_workspace for level.
 *
 * @fs_info: filesystem whose zstd workspace manager is used
 * @level: compression level
 *
 * If @level is 0, then any compression level can be used.  Therefore, we begin
 * scanning from 1.  We first scan through possible workspaces and then after
 * attempt to allocate a new workspace.  If we fail to allocate one due to
 * memory pressure, go to sleep waiting for the max level workspace to free up.
 *
 * Returns the list_head of a workspace; this function retries until it has
 * one and does not return an error.
 */
struct list_head *zstd_get_workspace(struct btrfs_fs_info *fs_info, int level)
{
	struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD];
	struct list_head *ws;
	unsigned int nofs_flag;

	ASSERT(zwsm);

	/* level == 0 means we can use any workspace */
	if (!level)
		level = 1;

again:
	ws = zstd_find_workspace(fs_info, level);
	if (ws)
		return ws;

	/* The allocation below runs in a NOFS scope. */
	nofs_flag = memalloc_nofs_save();
	ws = zstd_alloc_workspace(fs_info, level);
	memalloc_nofs_restore(nofs_flag);

	if (IS_ERR(ws)) {
		DEFINE_WAIT(wait);

		/* Sleep until woken through zwsm->wait, then start over. */
		prepare_to_wait(&zwsm->wait, &wait, TASK_UNINTERRUPTIBLE);
		schedule();
		finish_wait(&zwsm->wait, &wait);

		goto again;
	}

	return ws;
}

/*
 * Zstd put_workspace.
 *
 * @ws: list_head for the workspace
 *
 * When putting back a workspace, we only need to update the LRU if we are of
 * the requested compression level.
Here is where we continue to protect the * max level workspace or update last_used accordingly. If the reclaim timer * isn't set, it is also set here. Only the max level workspace tries and wakes * up waiting workspaces. */ void zstd_put_workspace(struct btrfs_fs_info *fs_info, struct list_head *ws) { struct zstd_workspace_manager *zwsm = fs_info->compr_wsm[BTRFS_COMPRESS_ZSTD]; struct workspace *workspace = list_to_workspace(ws); ASSERT(zwsm); spin_lock_bh(&zwsm->lock); /* A node is only taken off the lru if we are the corresponding level */ if (clip_level(workspace->req_level) == workspace->level) { /* Hide a max level workspace from reclaim */ if (list_empty(&zwsm->idle_ws[ZSTD_BTRFS_MAX_LEVEL - 1])) { INIT_LIST_HEAD(&workspace->lru_list); } else { workspace->last_used = jiffies; list_add(&workspace->lru_list, &zwsm->lru_list); if (!timer_pending(&zwsm->timer)) mod_timer(&zwsm->timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES); } } set_bit(workspace->level, &zwsm->active_map); list_add(&workspace->list, &zwsm->idle_ws[workspace->level]); workspace->req_level = 0; spin_unlock_bh(&zwsm->lock); if (workspace->level == clip_level(ZSTD_BTRFS_MAX_LEVEL)) cond_wake_up(&zwsm->wait); } void zstd_free_workspace(struct list_head *ws) { struct workspace *workspace = list_entry(ws, struct workspace, list); kvfree(workspace->mem); kfree(workspace->buf); kfree(workspace); } struct list_head *zstd_alloc_workspace(struct btrfs_fs_info *fs_info, int level) { const u32 blocksize = fs_info->sectorsize; struct workspace *workspace; workspace = kzalloc(sizeof(*workspace), GFP_KERNEL); if (!workspace) return ERR_PTR(-ENOMEM); /* Use level 1 workspace size for all the fast mode negative levels. 
*/ workspace->size = zstd_ws_mem_sizes[clip_level(level)]; workspace->level = clip_level(level); workspace->req_level = level; workspace->last_used = jiffies; workspace->mem = kvmalloc(workspace->size, GFP_KERNEL | __GFP_NOWARN); workspace->buf = kmalloc(blocksize, GFP_KERNEL); if (!workspace->mem || !workspace->buf) goto fail; INIT_LIST_HEAD(&workspace->list); INIT_LIST_HEAD(&workspace->lru_list); return &workspace->list; fail: zstd_free_workspace(&workspace->list); return ERR_PTR(-ENOMEM); } int zstd_compress_folios(struct list_head *ws, struct btrfs_inode *inode, u64 start, struct folio **folios, unsigned long *out_folios, unsigned long *total_in, unsigned long *total_out) { struct btrfs_fs_info *fs_info = inode->root->fs_info; struct workspace *workspace = list_entry(ws, struct workspace, list); struct address_space *mapping = inode->vfs_inode.i_mapping; zstd_cstream *stream; int ret = 0; int nr_folios = 0; struct folio *in_folio = NULL; /* The current folio to read. */ struct folio *out_folio = NULL; /* The current folio to write to. 
*/ unsigned long tot_in = 0; unsigned long tot_out = 0; unsigned long len = *total_out; const unsigned long nr_dest_folios = *out_folios; const u64 orig_end = start + len; const u32 blocksize = fs_info->sectorsize; const u32 min_folio_size = btrfs_min_folio_size(fs_info); unsigned long max_out = nr_dest_folios * min_folio_size; unsigned int cur_len; workspace->params = zstd_get_btrfs_parameters(workspace->req_level, len); *out_folios = 0; *total_out = 0; *total_in = 0; /* Initialize the stream */ stream = zstd_init_cstream(&workspace->params, len, workspace->mem, workspace->size); if (unlikely(!stream)) { btrfs_err(fs_info, "zstd compression init level %d failed, root %llu inode %llu offset %llu", workspace->req_level, btrfs_root_id(inode->root), btrfs_ino(inode), start); ret = -EIO; goto out; } /* map in the first page of input data */ ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio); if (ret < 0) goto out; cur_len = btrfs_calc_input_length(in_folio, orig_end, start); workspace->in_buf.src = kmap_local_folio(in_folio, offset_in_folio(in_folio, start)); workspace->in_buf.pos = 0; workspace->in_buf.size = cur_len; /* Allocate and map in the output buffer */ out_folio = btrfs_alloc_compr_folio(fs_info); if (out_folio == NULL) { ret = -ENOMEM; goto out; } folios[nr_folios++] = out_folio; workspace->out_buf.dst = folio_address(out_folio); workspace->out_buf.pos = 0; workspace->out_buf.size = min_t(size_t, max_out, min_folio_size); while (1) { size_t ret2; ret2 = zstd_compress_stream(stream, &workspace->out_buf, &workspace->in_buf); if (unlikely(zstd_is_error(ret2))) { btrfs_warn(fs_info, "zstd compression level %d failed, error %d root %llu inode %llu offset %llu", workspace->req_level, zstd_get_error_code(ret2), btrfs_root_id(inode->root), btrfs_ino(inode), start); ret = -EIO; goto out; } /* Check to see if we are making it bigger */ if (tot_in + workspace->in_buf.pos > blocksize * 2 && tot_in + workspace->in_buf.pos < tot_out + 
workspace->out_buf.pos) { ret = -E2BIG; goto out; } /* We've reached the end of our output range */ if (workspace->out_buf.pos >= max_out) { tot_out += workspace->out_buf.pos; ret = -E2BIG; goto out; } /* Check if we need more output space */ if (workspace->out_buf.pos == workspace->out_buf.size) { tot_out += min_folio_size; max_out -= min_folio_size; if (nr_folios == nr_dest_folios) { ret = -E2BIG; goto out; } out_folio = btrfs_alloc_compr_folio(fs_info); if (out_folio == NULL) { ret = -ENOMEM; goto out; } folios[nr_folios++] = out_folio; workspace->out_buf.dst = folio_address(out_folio); workspace->out_buf.pos = 0; workspace->out_buf.size = min_t(size_t, max_out, min_folio_size); } /* We've reached the end of the input */ if (workspace->in_buf.pos >= len) { tot_in += workspace->in_buf.pos; break; } /* Check if we need more input */ if (workspace->in_buf.pos == workspace->in_buf.size) { tot_in += workspace->in_buf.size; kunmap_local(workspace->in_buf.src); workspace->in_buf.src = NULL; folio_put(in_folio); start += cur_len; len -= cur_len; ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio); if (ret < 0) goto out; cur_len = btrfs_calc_input_length(in_folio, orig_end, start); workspace->in_buf.src = kmap_local_folio(in_folio, offset_in_folio(in_folio, start)); workspace->in_buf.pos = 0; workspace->in_buf.size = cur_len; } } while (1) { size_t ret2; ret2 = zstd_end_stream(stream, &workspace->out_buf); if (unlikely(zstd_is_error(ret2))) { btrfs_err(fs_info, "zstd compression end level %d failed, error %d root %llu inode %llu offset %llu", workspace->req_level, zstd_get_error_code(ret2), btrfs_root_id(inode->root), btrfs_ino(inode), start); ret = -EIO; goto out; } if (ret2 == 0) { tot_out += workspace->out_buf.pos; break; } if (workspace->out_buf.pos >= max_out) { tot_out += workspace->out_buf.pos; ret = -E2BIG; goto out; } tot_out += min_folio_size; max_out -= min_folio_size; if (nr_folios == nr_dest_folios) { ret = -E2BIG; goto out; } out_folio = 
btrfs_alloc_compr_folio(fs_info); if (out_folio == NULL) { ret = -ENOMEM; goto out; } folios[nr_folios++] = out_folio; workspace->out_buf.dst = folio_address(out_folio); workspace->out_buf.pos = 0; workspace->out_buf.size = min_t(size_t, max_out, min_folio_size); } if (tot_out >= tot_in) { ret = -E2BIG; goto out; } ret = 0; *total_in = tot_in; *total_out = tot_out; out: *out_folios = nr_folios; if (workspace->in_buf.src) { kunmap_local(workspace->in_buf.src); folio_put(in_folio); } return ret; } int zstd_decompress_bio(struct list_head *ws, struct compressed_bio *cb) { struct btrfs_fs_info *fs_info = cb_to_fs_info(cb); struct workspace *workspace = list_entry(ws, struct workspace, list); struct folio **folios_in = cb->compressed_folios; size_t srclen = cb->compressed_len; zstd_dstream *stream; int ret = 0; const u32 blocksize = fs_info->sectorsize; const unsigned int min_folio_size = btrfs_min_folio_size(fs_info); unsigned long folio_in_index = 0; unsigned long total_folios_in = DIV_ROUND_UP(srclen, min_folio_size); unsigned long buf_start; unsigned long total_out = 0; stream = zstd_init_dstream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); if (unlikely(!stream)) { struct btrfs_inode *inode = cb->bbio.inode; btrfs_err(inode->root->fs_info, "zstd decompression init failed, root %llu inode %llu offset %llu", btrfs_root_id(inode->root), btrfs_ino(inode), cb->start); ret = -EIO; goto done; } workspace->in_buf.src = kmap_local_folio(folios_in[folio_in_index], 0); workspace->in_buf.pos = 0; workspace->in_buf.size = min_t(size_t, srclen, min_folio_size); workspace->out_buf.dst = workspace->buf; workspace->out_buf.pos = 0; workspace->out_buf.size = blocksize; while (1) { size_t ret2; ret2 = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf); if (unlikely(zstd_is_error(ret2))) { struct btrfs_inode *inode = cb->bbio.inode; btrfs_err(inode->root->fs_info, "zstd decompression failed, error %d root %llu inode %llu offset %llu", 
zstd_get_error_code(ret2), btrfs_root_id(inode->root), btrfs_ino(inode), cb->start); ret = -EIO; goto done; } buf_start = total_out; total_out += workspace->out_buf.pos; workspace->out_buf.pos = 0; ret = btrfs_decompress_buf2page(workspace->out_buf.dst, total_out - buf_start, cb, buf_start); if (ret == 0) break; if (workspace->in_buf.pos >= srclen) break; /* Check if we've hit the end of a frame */ if (ret2 == 0) break; if (workspace->in_buf.pos == workspace->in_buf.size) { kunmap_local(workspace->in_buf.src); folio_in_index++; if (unlikely(folio_in_index >= total_folios_in)) { workspace->in_buf.src = NULL; ret = -EIO; goto done; } srclen -= min_folio_size; workspace->in_buf.src = kmap_local_folio(folios_in[folio_in_index], 0); workspace->in_buf.pos = 0; workspace->in_buf.size = min_t(size_t, srclen, min_folio_size); } } ret = 0; done: if (workspace->in_buf.src) kunmap_local(workspace->in_buf.src); return ret; } int zstd_decompress(struct list_head *ws, const u8 *data_in, struct folio *dest_folio, unsigned long dest_pgoff, size_t srclen, size_t destlen) { struct workspace *workspace = list_entry(ws, struct workspace, list); struct btrfs_fs_info *fs_info = btrfs_sb(folio_inode(dest_folio)->i_sb); const u32 sectorsize = fs_info->sectorsize; zstd_dstream *stream; int ret = 0; unsigned long to_copy = 0; stream = zstd_init_dstream( ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size); if (unlikely(!stream)) { struct btrfs_inode *inode = folio_to_inode(dest_folio); btrfs_err(inode->root->fs_info, "zstd decompression init failed, root %llu inode %llu offset %llu", btrfs_root_id(inode->root), btrfs_ino(inode), folio_pos(dest_folio)); ret = -EIO; goto finish; } workspace->in_buf.src = data_in; workspace->in_buf.pos = 0; workspace->in_buf.size = srclen; workspace->out_buf.dst = workspace->buf; workspace->out_buf.pos = 0; workspace->out_buf.size = sectorsize; /* * Since both input and output buffers should not exceed one sector, * one call should end the decompression. 
*/ ret = zstd_decompress_stream(stream, &workspace->out_buf, &workspace->in_buf); if (unlikely(zstd_is_error(ret))) { struct btrfs_inode *inode = folio_to_inode(dest_folio); btrfs_err(inode->root->fs_info, "zstd decompression failed, error %d root %llu inode %llu offset %llu", zstd_get_error_code(ret), btrfs_root_id(inode->root), btrfs_ino(inode), folio_pos(dest_folio)); goto finish; } to_copy = workspace->out_buf.pos; memcpy_to_folio(dest_folio, dest_pgoff, workspace->out_buf.dst, to_copy); finish: /* Error or early end. */ if (unlikely(to_copy < destlen)) { ret = -EIO; folio_zero_range(dest_folio, dest_pgoff + to_copy, destlen - to_copy); } return ret; } const struct btrfs_compress_levels btrfs_zstd_compress = { .min_level = ZSTD_BTRFS_MIN_LEVEL, .max_level = ZSTD_BTRFS_MAX_LEVEL, .default_level = ZSTD_BTRFS_DEFAULT_LEVEL, };
2 2 2 2 6 5 1 5 2 4 6 100 99 99 2 2 99 51 2 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 // SPDX-License-Identifier: GPL-2.0-or-later /* * Virtual NCI device simulation driver * * Copyright (C) 2020 Samsung Electronics * Bongsu Jeon <bongsu.jeon@samsung.com> */ #include <linux/kernel.h> #include <linux/module.h> #include <linux/miscdevice.h> #include <linux/mutex.h> #include <linux/wait.h> #include <net/nfc/nci_core.h> #define IOCTL_GET_NCIDEV_IDX 0 #define VIRTUAL_NFC_PROTOCOLS (NFC_PROTO_JEWEL_MASK | \ NFC_PROTO_MIFARE_MASK | \ NFC_PROTO_FELICA_MASK | \ NFC_PROTO_ISO14443_MASK | \ NFC_PROTO_ISO14443_B_MASK | \ NFC_PROTO_ISO15693_MASK) struct virtual_nci_dev { struct nci_dev *ndev; struct mutex mtx; struct sk_buff *send_buff; struct wait_queue_head wq; bool running; }; static int virtual_nci_open(struct nci_dev *ndev) { struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); vdev->running = true; return 0; } static int virtual_nci_close(struct nci_dev *ndev) { struct virtual_nci_dev *vdev = nci_get_drvdata(ndev); mutex_lock(&vdev->mtx); kfree_skb(vdev->send_buff); vdev->send_buff = NULL; vdev->running = false; mutex_unlock(&vdev->mtx); return 0; } static int virtual_nci_send(struct nci_dev *ndev, struct sk_buff *skb) { struct virtual_nci_dev *vdev = 
nci_get_drvdata(ndev); mutex_lock(&vdev->mtx); if (vdev->send_buff || !vdev->running) { mutex_unlock(&vdev->mtx); kfree_skb(skb); return -1; } vdev->send_buff = skb_copy(skb, GFP_KERNEL); if (!vdev->send_buff) { mutex_unlock(&vdev->mtx); kfree_skb(skb); return -1; } mutex_unlock(&vdev->mtx); wake_up_interruptible(&vdev->wq); consume_skb(skb); return 0; } static const struct nci_ops virtual_nci_ops = { .open = virtual_nci_open, .close = virtual_nci_close, .send = virtual_nci_send }; static ssize_t virtual_ncidev_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { struct virtual_nci_dev *vdev = file->private_data; size_t actual_len; mutex_lock(&vdev->mtx); while (!vdev->send_buff) { mutex_unlock(&vdev->mtx); if (wait_event_interruptible(vdev->wq, vdev->send_buff)) return -EFAULT; mutex_lock(&vdev->mtx); } actual_len = min_t(size_t, count, vdev->send_buff->len); if (copy_to_user(buf, vdev->send_buff->data, actual_len)) { mutex_unlock(&vdev->mtx); return -EFAULT; } skb_pull(vdev->send_buff, actual_len); if (vdev->send_buff->len == 0) { consume_skb(vdev->send_buff); vdev->send_buff = NULL; } mutex_unlock(&vdev->mtx); return actual_len; } static ssize_t virtual_ncidev_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct virtual_nci_dev *vdev = file->private_data; struct sk_buff *skb; skb = alloc_skb(count, GFP_KERNEL); if (!skb) return -ENOMEM; if (copy_from_user(skb_put(skb, count), buf, count)) { kfree_skb(skb); return -EFAULT; } if (strnlen(skb->data, count) != count) { kfree_skb(skb); return -EINVAL; } nci_recv_frame(vdev->ndev, skb); return count; } static int virtual_ncidev_open(struct inode *inode, struct file *file) { int ret = 0; struct virtual_nci_dev *vdev; vdev = kzalloc(sizeof(*vdev), GFP_KERNEL); if (!vdev) return -ENOMEM; vdev->ndev = nci_allocate_device(&virtual_nci_ops, VIRTUAL_NFC_PROTOCOLS, 0, 0); if (!vdev->ndev) { kfree(vdev); return -ENOMEM; } mutex_init(&vdev->mtx); 
init_waitqueue_head(&vdev->wq); file->private_data = vdev; nci_set_drvdata(vdev->ndev, vdev); ret = nci_register_device(vdev->ndev); if (ret < 0) { nci_free_device(vdev->ndev); mutex_destroy(&vdev->mtx); kfree(vdev); return ret; } return 0; } static int virtual_ncidev_close(struct inode *inode, struct file *file) { struct virtual_nci_dev *vdev = file->private_data; nci_unregister_device(vdev->ndev); nci_free_device(vdev->ndev); mutex_destroy(&vdev->mtx); kfree(vdev); return 0; } static long virtual_ncidev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct virtual_nci_dev *vdev = file->private_data; const struct nfc_dev *nfc_dev = vdev->ndev->nfc_dev; void __user *p = (void __user *)arg; if (cmd != IOCTL_GET_NCIDEV_IDX) return -ENOTTY; if (copy_to_user(p, &nfc_dev->idx, sizeof(nfc_dev->idx))) return -EFAULT; return 0; } static const struct file_operations virtual_ncidev_fops = { .owner = THIS_MODULE, .read = virtual_ncidev_read, .write = virtual_ncidev_write, .open = virtual_ncidev_open, .release = virtual_ncidev_close, .unlocked_ioctl = virtual_ncidev_ioctl }; static struct miscdevice miscdev = { .minor = MISC_DYNAMIC_MINOR, .name = "virtual_nci", .fops = &virtual_ncidev_fops, .mode = 0600, }; module_misc_device(miscdev); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Virtual NCI device simulation driver"); MODULE_AUTHOR("Bongsu Jeon <bongsu.jeon@samsung.com>");
12 12 12 12 12 12 12 12 12 12 12 12 12 12 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 // SPDX-License-Identifier: GPL-2.0-or-later /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket * interface as the means of communication with the user level. * * Support for INET6 connection oriented protocols. * * Authors: See the TCPv6 sources */ #include <linux/module.h> #include <linux/in6.h> #include <linux/ipv6.h> #include <linux/jhash.h> #include <linux/slab.h> #include <net/addrconf.h> #include <net/inet_connection_sock.h> #include <net/inet_ecn.h> #include <net/inet_hashtables.h> #include <net/ip6_route.h> #include <net/sock.h> #include <net/inet6_connection_sock.h> #include <net/sock_reuseport.h> struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, const struct request_sock *req, u8 proto) { struct inet_request_sock *ireq = inet_rsk(req); const struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = proto; fl6->daddr = ireq->ir_v6_rmt_addr; rcu_read_lock(); final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); fl6->flowi6_uid = sk_uid(sk); security_req_classify_flow(req, flowi6_to_flowi_common(fl6)); dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (IS_ERR(dst)) return 
NULL; return dst; } static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { return __sk_dst_check(sk, cookie); } static struct dst_entry *inet6_csk_route_socket(struct sock *sk, struct flowi6 *fl6) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *final_p, final; struct dst_entry *dst; memset(fl6, 0, sizeof(*fl6)); fl6->flowi6_proto = sk->sk_protocol; fl6->daddr = sk->sk_v6_daddr; fl6->saddr = np->saddr; fl6->flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl6->flowlabel); fl6->flowi6_oif = sk->sk_bound_dev_if; fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; fl6->fl6_dport = inet->inet_dport; fl6->flowi6_uid = sk_uid(sk); security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6)); rcu_read_lock(); final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final); rcu_read_unlock(); dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (!IS_ERR(dst)) ip6_dst_store(sk, dst, false, false); } return dst; } int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused) { struct ipv6_pinfo *np = inet6_sk(sk); struct flowi6 fl6; struct dst_entry *dst; int res; dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) { WRITE_ONCE(sk->sk_err_soft, -PTR_ERR(dst)); sk->sk_route_caps = 0; kfree_skb(skb); return PTR_ERR(dst); } rcu_read_lock(); skb_dst_set_noref(skb, dst); /* Restore final destination back after routing done */ fl6.daddr = sk->sk_v6_daddr; res = ip6_xmit(sk, skb, &fl6, sk->sk_mark, rcu_dereference(np->opt), np->tclass, READ_ONCE(sk->sk_priority)); rcu_read_unlock(); return res; } EXPORT_SYMBOL_GPL(inet6_csk_xmit); struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) { struct flowi6 fl6; struct dst_entry *dst = inet6_csk_route_socket(sk, &fl6); if (IS_ERR(dst)) return NULL; dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = inet6_csk_route_socket(sk, &fl6); return 
IS_ERR(dst) ? NULL : dst; }
19 19 10 19 19 15 3 13 13 19 6 6 5 5 5 5 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 // SPDX-License-Identifier: GPL-2.0-only #include <linux/module.h> #include <linux/errno.h> #include <linux/socket.h> #include <linux/udp.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/in6.h> #include <net/udp.h> #include <net/udp_tunnel.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/ip6_tunnel.h> #include <net/ip6_checksum.h> int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp) { struct sockaddr_in6 udp6_addr = {}; int err; struct socket *sock = NULL; err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; if (cfg->ipv6_v6only) { err = ip6_sock_set_v6only(sock->sk); if (err < 0) goto error; } if (cfg->bind_ifindex) { err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true); if (err < 0) goto error; } udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); udp6_addr.sin6_port = cfg->local_udp_port; err = kernel_bind(sock, (struct sockaddr *)&udp6_addr, sizeof(udp6_addr)); if (err < 0) goto error; if (cfg->peer_udp_port) { memset(&udp6_addr, 0, sizeof(udp6_addr)); udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->peer_ip6, sizeof(udp6_addr.sin6_addr)); udp6_addr.sin6_port = cfg->peer_udp_port; err = 
kernel_connect(sock, (struct sockaddr *)&udp6_addr, sizeof(udp6_addr), 0); } if (err < 0) goto error; udp_set_no_check6_tx(sock->sk, !cfg->use_udp6_tx_checksums); udp_set_no_check6_rx(sock->sk, !cfg->use_udp6_rx_checksums); *sockp = sock; return 0; error: if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); sock_release(sock); } *sockp = NULL; return err; } EXPORT_SYMBOL_GPL(udp_sock_create6); void udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, struct net_device *dev, const struct in6_addr *saddr, const struct in6_addr *daddr, __u8 prio, __u8 ttl, __be32 label, __be16 src_port, __be16 dst_port, bool nocheck, u16 ip6cb_flags) { struct udphdr *uh; struct ipv6hdr *ip6h; __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); uh = udp_hdr(skb); uh->dest = dst_port; uh->source = src_port; uh->len = htons(skb->len); skb_dst_set(skb, dst); udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); ip6h = ipv6_hdr(skb); ip6_flow_hdr(ip6h, prio, label); ip6h->payload_len = htons(skb->len); ip6h->nexthdr = IPPROTO_UDP; ip6h->hop_limit = ttl; ip6h->daddr = *daddr; ip6h->saddr = *saddr; ip6tunnel_xmit(sk, skb, dev, ip6cb_flags); } EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); /** * udp_tunnel6_dst_lookup - perform route lookup on UDP tunnel * @skb: Packet for which lookup is done * @dev: Tunnel device * @net: Network namespace of tunnel device * @sock: Socket which provides route info * @oif: Index of the output interface * @saddr: Memory to store the src ip address * @key: Tunnel information * @sport: UDP source port * @dport: UDP destination port * @dsfield: The traffic class field * @dst_cache: The dst cache to use for lookup * This function performs a route lookup on a UDP tunnel * * It returns a valid dst pointer and stores src address to be used in * tunnel in param saddr on success, else a pointer encoded error code. 
*/ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, struct net_device *dev, struct net *net, struct socket *sock, int oif, struct in6_addr *saddr, const struct ip_tunnel_key *key, __be16 sport, __be16 dport, u8 dsfield, struct dst_cache *dst_cache) { struct dst_entry *dst = NULL; struct flowi6 fl6; #ifdef CONFIG_DST_CACHE if (dst_cache) { dst = dst_cache_get_ip6(dst_cache, saddr); if (dst) return dst; } #endif memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_mark = skb->mark; fl6.flowi6_proto = IPPROTO_UDP; fl6.flowi6_oif = oif; fl6.daddr = key->u.ipv6.dst; fl6.saddr = key->u.ipv6.src; fl6.fl6_sport = sport; fl6.fl6_dport = dport; fl6.flowlabel = ip6_make_flowinfo(dsfield, key->label); dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, NULL); if (IS_ERR(dst)) { netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); return ERR_PTR(-ENETUNREACH); } if (dst_dev(dst) == dev) { /* is this necessary? */ netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); dst_release(dst); return ERR_PTR(-ELOOP); } #ifdef CONFIG_DST_CACHE if (dst_cache) dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); #endif *saddr = fl6.saddr; return dst; } EXPORT_SYMBOL_GPL(udp_tunnel6_dst_lookup); MODULE_DESCRIPTION("IPv6 Foo over UDP tunnel driver"); MODULE_LICENSE("GPL");
295 293 295 285 352 5 4 12 8 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 #include <linux/bpf.h> #include <linux/vmalloc.h> #include <linux/file.h> #include <linux/fs.h> #include <linux/kernel.h> #include <linux/idr.h> #include <linux/namei.h> #include <linux/user_namespace.h> #include <linux/security.h> static bool bpf_ns_capable(struct user_namespace *ns, int cap) { return ns_capable(ns, cap) || (cap != CAP_SYS_ADMIN && ns_capable(ns, CAP_SYS_ADMIN)); } bool bpf_token_capable(const struct bpf_token *token, int cap) { struct user_namespace *userns; /* BPF token allows ns_capable() level of capabilities */ userns = token ? 
token->userns : &init_user_ns; if (!bpf_ns_capable(userns, cap)) return false; if (token && security_bpf_token_capable(token, cap) < 0) return false; return true; } void bpf_token_inc(struct bpf_token *token) { atomic64_inc(&token->refcnt); } static void bpf_token_free(struct bpf_token *token) { security_bpf_token_free(token); put_user_ns(token->userns); kfree(token); } static void bpf_token_put_deferred(struct work_struct *work) { struct bpf_token *token = container_of(work, struct bpf_token, work); bpf_token_free(token); } void bpf_token_put(struct bpf_token *token) { if (!token) return; if (!atomic64_dec_and_test(&token->refcnt)) return; INIT_WORK(&token->work, bpf_token_put_deferred); schedule_work(&token->work); } static int bpf_token_release(struct inode *inode, struct file *filp) { struct bpf_token *token = filp->private_data; bpf_token_put(token); return 0; } static void bpf_token_show_fdinfo(struct seq_file *m, struct file *filp) { struct bpf_token *token = filp->private_data; u64 mask; BUILD_BUG_ON(__MAX_BPF_CMD >= 64); mask = BIT_ULL(__MAX_BPF_CMD) - 1; if ((token->allowed_cmds & mask) == mask) seq_printf(m, "allowed_cmds:\tany\n"); else seq_printf(m, "allowed_cmds:\t0x%llx\n", token->allowed_cmds); BUILD_BUG_ON(__MAX_BPF_MAP_TYPE >= 64); mask = BIT_ULL(__MAX_BPF_MAP_TYPE) - 1; if ((token->allowed_maps & mask) == mask) seq_printf(m, "allowed_maps:\tany\n"); else seq_printf(m, "allowed_maps:\t0x%llx\n", token->allowed_maps); BUILD_BUG_ON(__MAX_BPF_PROG_TYPE >= 64); mask = BIT_ULL(__MAX_BPF_PROG_TYPE) - 1; if ((token->allowed_progs & mask) == mask) seq_printf(m, "allowed_progs:\tany\n"); else seq_printf(m, "allowed_progs:\t0x%llx\n", token->allowed_progs); BUILD_BUG_ON(__MAX_BPF_ATTACH_TYPE >= 64); mask = BIT_ULL(__MAX_BPF_ATTACH_TYPE) - 1; if ((token->allowed_attachs & mask) == mask) seq_printf(m, "allowed_attachs:\tany\n"); else seq_printf(m, "allowed_attachs:\t0x%llx\n", token->allowed_attachs); } #define BPF_TOKEN_INODE_NAME "bpf-token" static const 
struct inode_operations bpf_token_iops = { }; const struct file_operations bpf_token_fops = { .release = bpf_token_release, .show_fdinfo = bpf_token_show_fdinfo, }; int bpf_token_create(union bpf_attr *attr) { struct bpf_mount_opts *mnt_opts; struct bpf_token *token = NULL; struct user_namespace *userns; struct inode *inode; struct file *file; CLASS(fd, f)(attr->token_create.bpffs_fd); struct path path; struct super_block *sb; umode_t mode; int err, fd; if (fd_empty(f)) return -EBADF; path = fd_file(f)->f_path; sb = path.dentry->d_sb; if (path.dentry != sb->s_root) return -EINVAL; if (sb->s_op != &bpf_super_ops) return -EINVAL; err = path_permission(&path, MAY_ACCESS); if (err) return err; userns = sb->s_user_ns; /* * Enforce that creators of BPF tokens are in the same user * namespace as the BPF FS instance. This makes reasoning about * permissions a lot easier and we can always relax this later. */ if (current_user_ns() != userns) return -EPERM; if (!ns_capable(userns, CAP_BPF)) return -EPERM; /* Creating BPF token in init_user_ns doesn't make much sense. 
*/ if (current_user_ns() == &init_user_ns) return -EOPNOTSUPP; mnt_opts = sb->s_fs_info; if (mnt_opts->delegate_cmds == 0 && mnt_opts->delegate_maps == 0 && mnt_opts->delegate_progs == 0 && mnt_opts->delegate_attachs == 0) return -ENOENT; /* no BPF token delegation is set up */ mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); inode = bpf_get_inode(sb, NULL, mode); if (IS_ERR(inode)) return PTR_ERR(inode); inode->i_op = &bpf_token_iops; inode->i_fop = &bpf_token_fops; clear_nlink(inode); /* make sure it is unlinked */ file = alloc_file_pseudo(inode, path.mnt, BPF_TOKEN_INODE_NAME, O_RDWR, &bpf_token_fops); if (IS_ERR(file)) { iput(inode); return PTR_ERR(file); } token = kzalloc(sizeof(*token), GFP_USER); if (!token) { err = -ENOMEM; goto out_file; } atomic64_set(&token->refcnt, 1); /* remember bpffs owning userns for future ns_capable() checks */ token->userns = get_user_ns(userns); token->allowed_cmds = mnt_opts->delegate_cmds; token->allowed_maps = mnt_opts->delegate_maps; token->allowed_progs = mnt_opts->delegate_progs; token->allowed_attachs = mnt_opts->delegate_attachs; err = security_bpf_token_create(token, attr, &path); if (err) goto out_token; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) { err = fd; goto out_token; } file->private_data = token; fd_install(fd, file); return fd; out_token: bpf_token_free(token); out_file: fput(file); return err; } int bpf_token_get_info_by_fd(struct bpf_token *token, const union bpf_attr *attr, union bpf_attr __user *uattr) { struct bpf_token_info __user *uinfo = u64_to_user_ptr(attr->info.info); struct bpf_token_info info; u32 info_len = attr->info.info_len; info_len = min_t(u32, info_len, sizeof(info)); memset(&info, 0, sizeof(info)); info.allowed_cmds = token->allowed_cmds; info.allowed_maps = token->allowed_maps; info.allowed_progs = token->allowed_progs; info.allowed_attachs = token->allowed_attachs; if (copy_to_user(uinfo, &info, info_len) || put_user(info_len, &uattr->info.info_len)) return -EFAULT; return 
0; } struct bpf_token *bpf_token_get_from_fd(u32 ufd) { CLASS(fd, f)(ufd); struct bpf_token *token; if (fd_empty(f)) return ERR_PTR(-EBADF); if (fd_file(f)->f_op != &bpf_token_fops) return ERR_PTR(-EINVAL); token = fd_file(f)->private_data; bpf_token_inc(token); return token; } bool bpf_token_allow_cmd(const struct bpf_token *token, enum bpf_cmd cmd) { if (!token) return false; if (!(token->allowed_cmds & BIT_ULL(cmd))) return false; return security_bpf_token_cmd(token, cmd) == 0; } bool bpf_token_allow_map_type(const struct bpf_token *token, enum bpf_map_type type) { if (!token || type >= __MAX_BPF_MAP_TYPE) return false; return token->allowed_maps & BIT_ULL(type); } bool bpf_token_allow_prog_type(const struct bpf_token *token, enum bpf_prog_type prog_type, enum bpf_attach_type attach_type) { if (!token || prog_type >= __MAX_BPF_PROG_TYPE || attach_type >= __MAX_BPF_ATTACH_TYPE) return false; return (token->allowed_progs & BIT_ULL(prog_type)) && (token->allowed_attachs & BIT_ULL(attach_type)); }
972 976 57 56 973 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 // SPDX-License-Identifier: GPL-2.0-or-later /* * net/sched/sch_cbs.c Credit Based Shaper * * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> */ /* Credit Based Shaper (CBS) * ========================= * * This is a simple rate-limiting shaper aimed at TSN applications on * systems with known traffic workloads. * * Its algorithm is defined by the IEEE 802.1Q-2014 Specification, * Section 8.6.8.2, and explained in more detail in the Annex L of the * same specification. * * There are four tunables to be considered: * * 'idleslope': Idleslope is the rate of credits that is * accumulated (in kilobits per second) when there is at least * one packet waiting for transmission. Packets are transmitted * when the current value of credits is equal or greater than * zero. When there is no packet to be transmitted the amount of * credits is set to zero. This is the main tunable of the CBS * algorithm. * * 'sendslope': * Sendslope is the rate of credits that is depleted (it should be a * negative number of kilobits per second) when a transmission is * ocurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section * 8.6.8.2 item g): * * sendslope = idleslope - port_transmit_rate * * 'hicredit': Hicredit defines the maximum amount of credits (in * bytes) that can be accumulated. Hicredit depends on the * characteristics of interfering traffic, * 'max_interference_size' is the maximum size of any burst of * traffic that can delay the transmission of a frame that is * available for transmission for this traffic class, (IEEE * 802.1Q-2014 Annex L, Equation L-3): * * hicredit = max_interference_size * (idleslope / port_transmit_rate) * * 'locredit': Locredit is the minimum amount of credits that can * be reached. 
It is a function of the traffic flowing through * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2): * * locredit = max_frame_size * (sendslope / port_transmit_rate) */ #include <linux/ethtool.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> #include <linux/units.h> #include <net/netevent.h> #include <net/netlink.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> static LIST_HEAD(cbs_list); static DEFINE_SPINLOCK(cbs_list_lock); struct cbs_sched_data { bool offload; int queue; atomic64_t port_rate; /* in bytes/s */ s64 last; /* timestamp in ns */ s64 credits; /* in bytes */ s32 locredit; /* in bytes */ s32 hicredit; /* in bytes */ s64 sendslope; /* in bytes/s */ s64 idleslope; /* in bytes/s */ struct qdisc_watchdog watchdog; int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free); struct sk_buff *(*dequeue)(struct Qdisc *sch); struct Qdisc *qdisc; struct list_head cbs_list; }; static int cbs_child_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct Qdisc *child, struct sk_buff **to_free) { unsigned int len = qdisc_pkt_len(skb); int err; err = child->ops->enqueue(skb, child, to_free); if (err != NET_XMIT_SUCCESS) return err; sch->qstats.backlog += len; sch->q.qlen++; return NET_XMIT_SUCCESS; } static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; return cbs_child_enqueue(skb, sch, qdisc, to_free); } static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; if (sch->q.qlen == 0 && q->credits > 0) { /* We need to stop accumulating credits when there's * no enqueued packets and q->credits is positive. 
*/ q->credits = 0; q->last = ktime_get_ns(); } return cbs_child_enqueue(skb, sch, qdisc, to_free); } static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { struct cbs_sched_data *q = qdisc_priv(sch); return q->enqueue(skb, sch, to_free); } /* timediff is in ns, slope is in bytes/s */ static s64 timediff_to_credits(s64 timediff, s64 slope) { return div64_s64(timediff * slope, NSEC_PER_SEC); } static s64 delay_from_credits(s64 credits, s64 slope) { if (unlikely(slope == 0)) return S64_MAX; return div64_s64(-credits * NSEC_PER_SEC, slope); } static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate) { if (unlikely(port_rate == 0)) return S64_MAX; return div64_s64(len * slope, port_rate); } static struct sk_buff *cbs_child_dequeue(struct Qdisc *sch, struct Qdisc *child) { struct sk_buff *skb; skb = child->ops->dequeue(child); if (!skb) return NULL; qdisc_qstats_backlog_dec(sch, skb); qdisc_bstats_update(sch, skb); sch->q.qlen--; return skb; } static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; s64 now = ktime_get_ns(); struct sk_buff *skb; s64 credits; int len; /* The previous packet is still being sent */ if (now < q->last) { qdisc_watchdog_schedule_ns(&q->watchdog, q->last); return NULL; } if (q->credits < 0) { credits = timediff_to_credits(now - q->last, q->idleslope); credits = q->credits + credits; q->credits = min_t(s64, credits, q->hicredit); if (q->credits < 0) { s64 delay; delay = delay_from_credits(q->credits, q->idleslope); qdisc_watchdog_schedule_ns(&q->watchdog, now + delay); q->last = now; return NULL; } } skb = cbs_child_dequeue(sch, qdisc); if (!skb) return NULL; len = qdisc_pkt_len(skb); /* As sendslope is a negative number, this will decrease the * amount of q->credits. 
*/ credits = credits_from_len(len, q->sendslope, atomic64_read(&q->port_rate)); credits += q->credits; q->credits = max_t(s64, credits, q->locredit); /* Estimate of the transmission of the last byte of the packet in ns */ if (unlikely(atomic64_read(&q->port_rate) == 0)) q->last = now; else q->last = now + div64_s64(len * NSEC_PER_SEC, atomic64_read(&q->port_rate)); return skb; } static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); struct Qdisc *qdisc = q->qdisc; return cbs_child_dequeue(sch, qdisc); } static struct sk_buff *cbs_dequeue(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); return q->dequeue(sch); } static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, }; static void cbs_disable_offload(struct net_device *dev, struct cbs_sched_data *q) { struct tc_cbs_qopt_offload cbs = { }; const struct net_device_ops *ops; int err; if (!q->offload) return; q->enqueue = cbs_enqueue_soft; q->dequeue = cbs_dequeue_soft; ops = dev->netdev_ops; if (!ops->ndo_setup_tc) return; cbs.queue = q->queue; cbs.enable = 0; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); if (err < 0) pr_warn("Couldn't disable CBS offload for queue %d\n", cbs.queue); } static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, const struct tc_cbs_qopt *opt, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; struct tc_cbs_qopt_offload cbs = { }; int err; if (!ops->ndo_setup_tc) { NL_SET_ERR_MSG(extack, "Specified device does not support cbs offload"); return -EOPNOTSUPP; } cbs.queue = q->queue; cbs.enable = 1; cbs.hicredit = opt->hicredit; cbs.locredit = opt->locredit; cbs.idleslope = opt->idleslope; cbs.sendslope = opt->sendslope; err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); if (err < 0) { NL_SET_ERR_MSG(extack, "Specified device failed to setup cbs hardware offload"); return err; } q->enqueue = 
cbs_enqueue_offload; q->dequeue = cbs_dequeue_offload; return 0; } static void cbs_set_port_rate(struct net_device *dev, struct cbs_sched_data *q) { struct ethtool_link_ksettings ecmd; int speed = SPEED_10; s64 port_rate; int err; err = __ethtool_get_link_ksettings(dev, &ecmd); if (err < 0) goto skip; if (ecmd.base.speed && ecmd.base.speed != SPEED_UNKNOWN) speed = ecmd.base.speed; skip: port_rate = speed * 1000 * BYTES_PER_KBIT; atomic64_set(&q->port_rate, port_rate); netdev_dbg(dev, "cbs: set %s's port_rate to: %lld, linkspeed: %d\n", dev->name, (long long)atomic64_read(&q->port_rate), ecmd.base.speed); } static int cbs_dev_notifier(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct cbs_sched_data *q; struct net_device *qdev; bool found = false; ASSERT_RTNL(); if (event != NETDEV_UP && event != NETDEV_CHANGE) return NOTIFY_DONE; spin_lock(&cbs_list_lock); list_for_each_entry(q, &cbs_list, cbs_list) { qdev = qdisc_dev(q->qdisc); if (qdev == dev) { found = true; break; } } spin_unlock(&cbs_list_lock); if (found) cbs_set_port_rate(dev, q); return NOTIFY_DONE; } static int cbs_change(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); struct nlattr *tb[TCA_CBS_MAX + 1]; struct tc_cbs_qopt *qopt; int err; err = nla_parse_nested_deprecated(tb, TCA_CBS_MAX, opt, cbs_policy, extack); if (err < 0) return err; if (!tb[TCA_CBS_PARMS]) { NL_SET_ERR_MSG(extack, "Missing CBS parameter which are mandatory"); return -EINVAL; } qopt = nla_data(tb[TCA_CBS_PARMS]); if (!qopt->offload) { cbs_set_port_rate(dev, q); cbs_disable_offload(dev, q); } else { err = cbs_enable_offload(dev, q, qopt, extack); if (err < 0) return err; } /* Everything went OK, save the parameters used. 
*/ WRITE_ONCE(q->hicredit, qopt->hicredit); WRITE_ONCE(q->locredit, qopt->locredit); WRITE_ONCE(q->idleslope, qopt->idleslope * BYTES_PER_KBIT); WRITE_ONCE(q->sendslope, qopt->sendslope * BYTES_PER_KBIT); WRITE_ONCE(q->offload, qopt->offload); return 0; } static int cbs_init(struct Qdisc *sch, struct nlattr *opt, struct netlink_ext_ack *extack) { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); if (!opt) { NL_SET_ERR_MSG(extack, "Missing CBS qdisc options which are mandatory"); return -EINVAL; } q->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, extack); if (!q->qdisc) return -ENOMEM; spin_lock(&cbs_list_lock); list_add(&q->cbs_list, &cbs_list); spin_unlock(&cbs_list_lock); qdisc_hash_add(q->qdisc, false); q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); q->enqueue = cbs_enqueue_soft; q->dequeue = cbs_dequeue_soft; qdisc_watchdog_init(&q->watchdog, sch); return cbs_change(sch, opt, extack); } static void cbs_destroy(struct Qdisc *sch) { struct cbs_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); /* Nothing to do if we couldn't create the underlying qdisc */ if (!q->qdisc) return; qdisc_watchdog_cancel(&q->watchdog); cbs_disable_offload(dev, q); spin_lock(&cbs_list_lock); list_del(&q->cbs_list); spin_unlock(&cbs_list_lock); qdisc_put(q->qdisc); } static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) { struct cbs_sched_data *q = qdisc_priv(sch); struct tc_cbs_qopt opt = { }; struct nlattr *nest; nest = nla_nest_start_noflag(skb, TCA_OPTIONS); if (!nest) goto nla_put_failure; opt.hicredit = READ_ONCE(q->hicredit); opt.locredit = READ_ONCE(q->locredit); opt.sendslope = div64_s64(READ_ONCE(q->sendslope), BYTES_PER_KBIT); opt.idleslope = div64_s64(READ_ONCE(q->idleslope), BYTES_PER_KBIT); opt.offload = READ_ONCE(q->offload); if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt)) goto nla_put_failure; return nla_nest_end(skb, nest); nla_put_failure: nla_nest_cancel(skb, nest); 
return -1; } static int cbs_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { struct cbs_sched_data *q = qdisc_priv(sch); if (cl != 1 || !q->qdisc) /* only one class */ return -ENOENT; tcm->tcm_handle |= TC_H_MIN(1); tcm->tcm_info = q->qdisc->handle; return 0; } static int cbs_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct Qdisc **old, struct netlink_ext_ack *extack) { struct cbs_sched_data *q = qdisc_priv(sch); if (!new) { new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle, NULL); if (!new) new = &noop_qdisc; } *old = qdisc_replace(sch, new, &q->qdisc); return 0; } static struct Qdisc *cbs_leaf(struct Qdisc *sch, unsigned long arg) { struct cbs_sched_data *q = qdisc_priv(sch); return q->qdisc; } static unsigned long cbs_find(struct Qdisc *sch, u32 classid) { return 1; } static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { tc_qdisc_stats_dump(sch, 1, walker); } } static const struct Qdisc_class_ops cbs_class_ops = { .graft = cbs_graft, .leaf = cbs_leaf, .find = cbs_find, .walk = cbs_walk, .dump = cbs_dump_class, }; static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { .id = "cbs", .cl_ops = &cbs_class_ops, .priv_size = sizeof(struct cbs_sched_data), .enqueue = cbs_enqueue, .dequeue = cbs_dequeue, .peek = qdisc_peek_dequeued, .init = cbs_init, .reset = qdisc_reset_queue, .destroy = cbs_destroy, .change = cbs_change, .dump = cbs_dump, .owner = THIS_MODULE, }; MODULE_ALIAS_NET_SCH("cbs"); static struct notifier_block cbs_device_notifier = { .notifier_call = cbs_dev_notifier, }; static int __init cbs_module_init(void) { int err; err = register_netdevice_notifier(&cbs_device_notifier); if (err) return err; err = register_qdisc(&cbs_qdisc_ops); if (err) unregister_netdevice_notifier(&cbs_device_notifier); return err; } static void __exit cbs_module_exit(void) { unregister_qdisc(&cbs_qdisc_ops); unregister_netdevice_notifier(&cbs_device_notifier); } 
module_init(cbs_module_init) module_exit(cbs_module_exit) MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Credit Based shaper");
62 63 63 10 10 4 4 62 63 2 2 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 
520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 
1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 
1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 /* * Copyright (c) 2014 Samsung Electronics Co., Ltd * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sub license, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. 
*/ #include <linux/debugfs.h> #include <linux/err.h> #include <linux/export.h> #include <linux/media-bus-format.h> #include <linux/module.h> #include <linux/mutex.h> #include <drm/drm_atomic_state_helper.h> #include <drm/drm_bridge.h> #include <drm/drm_debugfs.h> #include <drm/drm_edid.h> #include <drm/drm_encoder.h> #include <drm/drm_file.h> #include <drm/drm_of.h> #include <drm/drm_print.h> #include "drm_crtc_internal.h" /** * DOC: overview * * &struct drm_bridge represents a device that hangs on to an encoder. These are * handy when a regular &drm_encoder entity isn't enough to represent the entire * encoder chain. * * A bridge is always attached to a single &drm_encoder at a time, but can be * either connected to it directly, or through a chain of bridges:: * * [ CRTC ---> ] Encoder ---> Bridge A ---> Bridge B * * Here, the output of the encoder feeds to bridge A, and that furthers feeds to * bridge B. Bridge chains can be arbitrarily long, and shall be fully linear: * Chaining multiple bridges to the output of a bridge, or the same bridge to * the output of different bridges, is not supported. * * &drm_bridge, like &drm_panel, aren't &drm_mode_object entities like planes, * CRTCs, encoders or connectors and hence are not visible to userspace. They * just provide additional hooks to get the desired output at the end of the * encoder chain. */ /** * DOC: display driver integration * * Display drivers are responsible for linking encoders with the first bridge * in the chains. This is done by acquiring the appropriate bridge with * devm_drm_of_get_bridge(). Once acquired, the bridge shall be attached to the * encoder with a call to drm_bridge_attach(). * * Bridges are responsible for linking themselves with the next bridge in the * chain, if any. This is done the same way as for encoders, with the call to * drm_bridge_attach() occurring in the &drm_bridge_funcs.attach operation. 
* * Once these links are created, the bridges can participate along with encoder * functions to perform mode validation and fixup (through * drm_bridge_chain_mode_valid() and drm_atomic_bridge_chain_check()), mode * setting (through drm_bridge_chain_mode_set()), enable (through * drm_atomic_bridge_chain_pre_enable() and drm_atomic_bridge_chain_enable()) * and disable (through drm_atomic_bridge_chain_disable() and * drm_atomic_bridge_chain_post_disable()). Those functions call the * corresponding operations provided in &drm_bridge_funcs in sequence for all * bridges in the chain. * * For display drivers that use the atomic helpers * drm_atomic_helper_check_modeset(), * drm_atomic_helper_commit_modeset_enables() and * drm_atomic_helper_commit_modeset_disables() (either directly in hand-rolled * commit check and commit tail handlers, or through the higher-level * drm_atomic_helper_check() and drm_atomic_helper_commit_tail() or * drm_atomic_helper_commit_tail_rpm() helpers), this is done transparently and * requires no intervention from the driver. For other drivers, the relevant * DRM bridge chain functions shall be called manually. * * Bridges also participate in implementing the &drm_connector at the end of * the bridge chain. Display drivers may use the drm_bridge_connector_init() * helper to create the &drm_connector, or implement it manually on top of the * connector-related operations exposed by the bridge (see the overview * documentation of bridge operations for more details). */ /** * DOC: special care dsi * * The interaction between the bridges and other frameworks involved in * the probing of the upstream driver and the bridge driver can be * challenging. Indeed, there are multiple cases that need to be * considered: * * - The upstream driver doesn't use the component framework and isn't a * MIPI-DSI host. 
In this case, the bridge driver will probe at some * point and the upstream driver should try to probe again by returning * EPROBE_DEFER as long as the bridge driver hasn't probed. * * - The upstream driver doesn't use the component framework, but is a * MIPI-DSI host. The bridge device uses the MIPI-DCS commands to be * controlled. In this case, the bridge device is a child of the * display device and when it probes, it is assured that the display * device (and MIPI-DSI host) is present. The upstream driver will be * assured that the bridge driver is connected between the * &mipi_dsi_host_ops.attach and &mipi_dsi_host_ops.detach operations. * Therefore, it must run mipi_dsi_host_register() in its probe * function, and then run drm_bridge_attach() in its * &mipi_dsi_host_ops.attach hook. * * - The upstream driver uses the component framework and is a MIPI-DSI * host. The bridge device uses the MIPI-DCS commands to be * controlled. This is the same situation as above, and the upstream driver can run * mipi_dsi_host_register() in either its probe or bind hooks. * * - The upstream driver uses the component framework and is a MIPI-DSI * host. The bridge device uses a separate bus (such as I2C) to be * controlled. In this case, there's no correlation between the probe * of the bridge and upstream drivers, so care must be taken to avoid * an endless EPROBE_DEFER loop, with each driver waiting for the * other to probe. * * The ideal pattern to cover the last item (and all the others in the * MIPI-DSI host driver case) is to split the operations like this: * * - The MIPI-DSI host driver must run mipi_dsi_host_register() in its * probe hook. It will make sure that the MIPI-DSI host sticks around, * and that the driver's bind can be called. * * - In its probe hook, the bridge driver must try to find its MIPI-DSI * host, register as a MIPI-DSI device and attach the MIPI-DSI device * to its host. The bridge driver is now functional. 
* * - In its &struct mipi_dsi_host_ops.attach hook, the MIPI-DSI host can * now add its component. Its bind hook will now be called and since * the bridge driver is attached and registered, we can now look for * and attach it. * * At this point, we're now certain that both the upstream driver and * the bridge driver are functional and we can't have a deadlock-like * situation when probing. */ /** * DOC: dsi bridge operations * * DSI host interfaces are expected to be implemented as bridges rather than * encoders, however there are a few aspects of their operation that need to * be defined in order to provide a consistent interface. * * A DSI host should keep the PHY powered down until the pre_enable operation is * called. All lanes are in an undefined idle state up to this point, and it * must not be assumed that it is LP-11. * pre_enable should initialise the PHY, set the data lanes to LP-11, and the * clock lane to either LP-11 or HS depending on the mode_flag * %MIPI_DSI_CLOCK_NON_CONTINUOUS. * * Ordinarily the downstream bridge DSI peripheral pre_enable will have been * called before the DSI host. If the DSI peripheral requires LP-11 and/or * the clock lane to be in HS mode prior to pre_enable, then it can set the * &pre_enable_prev_first flag to request the pre_enable (and * post_disable) order to be altered to enable the DSI host first. * * Either the CRTC being enabled, or the DSI host enable operation should switch * the host to actively transmitting video on the data lanes. * * The reverse also applies. The DSI host disable operation or stopping the CRTC * should stop transmitting video, and the data lanes should return to the LP-11 * state. The DSI host &post_disable operation should disable the PHY. * If the &pre_enable_prev_first flag is set, then the DSI peripheral's * bridge &post_disable will be called before the DSI host's post_disable. 
* * Whilst it is valid to call &host_transfer prior to pre_enable or after * post_disable, the exact state of the lanes is undefined at this point. The * DSI host should initialise the interface, transmit the data, and then disable * the interface again. * * Ultra Low Power State (ULPS) is not explicitly supported by DRM. If * implemented, it therefore needs to be handled entirely within the DSI Host * driver. */ static DEFINE_MUTEX(bridge_lock); static LIST_HEAD(bridge_list); static void __drm_bridge_free(struct kref *kref) { struct drm_bridge *bridge = container_of(kref, struct drm_bridge, refcount); if (bridge->funcs->destroy) bridge->funcs->destroy(bridge); kfree(bridge->container); } /** * drm_bridge_get - Acquire a bridge reference * @bridge: DRM bridge * * This function increments the bridge's refcount. * * Returns: * Pointer to @bridge. */ struct drm_bridge *drm_bridge_get(struct drm_bridge *bridge) { if (bridge) kref_get(&bridge->refcount); return bridge; } EXPORT_SYMBOL(drm_bridge_get); /** * drm_bridge_put - Release a bridge reference * @bridge: DRM bridge * * This function decrements the bridge's reference count and frees the * object if the reference count drops to zero. */ void drm_bridge_put(struct drm_bridge *bridge) { if (bridge) kref_put(&bridge->refcount, __drm_bridge_free); } EXPORT_SYMBOL(drm_bridge_put); /** * drm_bridge_put_void - wrapper to drm_bridge_put() taking a void pointer * * @data: pointer to @struct drm_bridge, cast to a void pointer * * Wrapper of drm_bridge_put() to be used when a function taking a void * pointer is needed, for example as a devm action. 
*/ static void drm_bridge_put_void(void *data) { struct drm_bridge *bridge = (struct drm_bridge *)data; drm_bridge_put(bridge); } void *__devm_drm_bridge_alloc(struct device *dev, size_t size, size_t offset, const struct drm_bridge_funcs *funcs) { void *container; struct drm_bridge *bridge; int err; if (!funcs) { dev_warn(dev, "Missing funcs pointer\n"); return ERR_PTR(-EINVAL); } container = kzalloc(size, GFP_KERNEL); if (!container) return ERR_PTR(-ENOMEM); bridge = container + offset; bridge->container = container; bridge->funcs = funcs; kref_init(&bridge->refcount); err = devm_add_action_or_reset(dev, drm_bridge_put_void, bridge); if (err) return ERR_PTR(err); return container; } EXPORT_SYMBOL(__devm_drm_bridge_alloc); /** * drm_bridge_add - add the given bridge to the global bridge list * * @bridge: bridge control structure * * The bridge to be added must have been allocated by * devm_drm_bridge_alloc(). */ void drm_bridge_add(struct drm_bridge *bridge) { if (!bridge->container) DRM_WARN("DRM bridge corrupted or not allocated by devm_drm_bridge_alloc()\n"); drm_bridge_get(bridge); mutex_init(&bridge->hpd_mutex); if (bridge->ops & DRM_BRIDGE_OP_HDMI) bridge->ycbcr_420_allowed = !!(bridge->supported_formats & BIT(HDMI_COLORSPACE_YUV420)); mutex_lock(&bridge_lock); list_add_tail(&bridge->list, &bridge_list); mutex_unlock(&bridge_lock); } EXPORT_SYMBOL(drm_bridge_add); static void drm_bridge_remove_void(void *bridge) { drm_bridge_remove(bridge); } /** * devm_drm_bridge_add - devm managed version of drm_bridge_add() * * @dev: device to tie the bridge lifetime to * @bridge: bridge control structure * * This is the managed version of drm_bridge_add() which automatically * calls drm_bridge_remove() when @dev is unbound. * * Return: 0 if no error or negative error code. 
*/ int devm_drm_bridge_add(struct device *dev, struct drm_bridge *bridge) { drm_bridge_add(bridge); return devm_add_action_or_reset(dev, drm_bridge_remove_void, bridge); } EXPORT_SYMBOL(devm_drm_bridge_add); /** * drm_bridge_remove - remove the given bridge from the global bridge list * * @bridge: bridge control structure */ void drm_bridge_remove(struct drm_bridge *bridge) { mutex_lock(&bridge_lock); list_del_init(&bridge->list); mutex_unlock(&bridge_lock); mutex_destroy(&bridge->hpd_mutex); drm_bridge_put(bridge); } EXPORT_SYMBOL(drm_bridge_remove); static struct drm_private_state * drm_bridge_atomic_duplicate_priv_state(struct drm_private_obj *obj) { struct drm_bridge *bridge = drm_priv_to_bridge(obj); struct drm_bridge_state *state; state = bridge->funcs->atomic_duplicate_state(bridge); return state ? &state->base : NULL; } static void drm_bridge_atomic_destroy_priv_state(struct drm_private_obj *obj, struct drm_private_state *s) { struct drm_bridge_state *state = drm_priv_to_bridge_state(s); struct drm_bridge *bridge = drm_priv_to_bridge(obj); bridge->funcs->atomic_destroy_state(bridge, state); } static const struct drm_private_state_funcs drm_bridge_priv_state_funcs = { .atomic_duplicate_state = drm_bridge_atomic_duplicate_priv_state, .atomic_destroy_state = drm_bridge_atomic_destroy_priv_state, }; static bool drm_bridge_is_atomic(struct drm_bridge *bridge) { return bridge->funcs->atomic_reset != NULL; } /** * drm_bridge_attach - attach the bridge to an encoder's chain * * @encoder: DRM encoder * @bridge: bridge to attach * @previous: previous bridge in the chain (optional) * @flags: DRM_BRIDGE_ATTACH_* flags * * Called by a kms driver to link the bridge to an encoder's chain. The previous * argument specifies the previous bridge in the chain. If NULL, the bridge is * linked directly at the encoder's output. Otherwise it is linked at the * previous bridge's output. * * If non-NULL the previous bridge must be already attached by a call to this * function. 
* * Note that bridges attached to encoders are auto-detached during encoder * cleanup in drm_encoder_cleanup(), so drm_bridge_attach() should generally * *not* be balanced with a drm_bridge_detach() in driver code. * * RETURNS: * Zero on success, error code on failure */ int drm_bridge_attach(struct drm_encoder *encoder, struct drm_bridge *bridge, struct drm_bridge *previous, enum drm_bridge_attach_flags flags) { int ret; if (!encoder || !bridge) return -EINVAL; drm_bridge_get(bridge); if (previous && (!previous->dev || previous->encoder != encoder)) { ret = -EINVAL; goto err_put_bridge; } if (bridge->dev) { ret = -EBUSY; goto err_put_bridge; } bridge->dev = encoder->dev; bridge->encoder = encoder; if (previous) list_add(&bridge->chain_node, &previous->chain_node); else list_add(&bridge->chain_node, &encoder->bridge_chain); if (bridge->funcs->attach) { ret = bridge->funcs->attach(bridge, encoder, flags); if (ret < 0) goto err_reset_bridge; } if (drm_bridge_is_atomic(bridge)) { struct drm_bridge_state *state; state = bridge->funcs->atomic_reset(bridge); if (IS_ERR(state)) { ret = PTR_ERR(state); goto err_detach_bridge; } drm_atomic_private_obj_init(bridge->dev, &bridge->base, &state->base, &drm_bridge_priv_state_funcs); } return 0; err_detach_bridge: if (bridge->funcs->detach) bridge->funcs->detach(bridge); err_reset_bridge: bridge->dev = NULL; bridge->encoder = NULL; list_del(&bridge->chain_node); if (ret != -EPROBE_DEFER) DRM_ERROR("failed to attach bridge %pOF to encoder %s: %d\n", bridge->of_node, encoder->name, ret); else dev_err_probe(encoder->dev->dev, -EPROBE_DEFER, "failed to attach bridge %pOF to encoder %s\n", bridge->of_node, encoder->name); err_put_bridge: drm_bridge_put(bridge); return ret; } EXPORT_SYMBOL(drm_bridge_attach); void drm_bridge_detach(struct drm_bridge *bridge) { if (WARN_ON(!bridge)) return; if (WARN_ON(!bridge->dev)) return; if (drm_bridge_is_atomic(bridge)) drm_atomic_private_obj_fini(&bridge->base); if (bridge->funcs->detach) 
bridge->funcs->detach(bridge); list_del(&bridge->chain_node); bridge->dev = NULL; drm_bridge_put(bridge); } /** * DOC: bridge operations * * Bridge drivers expose operations through the &drm_bridge_funcs structure. * The DRM internals (atomic and CRTC helpers) use the helpers defined in * drm_bridge.c to call bridge operations. Those operations are divided in * three big categories to support different parts of the bridge usage. * * - The encoder-related operations support control of the bridges in the * chain, and are roughly counterparts to the &drm_encoder_helper_funcs * operations. They are used by the legacy CRTC and the atomic modeset * helpers to perform mode validation, fixup and setting, and enable and * disable the bridge automatically. * * The enable and disable operations are split in * &drm_bridge_funcs.pre_enable, &drm_bridge_funcs.enable, * &drm_bridge_funcs.disable and &drm_bridge_funcs.post_disable to provide * finer-grained control. * * Bridge drivers may implement the legacy version of those operations, or * the atomic version (prefixed with atomic\_), in which case they shall also * implement the atomic state bookkeeping operations * (&drm_bridge_funcs.atomic_duplicate_state, * &drm_bridge_funcs.atomic_destroy_state and &drm_bridge_funcs.reset). * Mixing atomic and non-atomic versions of the operations is not supported. * * - The bus format negotiation operations * &drm_bridge_funcs.atomic_get_output_bus_fmts and * &drm_bridge_funcs.atomic_get_input_bus_fmts allow bridge drivers to * negotiate the formats transmitted between bridges in the chain when * multiple formats are supported. Negotiation for formats is performed * transparently for display drivers by the atomic modeset helpers. Only * atomic versions of those operations exist, bridge drivers that need to * implement them shall thus also implement the atomic version of the * encoder-related operations. This feature is not supported by the legacy * CRTC helpers. 
* * - The connector-related operations support implementing a &drm_connector * based on a chain of bridges. DRM bridges traditionally create a * &drm_connector for bridges meant to be used at the end of the chain. This * puts additional burden on bridge drivers, especially for bridges that may * be used in the middle of a chain or at the end of it. Furthermore, it * requires all operations of the &drm_connector to be handled by a single * bridge, which doesn't always match the hardware architecture. * * To simplify bridge drivers and make the connector implementation more * flexible, a new model allows bridges to unconditionally skip creation of * &drm_connector and instead expose &drm_bridge_funcs operations to support * an externally-implemented &drm_connector. Those operations are * &drm_bridge_funcs.detect, &drm_bridge_funcs.get_modes, * &drm_bridge_funcs.get_edid, &drm_bridge_funcs.hpd_notify, * &drm_bridge_funcs.hpd_enable and &drm_bridge_funcs.hpd_disable. When * implemented, display drivers shall create a &drm_connector instance for * each chain of bridges, and implement those connector instances based on * the bridge connector operations. * * Bridge drivers shall implement the connector-related operations for all * the features that the bridge hardware supports. For instance, if a bridge * supports reading EDID, the &drm_bridge_funcs.get_edid shall be * implemented. This however doesn't mean that the DDC lines are wired to the * bridge on a particular platform, as they could also be connected to an I2C * controller of the SoC. Support for the connector-related operations on the * running platform is reported through the &drm_bridge.ops flags. Bridge * drivers shall detect which operations they can support on the platform * (usually this information is provided by ACPI or DT), and set the * &drm_bridge.ops flags for all supported operations. 
A flag shall only be * set if the corresponding &drm_bridge_funcs operation is implemented, but * an implemented operation doesn't necessarily imply that the corresponding * flag will be set. Display drivers shall use the &drm_bridge.ops flags to * decide which bridge to delegate a connector operation to. This mechanism * allows providing a single static const &drm_bridge_funcs instance in * bridge drivers, improving security by storing function pointers in * read-only memory. * * In order to ease transition, bridge drivers may support both the old and * new models by making connector creation optional and implementing the * connector-related bridge operations. Connector creation is then controlled * by the flags argument to the drm_bridge_attach() function. Display drivers * that support the new model and create connectors themselves shall set the * %DRM_BRIDGE_ATTACH_NO_CONNECTOR flag, and bridge drivers shall then skip * connector creation. For intermediate bridges in the chain, the flag shall * be passed to the drm_bridge_attach() call for the downstream bridge. * Bridge drivers that implement the new model only shall return an error * from their &drm_bridge_funcs.attach handler when the * %DRM_BRIDGE_ATTACH_NO_CONNECTOR flag is not set. New display drivers * should use the new model, and convert the bridge drivers they use if * needed, in order to gradually transition to the new model. */ /** * drm_bridge_chain_mode_valid - validate the mode against all bridges in the * encoder chain. * @bridge: bridge control structure * @info: display info against which the mode shall be validated * @mode: desired mode to be validated * * Calls &drm_bridge_funcs.mode_valid for all the bridges in the encoder * chain, starting from the first bridge to the last. If at least one bridge * does not accept the mode the function returns the error code. * * Note: the bridge passed should be the one closest to the encoder. 
* * RETURNS: * MODE_OK on success, drm_mode_status Enum error code on failure */ enum drm_mode_status drm_bridge_chain_mode_valid(struct drm_bridge *bridge, const struct drm_display_info *info, const struct drm_display_mode *mode) { struct drm_encoder *encoder; if (!bridge) return MODE_OK; encoder = bridge->encoder; list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) { enum drm_mode_status ret; if (!bridge->funcs->mode_valid) continue; ret = bridge->funcs->mode_valid(bridge, info, mode); if (ret != MODE_OK) return ret; } return MODE_OK; } EXPORT_SYMBOL(drm_bridge_chain_mode_valid); /** * drm_bridge_chain_mode_set - set proposed mode for all bridges in the * encoder chain * @bridge: bridge control structure * @mode: desired mode to be set for the encoder chain * @adjusted_mode: updated mode that works for this encoder chain * * Calls &drm_bridge_funcs.mode_set op for all the bridges in the * encoder chain, starting from the first bridge to the last. * * Note: the bridge passed should be the one closest to the encoder */ void drm_bridge_chain_mode_set(struct drm_bridge *bridge, const struct drm_display_mode *mode, const struct drm_display_mode *adjusted_mode) { struct drm_encoder *encoder; if (!bridge) return; encoder = bridge->encoder; list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) { if (bridge->funcs->mode_set) bridge->funcs->mode_set(bridge, mode, adjusted_mode); } } EXPORT_SYMBOL(drm_bridge_chain_mode_set); /** * drm_atomic_bridge_chain_disable - disables all bridges in the encoder chain * @bridge: bridge control structure * @state: atomic state being committed * * Calls &drm_bridge_funcs.atomic_disable (falls back on * &drm_bridge_funcs.disable) op for all the bridges in the encoder chain, * starting from the last bridge to the first. 
These are called before calling * &drm_encoder_helper_funcs.atomic_disable * * Note: the bridge passed should be the one closest to the encoder */ void drm_atomic_bridge_chain_disable(struct drm_bridge *bridge, struct drm_atomic_state *state) { struct drm_encoder *encoder; struct drm_bridge *iter; if (!bridge) return; encoder = bridge->encoder; list_for_each_entry_reverse(iter, &encoder->bridge_chain, chain_node) { if (iter->funcs->atomic_disable) { iter->funcs->atomic_disable(iter, state); } else if (iter->funcs->disable) { iter->funcs->disable(iter); } if (iter == bridge) break; } } EXPORT_SYMBOL(drm_atomic_bridge_chain_disable); static void drm_atomic_bridge_call_post_disable(struct drm_bridge *bridge, struct drm_atomic_state *state) { if (state && bridge->funcs->atomic_post_disable) bridge->funcs->atomic_post_disable(bridge, state); else if (bridge->funcs->post_disable) bridge->funcs->post_disable(bridge); } /** * drm_atomic_bridge_chain_post_disable - cleans up after disabling all bridges * in the encoder chain * @bridge: bridge control structure * @state: atomic state being committed * * Calls &drm_bridge_funcs.atomic_post_disable (falls back on * &drm_bridge_funcs.post_disable) op for all the bridges in the encoder chain, * starting from the first bridge to the last. These are called after completing * &drm_encoder_helper_funcs.atomic_disable * * If a bridge sets @pre_enable_prev_first, then the @post_disable for that * bridge will be called before the previous one to reverse the @pre_enable * calling direction. * * Example: * Bridge A ---> Bridge B ---> Bridge C ---> Bridge D ---> Bridge E * * With pre_enable_prev_first flag enable in Bridge B, D, E then the resulting * @post_disable order would be, * Bridge B, Bridge A, Bridge E, Bridge D, Bridge C. 
*
 * Note: the bridge passed should be the one closest to the encoder
 */
void drm_atomic_bridge_chain_post_disable(struct drm_bridge *bridge,
					  struct drm_atomic_state *state)
{
	struct drm_encoder *encoder;
	struct drm_bridge *next, *limit;

	if (!bridge)
		return;

	encoder = bridge->encoder;

	/* @bridge itself is the cursor of the forward walk over the chain. */
	list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) {
		/*
		 * limit stays NULL unless a run of following bridges gets
		 * post_disabled ahead of this one; it then marks the last
		 * bridge of that run.
		 */
		limit = NULL;

		if (!list_is_last(&bridge->chain_node, &encoder->bridge_chain)) {
			next = list_next_entry(bridge, chain_node);

			if (next->pre_enable_prev_first) {
				/* next bridge had requested that prev
				 * was enabled first, so disabled last
				 */
				limit = next;

				/* Find the next bridge that has NOT requested
				 * prev to be enabled first / disabled last
				 */
				list_for_each_entry_from(next, &encoder->bridge_chain,
							 chain_node) {
					if (!next->pre_enable_prev_first) {
						/* Step back onto the last bridge that has the flag set. */
						next = list_prev_entry(next, chain_node);
						limit = next;
						break;
					}

					/* The run of flagged bridges reaches the end of the chain. */
					if (list_is_last(&next->chain_node,
							 &encoder->bridge_chain)) {
						limit = next;
						break;
					}
				}

				/* Call these bridges in reverse order */
				list_for_each_entry_from_reverse(next, &encoder->bridge_chain,
								 chain_node) {
					/* Stop before the current bridge; it is handled below. */
					if (next == bridge)
						break;

					drm_atomic_bridge_call_post_disable(next, state);
				}
			}
		}

		drm_atomic_bridge_call_post_disable(bridge, state);

		if (limit)
			/* Jump all bridges that we have already post_disabled */
			bridge = limit;
	}
}
EXPORT_SYMBOL(drm_atomic_bridge_chain_post_disable);

/*
 * Run the pre-enable hook of a single bridge: the atomic variant when a
 * @state is supplied and the hook exists, otherwise the legacy
 * &drm_bridge_funcs.pre_enable hook if there is one.
 */
static void drm_atomic_bridge_call_pre_enable(struct drm_bridge *bridge,
					      struct drm_atomic_state *state)
{
	if (state && bridge->funcs->atomic_pre_enable)
		bridge->funcs->atomic_pre_enable(bridge, state);
	else if (bridge->funcs->pre_enable)
		bridge->funcs->pre_enable(bridge);
}

/**
 * drm_atomic_bridge_chain_pre_enable - prepares for enabling all bridges in
 *					the encoder chain
 * @bridge: bridge control structure
 * @state: atomic state being committed
 *
 * Calls &drm_bridge_funcs.atomic_pre_enable (falls back on
 * &drm_bridge_funcs.pre_enable) op for all the bridges in the encoder chain,
 * starting from the last bridge to the first.
These are called before calling
 * &drm_encoder_helper_funcs.atomic_enable
 *
 * If a bridge sets @pre_enable_prev_first, then the pre_enable for the
 * prev bridge will be called before pre_enable of this bridge.
 *
 * Example:
 * Bridge A ---> Bridge B ---> Bridge C ---> Bridge D ---> Bridge E
 *
 * With pre_enable_prev_first flag enable in Bridge B, D, E then the resulting
 * @pre_enable order would be,
 * Bridge C, Bridge D, Bridge E, Bridge A, Bridge B.
 *
 * Note: the bridge passed should be the one closest to the encoder
 */
void drm_atomic_bridge_chain_pre_enable(struct drm_bridge *bridge,
					struct drm_atomic_state *state)
{
	struct drm_encoder *encoder;
	struct drm_bridge *iter, *next, *limit;

	if (!bridge)
		return;

	encoder = bridge->encoder;

	/* Walk the chain from its tail back towards @bridge. */
	list_for_each_entry_reverse(iter, &encoder->bridge_chain, chain_node) {
		if (iter->pre_enable_prev_first) {
			next = iter;
			/* Default: the run of flagged bridges extends back to @bridge. */
			limit = bridge;
			list_for_each_entry_from_reverse(next,
							 &encoder->bridge_chain,
							 chain_node) {
				if (next == bridge)
					break;

				if (!next->pre_enable_prev_first) {
					/* Found first bridge that does NOT
					 * request prev to be enabled first
					 */
					limit = next;
					break;
				}
			}

			/*
			 * next now sits on the bridge where the backward scan
			 * stopped; go forward from there up to, but excluding,
			 * iter.
			 */
			list_for_each_entry_from(next, &encoder->bridge_chain, chain_node) {
				/* Call requested prev bridge pre_enable
				 * in order.
				 */
				if (next == iter)
					/* At the first bridge to request prev
					 * bridges called first.
					 */
					break;

				drm_atomic_bridge_call_pre_enable(next, state);
			}
		}

		drm_atomic_bridge_call_pre_enable(iter, state);

		if (iter->pre_enable_prev_first)
			/* Jump all bridges that we have already pre_enabled */
			iter = limit;

		/* @bridge is the last bridge to handle; do not go past it. */
		if (iter == bridge)
			break;
	}
}
EXPORT_SYMBOL(drm_atomic_bridge_chain_pre_enable);

/**
 * drm_atomic_bridge_chain_enable - enables all bridges in the encoder chain
 * @bridge: bridge control structure
 * @state: atomic state being committed
 *
 * Calls &drm_bridge_funcs.atomic_enable (falls back on
 * &drm_bridge_funcs.enable) op for all the bridges in the encoder chain,
 * starting from the first bridge to the last.
These are called after completing
 * &drm_encoder_helper_funcs.atomic_enable
 *
 * Note: the bridge passed should be the one closest to the encoder
 */
void drm_atomic_bridge_chain_enable(struct drm_bridge *bridge,
				    struct drm_atomic_state *state)
{
	struct drm_encoder *encoder;

	if (!bridge)
		return;

	encoder = bridge->encoder;
	/* @bridge doubles as the list cursor, walking forward from itself. */
	list_for_each_entry_from(bridge, &encoder->bridge_chain, chain_node) {
		/* Prefer the atomic hook; fall back to the legacy one. */
		if (bridge->funcs->atomic_enable) {
			bridge->funcs->atomic_enable(bridge, state);
		} else if (bridge->funcs->enable) {
			bridge->funcs->enable(bridge);
		}
	}
}
EXPORT_SYMBOL(drm_atomic_bridge_chain_enable);

/*
 * Run the check hook of a single bridge.
 *
 * Calls ->atomic_check() when implemented, otherwise falls back on
 * ->mode_fixup(). A bridge implementing neither hook passes the check.
 *
 * Returns 0 on success, the ->atomic_check() error code, or -EINVAL when the
 * bridge state is missing or ->mode_fixup() rejects the mode.
 */
static int drm_atomic_bridge_check(struct drm_bridge *bridge,
				   struct drm_crtc_state *crtc_state,
				   struct drm_connector_state *conn_state)
{
	if (bridge->funcs->atomic_check) {
		struct drm_bridge_state *bridge_state;
		int ret;

		bridge_state = drm_atomic_get_new_bridge_state(crtc_state->state,
							       bridge);
		/* ->atomic_check() needs a bridge state to work on. */
		if (WARN_ON(!bridge_state))
			return -EINVAL;

		ret = bridge->funcs->atomic_check(bridge, bridge_state,
						  crtc_state, conn_state);
		if (ret)
			return ret;
	} else if (bridge->funcs->mode_fixup) {
		/* ->mode_fixup() reports failure by returning false. */
		if (!bridge->funcs->mode_fixup(bridge, &crtc_state->mode,
					       &crtc_state->adjusted_mode))
			return -EINVAL;
	}

	return 0;
}

/*
 * Pick an input bus format for @cur_bridge that can produce @out_bus_fmt,
 * then recurse towards @first_bridge so that every preceding bridge gets a
 * compatible configuration too.
 *
 * Returns 0 on success, -ENOTSUPP when the bridge offers no input format for
 * @out_bus_fmt (or every candidate was rejected upstream), -ENOMEM when the
 * format array is missing, and -EINVAL when a bridge implementing
 * ->atomic_get_input_bus_fmts() has no bridge state.
 */
static int select_bus_fmt_recursive(struct drm_bridge *first_bridge,
				    struct drm_bridge *cur_bridge,
				    struct drm_crtc_state *crtc_state,
				    struct drm_connector_state *conn_state,
				    u32 out_bus_fmt)
{
	unsigned int i, num_in_bus_fmts = 0;
	struct drm_bridge_state *cur_state;
	/*
	 * NOTE(review): the __free(drm_bridge_put) annotation suggests that
	 * drm_bridge_get_prev_bridge() returns a reference which is dropped
	 * when this variable goes out of scope - confirm against the helper.
	 */
	struct drm_bridge *prev_bridge __free(drm_bridge_put) =
		drm_bridge_get_prev_bridge(cur_bridge);
	u32 *in_bus_fmts;
	int ret;

	cur_state = drm_atomic_get_new_bridge_state(crtc_state->state,
						    cur_bridge);

	/*
	 * If bus format negotiation is not supported by this bridge, let's
	 * pass MEDIA_BUS_FMT_FIXED to the previous bridge in the chain and
	 * hope that it can handle this situation gracefully (by providing
	 * appropriate default values).
	 */
	if (!cur_bridge->funcs->atomic_get_input_bus_fmts) {
		if (cur_bridge != first_bridge) {
			ret = select_bus_fmt_recursive(first_bridge,
						       prev_bridge, crtc_state,
						       conn_state,
						       MEDIA_BUS_FMT_FIXED);
			if (ret)
				return ret;
		}

		/*
		 * Driver does not implement the atomic state hooks, but that's
		 * fine, as long as it does not access the bridge state.
		 */
		if (cur_state) {
			cur_state->input_bus_cfg.format = MEDIA_BUS_FMT_FIXED;
			cur_state->output_bus_cfg.format = out_bus_fmt;
		}

		return 0;
	}

	/*
	 * If the driver implements ->atomic_get_input_bus_fmts() it
	 * should also implement the atomic state hooks.
	 */
	if (WARN_ON(!cur_state))
		return -EINVAL;

	in_bus_fmts = cur_bridge->funcs->atomic_get_input_bus_fmts(cur_bridge,
							cur_state,
							crtc_state,
							conn_state,
							out_bus_fmt,
							&num_in_bus_fmts);
	/* No candidate at all: this output format cannot be produced. */
	if (!num_in_bus_fmts)
		return -ENOTSUPP;
	else if (!in_bus_fmts)
		return -ENOMEM;

	/* End of the recursion: the first bridge takes its first candidate. */
	if (first_bridge == cur_bridge) {
		cur_state->input_bus_cfg.format = in_bus_fmts[0];
		cur_state->output_bus_cfg.format = out_bus_fmt;
		kfree(in_bus_fmts);
		return 0;
	}

	/*
	 * Try each candidate on the previous bridge. Only -ENOTSUPP moves on
	 * to the next candidate; success or any other error ends the search.
	 */
	for (i = 0; i < num_in_bus_fmts; i++) {
		ret = select_bus_fmt_recursive(first_bridge, prev_bridge,
					       crtc_state, conn_state,
					       in_bus_fmts[i]);
		if (ret != -ENOTSUPP)
			break;
	}

	/* On success @i indexes the candidate accepted upstream. */
	if (!ret) {
		cur_state->input_bus_cfg.format = in_bus_fmts[i];
		cur_state->output_bus_cfg.format = out_bus_fmt;
	}

	kfree(in_bus_fmts);
	return ret;
}

/*
 * This function is called by &drm_atomic_bridge_chain_check() just before
 * calling &drm_bridge_funcs.atomic_check() on all elements of the chain.
 * It performs bus format negotiation between bridge elements. The negotiation
 * happens in reverse order, starting from the last element in the chain up to
 * @bridge.
 *
 * Negotiation starts by retrieving supported output bus formats on the last
 * bridge element and testing them one by one. The test is recursive, meaning
 * that for each tested output format, the whole chain will be walked backward,
 * and each element will have to choose an input bus format that can be
 * transcoded to the requested output format.
When a bridge element does not * support transcoding into a specific output format -ENOTSUPP is returned and * the next bridge element will have to try a different format. If none of the * combinations worked, -ENOTSUPP is returned and the atomic modeset will fail. * * This implementation is relying on * &drm_bridge_funcs.atomic_get_output_bus_fmts() and * &drm_bridge_funcs.atomic_get_input_bus_fmts() to gather supported * input/output formats. * * When &drm_bridge_funcs.atomic_get_output_bus_fmts() is not implemented by * the last element of the chain, &drm_atomic_bridge_chain_select_bus_fmts() * tries a single format: &drm_connector.display_info.bus_formats[0] if * available, MEDIA_BUS_FMT_FIXED otherwise. * * When &drm_bridge_funcs.atomic_get_input_bus_fmts() is not implemented, * &drm_atomic_bridge_chain_select_bus_fmts() skips the negotiation on the * bridge element that lacks this hook and asks the previous element in the * chain to try MEDIA_BUS_FMT_FIXED. It's up to bridge drivers to decide what * to do in that case (fail if they want to enforce bus format negotiation, or * provide a reasonable default if they need to support pipelines where not * all elements support bus format negotiation). 
*/ static int drm_atomic_bridge_chain_select_bus_fmts(struct drm_bridge *bridge, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state) { struct drm_connector *conn = conn_state->connector; struct drm_encoder *encoder = bridge->encoder; struct drm_bridge_state *last_bridge_state; unsigned int i, num_out_bus_fmts = 0; struct drm_bridge *last_bridge; u32 *out_bus_fmts; int ret = 0; last_bridge = list_last_entry(&encoder->bridge_chain, struct drm_bridge, chain_node); last_bridge_state = drm_atomic_get_new_bridge_state(crtc_state->state, last_bridge); if (last_bridge->funcs->atomic_get_output_bus_fmts) { const struct drm_bridge_funcs *funcs = last_bridge->funcs; /* * If the driver implements ->atomic_get_output_bus_fmts() it * should also implement the atomic state hooks. */ if (WARN_ON(!last_bridge_state)) return -EINVAL; out_bus_fmts = funcs->atomic_get_output_bus_fmts(last_bridge, last_bridge_state, crtc_state, conn_state, &num_out_bus_fmts); if (!num_out_bus_fmts) return -ENOTSUPP; else if (!out_bus_fmts) return -ENOMEM; } else { num_out_bus_fmts = 1; out_bus_fmts = kmalloc(sizeof(*out_bus_fmts), GFP_KERNEL); if (!out_bus_fmts) return -ENOMEM; if (conn->display_info.num_bus_formats && conn->display_info.bus_formats) out_bus_fmts[0] = conn->display_info.bus_formats[0]; else out_bus_fmts[0] = MEDIA_BUS_FMT_FIXED; } for (i = 0; i < num_out_bus_fmts; i++) { ret = select_bus_fmt_recursive(bridge, last_bridge, crtc_state, conn_state, out_bus_fmts[i]); if (ret != -ENOTSUPP) break; } kfree(out_bus_fmts); return ret; } static void drm_atomic_bridge_propagate_bus_flags(struct drm_bridge *bridge, struct drm_connector *conn, struct drm_atomic_state *state) { struct drm_bridge_state *bridge_state, *next_bridge_state; struct drm_bridge *next_bridge; u32 output_flags = 0; bridge_state = drm_atomic_get_new_bridge_state(state, bridge); /* No bridge state attached to this bridge => nothing to propagate. 
*/ if (!bridge_state) return; next_bridge = drm_bridge_get_next_bridge(bridge); /* * Let's try to apply the most common case here, that is, propagate * display_info flags for the last bridge, and propagate the input * flags of the next bridge element to the output end of the current * bridge when the bridge is not the last one. * There are exceptions to this rule, like when signal inversion is * happening at the board level, but that's something drivers can deal * with from their &drm_bridge_funcs.atomic_check() implementation by * simply overriding the flags value we've set here. */ if (!next_bridge) { output_flags = conn->display_info.bus_flags; } else { next_bridge_state = drm_atomic_get_new_bridge_state(state, next_bridge); /* * No bridge state attached to the next bridge, just leave the * flags to 0. */ if (next_bridge_state) output_flags = next_bridge_state->input_bus_cfg.flags; } bridge_state->output_bus_cfg.flags = output_flags; /* * Propagate the output flags to the input end of the bridge. Again, it's * not necessarily what all bridges want, but that's what most of them * do, and by doing that by default we avoid forcing drivers to * duplicate the "dummy propagation" logic. */ bridge_state->input_bus_cfg.flags = output_flags; } /** * drm_atomic_bridge_chain_check() - Do an atomic check on the bridge chain * @bridge: bridge control structure * @crtc_state: new CRTC state * @conn_state: new connector state * * First trigger a bus format negotiation before calling * &drm_bridge_funcs.atomic_check() (falls back on * &drm_bridge_funcs.mode_fixup()) op for all the bridges in the encoder chain, * starting from the last bridge to the first. 
These are called before calling * &drm_encoder_helper_funcs.atomic_check() * * RETURNS: * 0 on success, a negative error code on failure */ int drm_atomic_bridge_chain_check(struct drm_bridge *bridge, struct drm_crtc_state *crtc_state, struct drm_connector_state *conn_state) { struct drm_connector *conn = conn_state->connector; struct drm_encoder *encoder; struct drm_bridge *iter; int ret; if (!bridge) return 0; ret = drm_atomic_bridge_chain_select_bus_fmts(bridge, crtc_state, conn_state); if (ret) return ret; encoder = bridge->encoder; list_for_each_entry_reverse(iter, &encoder->bridge_chain, chain_node) { int ret; /* * Bus flags are propagated by default. If a bridge needs to * tweak the input bus flags for any reason, it should happen * in its &drm_bridge_funcs.atomic_check() implementation such * that preceding bridges in the chain can propagate the new * bus flags. */ drm_atomic_bridge_propagate_bus_flags(iter, conn, crtc_state->state); ret = drm_atomic_bridge_check(iter, crtc_state, conn_state); if (ret) return ret; if (iter == bridge) break; } return 0; } EXPORT_SYMBOL(drm_atomic_bridge_chain_check); /** * drm_bridge_detect - check if anything is attached to the bridge output * @bridge: bridge control structure * @connector: attached connector * * If the bridge supports output detection, as reported by the * DRM_BRIDGE_OP_DETECT bridge ops flag, call &drm_bridge_funcs.detect for the * bridge and return the connection status. Otherwise return * connector_status_unknown. * * RETURNS: * The detection status on success, or connector_status_unknown if the bridge * doesn't support output detection. 
*/ enum drm_connector_status drm_bridge_detect(struct drm_bridge *bridge, struct drm_connector *connector) { if (!(bridge->ops & DRM_BRIDGE_OP_DETECT)) return connector_status_unknown; return bridge->funcs->detect(bridge, connector); } EXPORT_SYMBOL_GPL(drm_bridge_detect); /** * drm_bridge_get_modes - fill all modes currently valid for the sink into the * @connector * @bridge: bridge control structure * @connector: the connector to fill with modes * * If the bridge supports output modes retrieval, as reported by the * DRM_BRIDGE_OP_MODES bridge ops flag, call &drm_bridge_funcs.get_modes to * fill the connector with all valid modes and return the number of modes * added. Otherwise return 0. * * RETURNS: * The number of modes added to the connector. */ int drm_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector *connector) { if (!(bridge->ops & DRM_BRIDGE_OP_MODES)) return 0; return bridge->funcs->get_modes(bridge, connector); } EXPORT_SYMBOL_GPL(drm_bridge_get_modes); /** * drm_bridge_edid_read - read the EDID data of the connected display * @bridge: bridge control structure * @connector: the connector to read EDID for * * If the bridge supports output EDID retrieval, as reported by the * DRM_BRIDGE_OP_EDID bridge ops flag, call &drm_bridge_funcs.edid_read to get * the EDID and return it. Otherwise return NULL. * * RETURNS: * The retrieved EDID on success, or NULL otherwise. */ const struct drm_edid *drm_bridge_edid_read(struct drm_bridge *bridge, struct drm_connector *connector) { if (!(bridge->ops & DRM_BRIDGE_OP_EDID)) return NULL; return bridge->funcs->edid_read(bridge, connector); } EXPORT_SYMBOL_GPL(drm_bridge_edid_read); /** * drm_bridge_hpd_enable - enable hot plug detection for the bridge * @bridge: bridge control structure * @cb: hot-plug detection callback * @data: data to be passed to the hot-plug detection callback * * Call &drm_bridge_funcs.hpd_enable if implemented and register the given @cb * and @data as hot plug notification callback. 
From now on the @cb will be * called with @data when an output status change is detected by the bridge, * until hot plug notification gets disabled with drm_bridge_hpd_disable(). * * Hot plug detection is supported only if the DRM_BRIDGE_OP_HPD flag is set in * bridge->ops. This function shall not be called when the flag is not set. * * Only one hot plug detection callback can be registered at a time, it is an * error to call this function when hot plug detection is already enabled for * the bridge. */ void drm_bridge_hpd_enable(struct drm_bridge *bridge, void (*cb)(void *data, enum drm_connector_status status), void *data) { if (!(bridge->ops & DRM_BRIDGE_OP_HPD)) return; mutex_lock(&bridge->hpd_mutex); if (WARN(bridge->hpd_cb, "Hot plug detection already enabled\n")) goto unlock; bridge->hpd_cb = cb; bridge->hpd_data = data; if (bridge->funcs->hpd_enable) bridge->funcs->hpd_enable(bridge); unlock: mutex_unlock(&bridge->hpd_mutex); } EXPORT_SYMBOL_GPL(drm_bridge_hpd_enable); /** * drm_bridge_hpd_disable - disable hot plug detection for the bridge * @bridge: bridge control structure * * Call &drm_bridge_funcs.hpd_disable if implemented and unregister the hot * plug detection callback previously registered with drm_bridge_hpd_enable(). * Once this function returns the callback will not be called by the bridge * when an output status change occurs. * * Hot plug detection is supported only if the DRM_BRIDGE_OP_HPD flag is set in * bridge->ops. This function shall not be called when the flag is not set. 
*/ void drm_bridge_hpd_disable(struct drm_bridge *bridge) { if (!(bridge->ops & DRM_BRIDGE_OP_HPD)) return; mutex_lock(&bridge->hpd_mutex); if (bridge->funcs->hpd_disable) bridge->funcs->hpd_disable(bridge); bridge->hpd_cb = NULL; bridge->hpd_data = NULL; mutex_unlock(&bridge->hpd_mutex); } EXPORT_SYMBOL_GPL(drm_bridge_hpd_disable); /** * drm_bridge_hpd_notify - notify hot plug detection events * @bridge: bridge control structure * @status: output connection status * * Bridge drivers shall call this function to report hot plug events when they * detect a change in the output status, when hot plug detection has been * enabled by drm_bridge_hpd_enable(). * * This function shall be called in a context that can sleep. */ void drm_bridge_hpd_notify(struct drm_bridge *bridge, enum drm_connector_status status) { mutex_lock(&bridge->hpd_mutex); if (bridge->hpd_cb) bridge->hpd_cb(bridge->hpd_data, status); mutex_unlock(&bridge->hpd_mutex); } EXPORT_SYMBOL_GPL(drm_bridge_hpd_notify); #ifdef CONFIG_OF /** * of_drm_find_bridge - find the bridge corresponding to the device node in * the global bridge list * * @np: device node * * RETURNS: * drm_bridge control struct on success, NULL on failure */ struct drm_bridge *of_drm_find_bridge(struct device_node *np) { struct drm_bridge *bridge; mutex_lock(&bridge_lock); list_for_each_entry(bridge, &bridge_list, list) { if (bridge->of_node == np) { mutex_unlock(&bridge_lock); return bridge; } } mutex_unlock(&bridge_lock); return NULL; } EXPORT_SYMBOL(of_drm_find_bridge); #endif /** * devm_drm_put_bridge - Release a bridge reference obtained via devm * @dev: device that got the bridge via devm * @bridge: pointer to a struct drm_bridge obtained via devm * * Same as drm_bridge_put() for bridge pointers obtained via devm functions * such as devm_drm_bridge_alloc(). * * This function is a temporary workaround and MUST NOT be used. Manual * handling of bridge lifetime is inherently unsafe. 
*/ void devm_drm_put_bridge(struct device *dev, struct drm_bridge *bridge) { devm_release_action(dev, drm_bridge_put_void, bridge); } EXPORT_SYMBOL(devm_drm_put_bridge); static void drm_bridge_debugfs_show_bridge(struct drm_printer *p, struct drm_bridge *bridge, unsigned int idx) { drm_printf(p, "bridge[%u]: %ps\n", idx, bridge->funcs); drm_printf(p, "\trefcount: %u\n", kref_read(&bridge->refcount)); drm_printf(p, "\ttype: [%d] %s\n", bridge->type, drm_get_connector_type_name(bridge->type)); if (bridge->of_node) drm_printf(p, "\tOF: %pOFfc\n", bridge->of_node); drm_printf(p, "\tops: [0x%x]", bridge->ops); if (bridge->ops & DRM_BRIDGE_OP_DETECT) drm_puts(p, " detect"); if (bridge->ops & DRM_BRIDGE_OP_EDID) drm_puts(p, " edid"); if (bridge->ops & DRM_BRIDGE_OP_HPD) drm_puts(p, " hpd"); if (bridge->ops & DRM_BRIDGE_OP_MODES) drm_puts(p, " modes"); if (bridge->ops & DRM_BRIDGE_OP_HDMI) drm_puts(p, " hdmi"); drm_puts(p, "\n"); } static int allbridges_show(struct seq_file *m, void *data) { struct drm_printer p = drm_seq_file_printer(m); struct drm_bridge *bridge; unsigned int idx = 0; mutex_lock(&bridge_lock); list_for_each_entry(bridge, &bridge_list, list) drm_bridge_debugfs_show_bridge(&p, bridge, idx++); mutex_unlock(&bridge_lock); return 0; } DEFINE_SHOW_ATTRIBUTE(allbridges); static int encoder_bridges_show(struct seq_file *m, void *data) { struct drm_encoder *encoder = m->private; struct drm_printer p = drm_seq_file_printer(m); struct drm_bridge *bridge; unsigned int idx = 0; drm_for_each_bridge_in_chain(encoder, bridge) drm_bridge_debugfs_show_bridge(&p, bridge, idx++); return 0; } DEFINE_SHOW_ATTRIBUTE(encoder_bridges); void drm_bridge_debugfs_params(struct dentry *root) { debugfs_create_file("bridges", 0444, root, NULL, &allbridges_fops); } void drm_bridge_debugfs_encoder_params(struct dentry *root, struct drm_encoder *encoder) { /* bridges list */ debugfs_create_file("bridges", 0444, root, encoder, &encoder_bridges_fops); } MODULE_AUTHOR("Ajay Kumar 
<ajaykumar.rs@samsung.com>"); MODULE_DESCRIPTION("DRM bridge infrastructure"); MODULE_LICENSE("GPL and additional rights");
34 34 34 34 34 34 35 34 35 35 33 4 3 3 3 3 3 3 17 16 15 15 15 15 4 4 1 4 3 3 4 1 15 15 15 15 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
// SPDX-License-Identifier: GPL-2.0-only

#include <net/netdev_lock.h>

#include "netlink.h"
#include "common.h"
#include "bitset.h"

/* FEATURES_GET request: carries nothing beyond the common request info. */
struct features_req_info {
	struct ethnl_req_info	base;
};

/*
 * FEATURES_GET reply data: each feature set is stored as an array of 32-bit
 * words, filled in by features_prepare_data().
 */
struct features_reply_data {
	struct ethnl_reply_data	base;
	u32			hw[ETHTOOL_DEV_FEATURE_WORDS];		/* dev->hw_features */
	u32			wanted[ETHTOOL_DEV_FEATURE_WORDS];	/* dev->wanted_features */
	u32			active[ETHTOOL_DEV_FEATURE_WORDS];	/* dev->features */
	u32			nochange[ETHTOOL_DEV_FEATURE_WORDS];	/* NETIF_F_NEVER_CHANGE */
	u32			all[ETHTOOL_DEV_FEATURE_WORDS];		/* low NETDEV_FEATURE_COUNT bits set */
};

/* Get the enclosing features_reply_data from its embedded base member. */
#define FEATURES_REPDATA(__reply_base) \
	container_of(__reply_base, struct features_reply_data, base)

const struct nla_policy ethnl_features_get_policy[] = {
	[ETHTOOL_A_FEATURES_HEADER]	=
		NLA_POLICY_NESTED(ethnl_header_policy),
};

/*
 * Split @src into ETHTOOL_DEV_FEATURE_WORDS 32-bit words stored in @dest,
 * least significant word first.
 */
static void ethnl_features_to_bitmap32(u32 *dest, netdev_features_t src)
{
	unsigned int i;

	for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; i++)
		dest[i] = src >> (32 * i);
}

/*
 * Snapshot the device feature sets into the reply data. Besides the three
 * per-device sets, the NETIF_F_NEVER_CHANGE set and a mask covering all
 * NETDEV_FEATURE_COUNT features are recorded. Always returns 0.
 */
static int features_prepare_data(const struct ethnl_req_info *req_base,
				 struct ethnl_reply_data *reply_base,
				 const struct genl_info *info)
{
	struct features_reply_data *data = FEATURES_REPDATA(reply_base);
	struct net_device *dev = reply_base->dev;
	netdev_features_t all_features;

	ethnl_features_to_bitmap32(data->hw, dev->hw_features);
	ethnl_features_to_bitmap32(data->wanted, dev->wanted_features);
	ethnl_features_to_bitmap32(data->active, dev->features);
	ethnl_features_to_bitmap32(data->nochange, NETIF_F_NEVER_CHANGE);
	all_features = GENMASK_ULL(NETDEV_FEATURE_COUNT - 1, 0);
	ethnl_features_to_bitmap32(data->all, all_features);

	return 0;
}

/*
 * Sum the sizes of the four bitsets of a FEATURES_GET reply. Only the hw
 * bitset is sized with a mask (data->all); the other three pass NULL.
 *
 * Returns the total length, or the first negative value reported by
 * ethnl_bitset32_size().
 */
static int features_reply_size(const struct ethnl_req_info *req_base,
			       const struct ethnl_reply_data *reply_base)
{
	const struct features_reply_data *data = FEATURES_REPDATA(reply_base);
	bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
	unsigned int len = 0;
	int ret;

	ret = ethnl_bitset32_size(data->hw, data->all, NETDEV_FEATURE_COUNT,
				  netdev_features_strings, compact);
	if (ret < 0)
		return ret;
	len += ret;
	ret = ethnl_bitset32_size(data->wanted, NULL, NETDEV_FEATURE_COUNT,
				  netdev_features_strings, compact);
	if (ret < 0)
		return ret;
	len += ret;
	ret = ethnl_bitset32_size(data->active, NULL, NETDEV_FEATURE_COUNT,
				  netdev_features_strings, compact);
	if (ret < 0)
		return ret;
	len += ret;
	ret = ethnl_bitset32_size(data->nochange, NULL, NETDEV_FEATURE_COUNT,
				  netdev_features_strings, compact);
	if (ret < 0)
		return ret;
	len += ret;

	return len;
}

/*
 * Write the hw, wanted, active and nochange bitsets into @skb, using the
 * same mask arguments as features_reply_size().
 *
 * Returns the result of the last ethnl_put_bitset32() call, or the first
 * negative value reported by an earlier one.
 */
static int features_fill_reply(struct sk_buff *skb,
			       const struct ethnl_req_info *req_base,
			       const struct ethnl_reply_data *reply_base)
{
	const struct features_reply_data *data = FEATURES_REPDATA(reply_base);
	bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
	int ret;

	ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_HW, data->hw,
				 data->all, NETDEV_FEATURE_COUNT,
				 netdev_features_strings, compact);
	if (ret < 0)
		return ret;
	ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_WANTED, data->wanted,
				 NULL, NETDEV_FEATURE_COUNT,
				 netdev_features_strings, compact);
	if (ret < 0)
		return ret;
	ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_ACTIVE, data->active,
				 NULL, NETDEV_FEATURE_COUNT,
				 netdev_features_strings, compact);
	if (ret < 0)
		return ret;
	return ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_NOCHANGE,
				  data->nochange, NULL, NETDEV_FEATURE_COUNT,
				  netdev_features_strings, compact);
}

const struct ethnl_request_ops ethnl_features_request_ops = {
	.request_cmd		= ETHTOOL_MSG_FEATURES_GET,
	.reply_cmd		= ETHTOOL_MSG_FEATURES_GET_REPLY,
	.hdr_attr		= ETHTOOL_A_FEATURES_HEADER,
	.req_info_size		= sizeof(struct features_req_info),
	.reply_data_size	= sizeof(struct features_reply_data),

	.prepare_data		= features_prepare_data,
	.reply_size		= features_reply_size,
	.fill_reply		= features_fill_reply,
};

/* FEATURES_SET */

const struct nla_policy ethnl_features_set_policy[] = {
	[ETHTOOL_A_FEATURES_HEADER]	=
		NLA_POLICY_NESTED(ethnl_header_policy),
	[ETHTOOL_A_FEATURES_WANTED]	= { .type = NLA_NESTED },
};

/*
 * Split @val into BITS_TO_LONGS(NETDEV_FEATURE_COUNT) unsigned long words
 * stored in @dest, least significant word first.
 */
static void ethnl_features_to_bitmap(unsigned long *dest, netdev_features_t val)
{
	const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT);
	unsigned int i;

	for (i = 0; i < words; i++)
		dest[i] = (unsigned long)(val >> (i * BITS_PER_LONG));
}

/*
 * Inverse of ethnl_features_to_bitmap(): rebuild a netdev_features_t from
 * the unsigned long words in @src.
 */
static netdev_features_t ethnl_bitmap_to_features(unsigned long *src)
{
	const unsigned int nft_bits = sizeof(netdev_features_t) * BITS_PER_BYTE;
	const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT);
	netdev_features_t ret = 0;
	unsigned int i;

	for (i = 0; i < words; i++)
		ret |= (netdev_features_t)(src[i]) << (i * BITS_PER_LONG);
	/* Keep only the low NETDEV_FEATURE_COUNT bits. */
	ret &= ~(netdev_features_t)0 >> (nft_bits - NETDEV_FEATURE_COUNT);
	return ret;
}

/*
 * Build and send the FEATURES_SET reply carrying the wanted and active
 * bitsets with their masks.
 *
 * Returns the result of genlmsg_reply() when the message was built, or a
 * negative error code otherwise. Every failure before the send also sets
 * the "failed to send reply message" error message on @info.
 */
static int features_send_reply(struct net_device *dev, struct genl_info *info,
			       const unsigned long *wanted,
			       const unsigned long *wanted_mask,
			       const unsigned long *active,
			       const unsigned long *active_mask, bool compact)
{
	struct sk_buff *rskb;
	void *reply_payload;
	int reply_len = 0;
	int ret;

	/* Size the message: reply header plus the two bitsets. */
	reply_len = ethnl_reply_header_size();
	ret = ethnl_bitset_size(wanted, wanted_mask, NETDEV_FEATURE_COUNT,
				netdev_features_strings, compact);
	if (ret < 0)
		goto err;
	reply_len += ret;
	ret = ethnl_bitset_size(active, active_mask, NETDEV_FEATURE_COUNT,
				netdev_features_strings, compact);
	if (ret < 0)
		goto err;
	reply_len += ret;

	/* Error code reported when ethnl_reply_init() returns NULL. */
	ret = -ENOMEM;
	rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_FEATURES_SET_REPLY,
				ETHTOOL_A_FEATURES_HEADER, info,
				&reply_payload);
	if (!rskb)
		goto err;

	ret = ethnl_put_bitset(rskb, ETHTOOL_A_FEATURES_WANTED, wanted,
			       wanted_mask, NETDEV_FEATURE_COUNT,
			       netdev_features_strings, compact);
	if (ret < 0)
		goto nla_put_failure;
	ret = ethnl_put_bitset(rskb, ETHTOOL_A_FEATURES_ACTIVE, active,
			       active_mask, NETDEV_FEATURE_COUNT,
			       netdev_features_strings, compact);
	if (ret < 0)
		goto nla_put_failure;

	genlmsg_end(rskb, reply_payload);
	ret = genlmsg_reply(rskb, info);
	return ret;

nla_put_failure:
	/* The message did not fit the length computed above. */
	nlmsg_free(rskb);
	WARN_ONCE(1, "calculated message payload length (%d) not sufficient\n",
		  reply_len);
err:
	GENL_SET_ERR_MSG(info, "failed to send reply message");
	return ret;
}

/*
 * FEATURES_SET request handler.
 *
 * Parses the requested wanted-features bitset, rejects attempts to touch
 * bits outside NETIF_F_ETHTOOL_BITS, updates dev->wanted_features (limited
 * to dev->hw_features) and, unless ETHTOOL_FLAG_OMIT_REPLY is set, replies
 * with the differences described below.
 *
 * Returns 0 on success or a negative error code.
 */
int ethnl_set_features(struct sk_buff *skb, struct genl_info *info)
{
	DECLARE_BITMAP(wanted_diff_mask, NETDEV_FEATURE_COUNT);
	DECLARE_BITMAP(active_diff_mask, NETDEV_FEATURE_COUNT);
	DECLARE_BITMAP(old_active, NETDEV_FEATURE_COUNT);
	DECLARE_BITMAP(old_wanted, NETDEV_FEATURE_COUNT);
	DECLARE_BITMAP(new_active, NETDEV_FEATURE_COUNT);
	DECLARE_BITMAP(new_wanted, NETDEV_FEATURE_COUNT);
	DECLARE_BITMAP(req_wanted, NETDEV_FEATURE_COUNT);
	DECLARE_BITMAP(req_mask, NETDEV_FEATURE_COUNT);
	struct ethnl_req_info req_info = {};
	struct nlattr **tb = info->attrs;
	struct net_device *dev;
	bool mod;
	int ret;

	/* The wanted bitset is mandatory. */
	if (!tb[ETHTOOL_A_FEATURES_WANTED])
		return -EINVAL;
	ret = ethnl_parse_header_dev_get(&req_info,
					 tb[ETHTOOL_A_FEATURES_HEADER],
					 genl_info_net(info), info->extack,
					 true);
	if (ret < 0)
		return ret;
	dev = req_info.dev;

	rtnl_lock();
	netdev_lock_ops(dev);
	ret = ethnl_ops_begin(dev);
	if (ret < 0)
		goto out_unlock;
	/* Remember the state before the change to compute the diffs later. */
	ethnl_features_to_bitmap(old_active, dev->features);
	ethnl_features_to_bitmap(old_wanted, dev->wanted_features);
	ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT,
				 tb[ETHTOOL_A_FEATURES_WANTED],
				 netdev_features_strings, info->extack);
	if (ret < 0)
		goto out_ops;
	if (ethnl_bitmap_to_features(req_mask) & ~NETIF_F_ETHTOOL_BITS) {
		GENL_SET_ERR_MSG(info, "attempt to change non-ethtool features");
		ret = -EINVAL;
		goto out_ops;
	}

	/* set req_wanted bits not in req_mask from old_wanted */
	bitmap_and(req_wanted, req_wanted, req_mask, NETDEV_FEATURE_COUNT);
	bitmap_andnot(new_wanted, old_wanted, req_mask, NETDEV_FEATURE_COUNT);
	bitmap_or(req_wanted, new_wanted, req_wanted, NETDEV_FEATURE_COUNT);
	if (!bitmap_equal(req_wanted, old_wanted, NETDEV_FEATURE_COUNT)) {
		/* Only bits present in dev->hw_features are replaced. */
		dev->wanted_features &= ~dev->hw_features;
		dev->wanted_features |= ethnl_bitmap_to_features(req_wanted) & dev->hw_features;
		__netdev_update_features(dev);
	}
	ethnl_features_to_bitmap(new_active, dev->features);
	mod = !bitmap_equal(old_active, new_active, NETDEV_FEATURE_COUNT);

	ret = 0;
	if (!(req_info.flags & ETHTOOL_FLAG_OMIT_REPLY)) {
		bool compact = req_info.flags & ETHTOOL_FLAG_COMPACT_BITSETS;

		/*
		 * wanted_diff_mask: requested bits whose active value differs
		 * from the wanted one. active_diff_mask: bits whose active
		 * value changed. The value bitmaps are then limited to their
		 * masks before being sent.
		 */
		bitmap_xor(wanted_diff_mask, req_wanted, new_active,
			   NETDEV_FEATURE_COUNT);
		bitmap_xor(active_diff_mask, old_active, new_active,
			   NETDEV_FEATURE_COUNT);
		bitmap_and(wanted_diff_mask, wanted_diff_mask, req_mask,
			   NETDEV_FEATURE_COUNT);
		bitmap_and(req_wanted, req_wanted, wanted_diff_mask,
			   NETDEV_FEATURE_COUNT);
		bitmap_and(new_active, new_active, active_diff_mask,
			   NETDEV_FEATURE_COUNT);

		ret = features_send_reply(dev, info, req_wanted,
					  wanted_diff_mask, new_active,
					  active_diff_mask, compact);
	}
	/* Notify only when the active feature set actually changed. */
	if (mod)
		netdev_features_change(dev);

out_ops:
	ethnl_ops_complete(dev);
out_unlock:
	netdev_unlock_ops(dev);
	rtnl_unlock();
	ethnl_parse_header_dev_put(&req_info);
	return ret;
}
8 2 146 37 4 225 110 46 46 2 8 33 67 67 67 32 145 132 14 3 4 3 56 7 56 54 54 54 54 55 54 4 51 51 48 46 46 32 25 25 14 24 32 30 30 99 99 99 99 33 33 33 33 33 33 5 5 5 5 54 4 55 55 55 28 37 33 37 69 69 69 68 22 22 22 68 68 67 67 35 18 18 8 74 75 57 57 57 5 57 57 57 57 57 75 27 27 27 27 27 22 27 81 80 81 80 14 14 14 12 3 3 3 3 98 140 66 139 93 226 92 92 40 92 93 78 77 41 20 76 77 29 29 14 40 40 40 40 39 40 30 40 39 212 57 20 57 57 57 20 57 56 3 117 117 117 117 37 37 6 37 37 37 37 37 117 28 48 48 47 47 40 26 28 35 34 34 34 34 18 34 34 13 48 31 32 32 32 22 17 7 22 32 32 32 32 16 32 16 31 15 32 30 30 46 2 5 5 3 3 5 109 111 109 22 115 111 115 114 24 111 109 26 1 2 85 51 75 74 85 2 2 1 1 2 102 73 72 40 1 1 1 1 39 39 40 1 1 40 2 10 2 2 2 1 1 3 3 3 3 3 3 1 3 3 1 1 3 11 1 2 2 1 2 2 1 4 1 14 14 4 4 3 3 2 2 73 3 38 38 38 38 38 3 6 1 1 1 2 6 3 153 153 1 41 40 73 50 50 69 84 87 53 47 96 96 96 1 22 1 1 3 1 2 1 3 3 1 11 2 2 3 3 1 1 2 2 7 1 1 2 1 1 2 1 1 1 1 87 2 1 2 2 1 2 2 1 2 3 2 3 2 1 2 3 2 3 3 2 3 2 1 2 21 1 21 21 1 10 4 2 3 2 1 2 1 1 1 1 3 21 21 2 1 3 1 3 3 2 3 2 1 3 14 4 97 153 154 119 96 22 2 2 17 3 97 2 7 1 1 1 87 96 3 3 37 96 8 94 7 87 8 8 3 1 4 1 1 3 3 66 161 44 125 125 64 107 72 64 64 64 64 106 73 107 70 70 61 60 60 56 161 159 107 125 28 28 28 161 159 152 144 119 96 146 146 4 4 4 96 43 95 48 2 46 46 2 147 147 114 113 94 90 146 96 147 145 146 28 28 28 145 70 145 71 146 146 146 146 75 125 146 69 145 145 43 146 28 161 158 161 161 161 81 161 161 161 161 63 161 161 154 154 147 146 160 161 3 3 3 2 2 3 2 32 32 31 29 12 3 3 2 1 1 1 1 1 2 1 1 1 1 2 2 2 2 32 1 1 141 143 143 79 178 6 6 6 6 3 3 3 3 176 173 114 114 114 114 114 114 114 114 114 114 116 49 49 1 37 37 37 1 20 20 12 20 20 12 20 20 20 20 20 19 20 12 20 20 20 20 20 19 19 19 19 19 18 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 15 12 12 12 12 7 12 12 11 7 12 7 11 16 16 16 16 15 16 16 16 15 20 12 16 19 4 1 1 21 17 21 21 254 254 1 1 1 1 1 1 1 1 1 1 1 1 13 13 13 9 9 1 8 8 8 8 8 8 4 4 4 4 2 2 2 2 2 2 4 1 15 15 12 
11 1 11 79 78 76 2 1 1 1 1 1 1 1 1 1 1 39 39 10 4 8 9 3 6 9 8 8 5 2 4 3 1 4 1 4 3 2 4 3 2 3 3 2 2 19 4 10 18 19 4 10 10 4 17 11 4 4 14 14 12 11 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 
491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 
991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 
1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 
1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 
2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 
2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 
2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 
3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 
3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 
4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 
4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 4690 4691 4692 4693 4694 4695 4696 4697 4698 4699 4700 4701 4702 4703 4704 4705 4706 4707 4708 4709 4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721 4722 4723 4724 4725 4726 4727 4728 4729 4730 4731 4732 4733 4734 4735 4736 4737 4738 4739 4740 4741 4742 4743 4744 4745 4746 4747 4748 4749 4750 4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790 4791 4792 4793 4794 4795 4796 4797 4798 4799 4800 4801 4802 4803 4804 4805 4806 4807 4808 4809 4810 4811 4812 4813 4814 4815 4816 4817 4818 4819 4820 4821 4822 4823 4824 4825 4826 4827 4828 4829 4830 4831 4832 4833 4834 4835 4836 4837 4838 4839 4840 4841 4842 4843 4844 4845 4846 4847 4848 4849 4850 4851 4852 4853 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 4864 4865 4866 4867 4868 4869 4870 4871 4872 4873 4874 4875 4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888 4889 4890 4891 4892 4893 4894 4895 4896 4897 4898 4899 4900 4901 4902 4903 4904 4905 4906 4907 4908 4909 4910 4911 4912 4913 4914 4915 4916 4917 4918 4919 4920 4921 4922 4923 4924 4925 4926 4927 4928 4929 4930 4931 4932 4933 4934 4935 4936 4937 4938 4939 4940 4941 4942 4943 4944 4945 4946 4947 4948 4949 4950 4951 4952 4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963 4964 4965 4966 4967 4968 4969 4970 4971 4972 4973 4974 4975 4976 4977 4978 4979 4980 4981 4982 4983 4984 4985 4986 4987 4988 4989 4990 4991 
4992 4993 4994 4995 4996 4997 4998 4999 5000 5001 5002 5003 5004 5005 5006 5007 5008 5009 5010 5011 5012 5013 5014 5015 5016 5017 5018 5019 5020 5021 5022 5023 5024 5025 5026 5027 5028 5029 5030 5031 5032 5033 5034 5035 5036 5037 5038 5039 // SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 1991, 1992 Linus Torvalds */ /* * Hopefully this will be a rather complete VT102 implementation. * * Beeping thanks to John T Kohl. * * Virtual Consoles, Screen Blanking, Screen Dumping, Color, Graphics * Chars, and VT100 enhancements by Peter MacDonald. * * Copy and paste function by Andrew Haylett, * some enhancements by Alessandro Rubini. * * Code to check for different video-cards mostly by Galen Hunt, * <g-hunt@ee.utah.edu> * * Rudimentary ISO 10646/Unicode/UTF-8 character set support by * Markus Kuhn, <mskuhn@immd4.informatik.uni-erlangen.de>. * * Dynamic allocation of consoles, aeb@cwi.nl, May 1994 * Resizing of consoles, aeb, 940926 * * Code for xterm like mouse click reporting by Peter Orbaek 20-Jul-94 * <poe@daimi.aau.dk> * * User-defined bell sound, new setterm control sequences and printk * redirection by Martin Mares <mj@k332.feld.cvut.cz> 19-Nov-95 * * APM screenblank bug fixed Takashi Manabe <manabe@roy.dsl.tutics.tut.jp> * * Merge with the abstract console driver by Geert Uytterhoeven * <geert@linux-m68k.org>, Jan 1997. * * Original m68k console driver modifications by * * - Arno Griffioen <arno@usn.nl> * - David Carter <carter@cs.bris.ac.uk> * * The abstract console driver provides a generic interface for a text * console. It supports VGA text mode, frame buffer based graphical consoles * and special graphics processors that are only accessible through some * registers (e.g. a TMS340x0 GSP). * * The interface to the hardware is specified using a special structure * (struct consw) which contains function pointers to console operations * (see <linux/console.h> for more information). 
*
 * Support for changeable cursor shape
 * by Pavel Machek <pavel@atrey.karlin.mff.cuni.cz>, August 1997
 *
 * Ported to i386 and con_scrolldelta fixed
 * by Emmanuel Marty <core@ggi-project.org>, April 1998
 *
 * Resurrected character buffers in videoram plus lots of other trickery
 * by Martin Mares <mj@atrey.karlin.mff.cuni.cz>, July 1998
 *
 * Removed old-style timers, introduced console_timer, made timer
 * deletion SMP-safe.  17Jun00, Andrew Morton
 *
 * Removed console_lock, enabled interrupts across all console operations
 * 13 March 2001, Andrew Morton
 *
 * Fixed UTF-8 mode so alternate charset modes always work according
 * to control sequences interpreted in do_con_trol function
 * preserving backward VT100 semigraphics compatibility,
 * malformed UTF sequences represented as sequences of replacement glyphs,
 * original codes or '?' as a last resort if replacement glyph is undefined
 * by Adam Tla/lka <atlka@pg.gda.pl>, Aug 2006
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched/signal.h>
#include <linux/tty.h>
#include <linux/tty_flip.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/kd.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/major.h>
#include <linux/mm.h>
#include <linux/console.h>
#include <linux/init.h>
#include <linux/mutex.h>
#include <linux/vt_kern.h>
#include <linux/selection.h>
#include <linux/tiocl.h>
#include <linux/kbd_kern.h>
#include <linux/consolemap.h>
#include <linux/timer.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/pm.h>
#include <linux/font.h>
#include <linux/bitops.h>
#include <linux/notifier.h>
#include <linux/device.h>
#include <linux/io.h>
#include <linux/uaccess.h>
#include <linux/kdb.h>
#include <linux/ctype.h>
#include <linux/gcd.h>

/* Capacity of the registered_con_driver[] table below. */
#define MAX_NR_CON_DRIVER 16

/*
 * Single-bit values (1, 2, 4, 8).
 * NOTE(review): presumably OR-ed together in con_driver.flag - confirm
 * against the users further down the file.
 */
#define CON_DRIVER_FLAG_MODULE 1
#define CON_DRIVER_FLAG_INIT   2
#define CON_DRIVER_FLAG_ATTR   4
#define CON_DRIVER_FLAG_ZOMBIE 8

/*
 * One entry of the registered_con_driver[] table.
 */
struct con_driver {
	const struct consw *con;	/* console operations of this driver */
	const char *desc;		/* NOTE(review): presumably a printable description */
	struct device *dev;
	int node;
	int first;			/* NOTE(review): first/last look like a console index range - confirm */
	int last;
	int flag;			/* NOTE(review): presumably a CON_DRIVER_FLAG_* mask - confirm */
};

static struct con_driver registered_con_driver[MAX_NR_CON_DRIVER];
const struct consw *conswitchp;

/*
 * Here are the default bell parameters: 750 Hz, 1/8th of a second
 */
#define DEFAULT_BELL_PITCH	750
#define DEFAULT_BELL_DURATION	(HZ/8)
/* NOTE(review): going by the _MS suffix this is in milliseconds - confirm */
#define DEFAULT_CURSOR_BLINK_MS	200

/* Exported table with one slot per possible virtual console. */
struct vc vc_cons [MAX_NR_CONSOLES];
EXPORT_SYMBOL(vc_cons);

/* One struct consw pointer per possible virtual console. */
static const struct consw *con_driver_map[MAX_NR_CONSOLES];

/* Prototypes for static functions that are not defined in this part of the file. */
static int con_open(struct tty_struct *, struct file *);
static void vc_init(struct vc_data *vc, int do_clear);
static void gotoxy(struct vc_data *vc, int new_x, int new_y);
static void restore_cur(struct vc_data *vc);
static void save_cur(struct vc_data *vc);
static void reset_terminal(struct vc_data *vc, int do_clear);
static void con_flush_chars(struct tty_struct *tty);
static int set_vesa_blanking(u8 __user *mode);
static void set_cursor(struct vc_data *vc);
static void hide_cursor(struct vc_data *vc);
static void console_callback(struct work_struct *ignored);
static void con_driver_unregister_callback(struct work_struct *ignored);
static void blank_screen_t(struct timer_list *unused);
static void set_palette(struct vc_data *vc);
static void unblank_screen(void);

/*
 * Shorthand for vt_kmsg_redirect(-1).
 * NOTE(review): presumably -1 means "query only" - confirm against
 * vt_kmsg_redirect().
 */
#define vt_get_kmsg_redirect() vt_kmsg_redirect(-1)

/*
 * Module parameters; S_IRUGO | S_IWUSR makes each of them world-readable
 * and owner-writable.
 */
int default_utf8 = true;
module_param(default_utf8, int, S_IRUGO | S_IWUSR);
int global_cursor_default = -1;
module_param(global_cursor_default, int, S_IRUGO | S_IWUSR);
EXPORT_SYMBOL(global_cursor_default);

static int cur_default = CUR_UNDERLINE;
module_param(cur_default, int, S_IRUGO | S_IWUSR);

/*
 * ignore_poke: don't unblank the screen when things are typed.  This is
 * mainly for the privacy of braille terminal users.
*/
static int ignore_poke;

int do_poke_blanked_console;
int console_blanked;
EXPORT_SYMBOL(console_blanked);

static enum vesa_blank_mode vesa_blank_mode;
static int vesa_off_interval;
static int blankinterval;
/* blankinterval is exposed read-only (0444) as the core parameter "consoleblank". */
core_param(consoleblank, blankinterval, int, 0444);

/* Work items bound to console_callback() and con_driver_unregister_callback(). */
static DECLARE_WORK(console_work, console_callback);
static DECLARE_WORK(con_driver_unregister_work, con_driver_unregister_callback);

/*
 * fg_console is the current virtual console,
 * last_console is the last used one,
 * want_console is the console we want to switch to,
 * saved_* variants are for save/restore around kernel debugger enter/leave
 */
int fg_console;
EXPORT_SYMBOL(fg_console);
int last_console;
int want_console = -1;	/* NOTE(review): -1 presumably means "no switch pending" - confirm */

static int saved_fg_console;
static int saved_last_console;
static int saved_want_console;
static int saved_vc_mode;
static int saved_console_blanked;

/*
 * For each existing display, we have a pointer to console currently visible
 * on that display, allowing consoles other than fg_console to be refreshed
 * appropriately. Unless the low-level driver supplies its own display_fg
 * variable, we use this one for the "master display".
 */
static struct vc_data *master_display_fg;

/*
 * Unfortunately, we need to delay tty echo when we're currently writing to the
 * console since the code is (and always was) not re-entrant, so we schedule
 * all flip requests to process context with schedule-task() and run it from
 * console_callback().
 */

/*
 * For the same reason, we defer scrollback to the console callback.
 */
static int scrollback_delta;

/*
 * Hook so that the power management routines can (un)blank
 * the console on our behalf.
*/ int (*console_blank_hook)(int); EXPORT_SYMBOL(console_blank_hook); static DEFINE_TIMER(console_timer, blank_screen_t); static int blank_state; static int blank_timer_expired; enum { blank_off = 0, blank_normal_wait, blank_vesa_wait, }; /* * /sys/class/tty/tty0/ * * the attribute 'active' contains the name of the current vc * console and it supports poll() to detect vc switches */ static struct device *tty0dev; /* * Notifier list for console events. */ static ATOMIC_NOTIFIER_HEAD(vt_notifier_list); int register_vt_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&vt_notifier_list, nb); } EXPORT_SYMBOL_GPL(register_vt_notifier); int unregister_vt_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&vt_notifier_list, nb); } EXPORT_SYMBOL_GPL(unregister_vt_notifier); static void notify_write(struct vc_data *vc, unsigned int unicode) { struct vt_notifier_param param = { .vc = vc, .c = unicode }; atomic_notifier_call_chain(&vt_notifier_list, VT_WRITE, &param); } static void notify_update(struct vc_data *vc) { struct vt_notifier_param param = { .vc = vc }; atomic_notifier_call_chain(&vt_notifier_list, VT_UPDATE, &param); } /* * Low-Level Functions */ static inline bool con_is_fg(const struct vc_data *vc) { return vc->vc_num == fg_console; } static inline bool con_should_update(const struct vc_data *vc) { return con_is_visible(vc) && !console_blanked; } static inline u16 *screenpos(const struct vc_data *vc, unsigned int offset, bool viewed) { unsigned long origin = viewed ? vc->vc_visible_origin : vc->vc_origin; return (u16 *)(origin + offset); } static void con_putc(struct vc_data *vc, u16 ca, unsigned int y, unsigned int x) { if (vc->vc_sw->con_putc) vc->vc_sw->con_putc(vc, ca, y, x); else vc->vc_sw->con_putcs(vc, &ca, 1, y, x); } /* Called from the keyboard irq path.. 
*/ static inline void scrolldelta(int lines) { /* FIXME */ /* scrolldelta needs some kind of consistency lock, but the BKL was and still is not protecting versus the scheduled back end */ scrollback_delta += lines; schedule_console_callback(); } void schedule_console_callback(void) { schedule_work(&console_work); } /* * Code to manage unicode-based screen buffers */ /* * Our screen buffer is preceded by an array of line pointers so that * scrolling only implies some pointer shuffling. */ static u32 **vc_uniscr_alloc(unsigned int cols, unsigned int rows) { u32 **uni_lines; void *p; unsigned int memsize, i, col_size = cols * sizeof(**uni_lines); /* allocate everything in one go */ memsize = col_size * rows; memsize += rows * sizeof(*uni_lines); uni_lines = vzalloc(memsize); if (!uni_lines) return NULL; /* initial line pointers */ p = uni_lines + rows; for (i = 0; i < rows; i++) { uni_lines[i] = p; p += col_size; } return uni_lines; } static void vc_uniscr_free(u32 **uni_lines) { vfree(uni_lines); } static void vc_uniscr_set(struct vc_data *vc, u32 **new_uni_lines) { vc_uniscr_free(vc->vc_uni_lines); vc->vc_uni_lines = new_uni_lines; } static void vc_uniscr_putc(struct vc_data *vc, u32 uc) { if (vc->vc_uni_lines) vc->vc_uni_lines[vc->state.y][vc->state.x] = uc; } static void vc_uniscr_insert(struct vc_data *vc, unsigned int nr) { if (vc->vc_uni_lines) { u32 *ln = vc->vc_uni_lines[vc->state.y]; unsigned int x = vc->state.x, cols = vc->vc_cols; memmove(&ln[x + nr], &ln[x], (cols - x - nr) * sizeof(*ln)); memset32(&ln[x], ' ', nr); } } static void vc_uniscr_delete(struct vc_data *vc, unsigned int nr) { if (vc->vc_uni_lines) { u32 *ln = vc->vc_uni_lines[vc->state.y]; unsigned int x = vc->state.x, cols = vc->vc_cols; memmove(&ln[x], &ln[x + nr], (cols - x - nr) * sizeof(*ln)); memset32(&ln[cols - nr], ' ', nr); } } static void vc_uniscr_clear_line(struct vc_data *vc, unsigned int x, unsigned int nr) { if (vc->vc_uni_lines) memset32(&vc->vc_uni_lines[vc->state.y][x], ' ', 
nr); } static void vc_uniscr_clear_lines(struct vc_data *vc, unsigned int y, unsigned int nr) { if (vc->vc_uni_lines) while (nr--) memset32(vc->vc_uni_lines[y++], ' ', vc->vc_cols); } /* juggling array rotation algorithm (complexity O(N), size complexity O(1)) */ static void juggle_array(u32 **array, unsigned int size, unsigned int nr) { unsigned int gcd_idx; for (gcd_idx = 0; gcd_idx < gcd(nr, size); gcd_idx++) { u32 *gcd_idx_val = array[gcd_idx]; unsigned int dst_idx = gcd_idx; while (1) { unsigned int src_idx = (dst_idx + nr) % size; if (src_idx == gcd_idx) break; array[dst_idx] = array[src_idx]; dst_idx = src_idx; } array[dst_idx] = gcd_idx_val; } } static void vc_uniscr_scroll(struct vc_data *vc, unsigned int top, unsigned int bottom, enum con_scroll dir, unsigned int nr) { u32 **uni_lines = vc->vc_uni_lines; unsigned int size = bottom - top; if (!uni_lines) return; if (dir == SM_DOWN) { juggle_array(&uni_lines[top], size, size - nr); vc_uniscr_clear_lines(vc, top, nr); } else { juggle_array(&uni_lines[top], size, nr); vc_uniscr_clear_lines(vc, bottom - nr, nr); } } static u32 vc_uniscr_getc(struct vc_data *vc, int relative_pos) { int pos = vc->state.x + vc->vc_need_wrap + relative_pos; if (vc->vc_uni_lines && in_range(pos, 0, vc->vc_cols)) return vc->vc_uni_lines[vc->state.y][pos]; return 0; } static void vc_uniscr_copy_area(u32 **dst_lines, unsigned int dst_cols, unsigned int dst_rows, u32 **src_lines, unsigned int src_cols, unsigned int src_top_row, unsigned int src_bot_row) { unsigned int dst_row = 0; if (!dst_lines) return; while (src_top_row < src_bot_row) { u32 *src_line = src_lines[src_top_row]; u32 *dst_line = dst_lines[dst_row]; memcpy(dst_line, src_line, src_cols * sizeof(*src_line)); if (dst_cols - src_cols) memset32(dst_line + src_cols, ' ', dst_cols - src_cols); src_top_row++; dst_row++; } while (dst_row < dst_rows) { u32 *dst_line = dst_lines[dst_row]; memset32(dst_line, ' ', dst_cols); dst_row++; } } /* * Called from vcs_read() to make sure 
unicode screen retrieval is possible. * This will initialize the unicode screen buffer if not already done. * This returns 0 if OK, or a negative error code otherwise. * In particular, -ENODATA is returned if the console is not in UTF-8 mode. */ int vc_uniscr_check(struct vc_data *vc) { u32 **uni_lines; unsigned short *p; int x, y, mask; WARN_CONSOLE_UNLOCKED(); if (!vc->vc_utf) return -ENODATA; if (vc->vc_uni_lines) return 0; uni_lines = vc_uniscr_alloc(vc->vc_cols, vc->vc_rows); if (!uni_lines) return -ENOMEM; /* * Let's populate it initially with (imperfect) reverse translation. * This is the next best thing we can do short of having it enabled * from the start even when no users rely on this functionality. True * unicode content will be available after a complete screen refresh. */ p = (unsigned short *)vc->vc_origin; mask = vc->vc_hi_font_mask | 0xff; for (y = 0; y < vc->vc_rows; y++) { u32 *line = uni_lines[y]; for (x = 0; x < vc->vc_cols; x++) { u16 glyph = scr_readw(p++) & mask; line[x] = inverse_translate(vc, glyph, true); } } vc->vc_uni_lines = uni_lines; return 0; } /* * Called from vcs_read() to get the unicode data from the screen. * This must be preceded by a successful call to vc_uniscr_check() once * the console lock has been taken. */ void vc_uniscr_copy_line(const struct vc_data *vc, void *dest, bool viewed, unsigned int row, unsigned int col, unsigned int nr) { u32 **uni_lines = vc->vc_uni_lines; int offset = row * vc->vc_size_row + col * 2; unsigned long pos; if (WARN_ON_ONCE(!uni_lines)) return; pos = (unsigned long)screenpos(vc, offset, viewed); if (pos >= vc->vc_origin && pos < vc->vc_scr_end) { /* * Desired position falls in the main screen buffer. * However the actual row/col might be different if * scrollback is active. */ row = (pos - vc->vc_origin) / vc->vc_size_row; col = ((pos - vc->vc_origin) % vc->vc_size_row) / 2; memcpy(dest, &uni_lines[row][col], nr * sizeof(u32)); } else { /* * Scrollback is active. 
For now let's simply backtranslate
		 * the screen glyphs until the unicode screen buffer does
		 * synchronize with console display drivers for a scrollback
		 * buffer of its own.
		 */
		u16 *p = (u16 *)pos;
		int mask = vc->vc_hi_font_mask | 0xff;
		u32 *uni_buf = dest;

		while (nr--) {
			u16 glyph = scr_readw(p++) & mask;

			*uni_buf++ = inverse_translate(vc, glyph, true);
		}
	}
}

/*
 * Scroll rows [@top, @bottom) of @vc by @nr lines in direction @dir.
 *
 * The unicode buffer is scrolled first.  If the console is visible and the
 * driver's con_scroll() returns non-zero, nothing more is done here;
 * otherwise the glyph buffer is moved and the vacated rows are filled with
 * the erase character.
 */
static void con_scroll(struct vc_data *vc, unsigned int top,
		       unsigned int bottom, enum con_scroll dir,
		       unsigned int nr)
{
	unsigned int rows = bottom - top;
	u16 *clear, *dst, *src;

	/* Cap the scroll amount so at least one row of the region moves. */
	if (top + nr >= bottom)
		nr = rows - 1;
	if (bottom > vc->vc_rows || top >= bottom || nr < 1)
		return;

	vc_uniscr_scroll(vc, top, bottom, dir, nr);
	if (con_is_visible(vc) &&
			vc->vc_sw->con_scroll(vc, top, bottom, dir, nr))
		return;

	/* Set up for a downward scroll: move from top to top + nr. */
	src = clear = (u16 *)(vc->vc_origin + vc->vc_size_row * top);
	dst = (u16 *)(vc->vc_origin + vc->vc_size_row * (top + nr));

	if (dir == SM_UP) {
		/* Upward: reverse the move and clear the bottom rows instead. */
		clear = src + (rows - nr) * vc->vc_cols;
		swap(src, dst);
	}
	scr_memmovew(dst, src, (rows - nr) * vc->vc_size_row);
	scr_memsetw(clear, vc->vc_video_erase_char, vc->vc_size_row * nr);
}

/*
 * Hand @count screen cells starting at address @start to the driver's
 * con_putcs().  Cells are grouped into runs that share the same attribute
 * byte (high 8 bits) and do not cross a row boundary.
 */
static void do_update_region(struct vc_data *vc, unsigned long start, int count)
{
	unsigned int xx, yy, offset;
	u16 *p = (u16 *)start;

	/* Convert the byte address into a column/row pair. */
	offset = (start - vc->vc_origin) / 2;
	xx = offset % vc->vc_cols;
	yy = offset / vc->vc_cols;

	for (;;) {
		u16 attrib = scr_readw(p) & 0xff00;
		int startx = xx;
		u16 *q = p;

		while (xx < vc->vc_cols && count) {
			if (attrib != (scr_readw(p) & 0xff00)) {
				/* Attribute changed: flush the run so far. */
				if (p > q)
					vc->vc_sw->con_putcs(vc, q, p-q, yy, startx);
				startx = xx;
				q = p;
				attrib = scr_readw(p) & 0xff00;
			}
			p++;
			xx++;
			count--;
		}
		/* Flush whatever is left of the run on this row. */
		if (p > q)
			vc->vc_sw->con_putcs(vc, q, p-q, yy, startx);
		if (!count)
			break;
		xx = 0;
		yy++;
	}
}

/*
 * Redraw @count cells from @start if con_should_update() allows it, with
 * the cursor hidden for the duration of the redraw.
 */
void update_region(struct vc_data *vc, unsigned long start, int count)
{
	WARN_CONSOLE_UNLOCKED();

	if (con_should_update(vc)) {
		hide_cursor(vc);
		do_update_region(vc, start, count);
		set_cursor(vc);
	}
}
EXPORT_SYMBOL(update_region);

/* Structure of attributes is hardware-dependent */

static u8
build_attr(struct vc_data *vc, u8 _color,
		enum vc_intensity _intensity, bool _blink, bool _underline,
		bool _reverse, bool _italic)
{
	/* A driver-specific attribute builder takes precedence. */
	if (vc->vc_sw->con_build_attr)
		return vc->vc_sw->con_build_attr(vc, _color, _intensity,
		       _blink, _underline, _reverse, _italic);

	/*
	 * ++roman: I completely changed the attribute format for monochrome
	 * mode (!can_do_color). The formerly used MDA (monochrome display
	 * adapter) format didn't allow the combination of certain effects.
	 * Now the attribute is just a bit vector:
	 *  Bit 0..1: intensity (0..2)
	 *  Bit 2   : underline
	 *  Bit 3   : reverse
	 *  Bit 7   : blink
	 *
	 * NOTE(review): the monochrome expression below also ORs _italic into
	 * bit 1, which the table above does not mention.
	 */
	{
	u8 a = _color;
	if (!vc->vc_can_do_color)
		return _intensity |
		       (_italic    << 1) |
		       (_underline << 2) |
		       (_reverse   << 3) |
		       (_blink     << 7);
	/* Colour mode: italic/underline/half-bright replace the foreground. */
	if (_italic)
		a = (a & 0xF0) | vc->vc_itcolor;
	else if (_underline)
		a = (a & 0xf0) | vc->vc_ulcolor;
	else if (_intensity == VCI_HALF_BRIGHT)
		a = (a & 0xf0) | vc->vc_halfcolor;
	/* Reverse: swap the 3-bit colour fields, keep bits 3 and 7 in place. */
	if (_reverse)
		a = (a & 0x88) | (((a >> 4) | (a << 4)) & 0x77);
	if (_blink)
		a ^= 0x80;
	if (_intensity == VCI_BOLD)
		a ^= 0x08;
	if (vc->vc_hi_font_mask == 0x100)
		a <<= 1;
	return a;
	}
}

/*
 * Recompute vc_attr from the current state, and vc_video_erase_char as a
 * space carrying the current colour and blink at normal intensity, with no
 * underline or italic, reversed only by the screen-wide vc_decscnm.
 */
static void update_attr(struct vc_data *vc)
{
	vc->vc_attr = build_attr(vc, vc->state.color, vc->state.intensity,
	              vc->state.blink, vc->state.underline,
	              vc->state.reverse ^ vc->vc_decscnm, vc->state.italic);
	vc->vc_video_erase_char = ' ' | (build_attr(vc, vc->state.color,
				VCI_NORMAL, vc->state.blink, false,
				vc->vc_decscnm, false) << 8);
}

/* Note: inverting the screen twice should revert to the original state */
void invert_screen(struct vc_data *vc, int offset, int count, bool viewed)
{
	u16 *p;

	WARN_CONSOLE_UNLOCKED();

	/* @count arrives in bytes; each screen cell is two bytes. */
	count /= 2;
	p = screenpos(vc, offset, viewed);
	if (vc->vc_sw->con_invert_region) {
		vc->vc_sw->con_invert_region(vc, p, count);
	} else {
		u16 *q = p;
		int cnt = count;
		u16 a;

		if (!vc->vc_can_do_color) {
			while (cnt--) {
				a = scr_readw(q);
				a ^= 0x0800;
				scr_writew(a, q);
				q++;
			}
		} else if (vc->vc_hi_font_mask == 0x100) {
			while (cnt--) {
				a = scr_readw(q);
				a = (a & 0x11ff) |
				   ((a & 0xe000) >> 4) |
((a & 0x0e00) << 4); scr_writew(a, q); q++; } } else { while (cnt--) { a = scr_readw(q); a = (a & 0x88ff) | ((a & 0x7000) >> 4) | ((a & 0x0700) << 4); scr_writew(a, q); q++; } } } if (con_should_update(vc)) do_update_region(vc, (unsigned long) p, count); notify_update(vc); } /* used by selection: complement pointer position */ void complement_pos(struct vc_data *vc, int offset) { static int old_offset = -1; static unsigned short old; static unsigned short oldx, oldy; WARN_CONSOLE_UNLOCKED(); if (old_offset != -1 && old_offset >= 0 && old_offset < vc->vc_screenbuf_size) { scr_writew(old, screenpos(vc, old_offset, true)); if (con_should_update(vc)) con_putc(vc, old, oldy, oldx); notify_update(vc); } old_offset = offset; if (offset != -1 && offset >= 0 && offset < vc->vc_screenbuf_size) { unsigned short new; u16 *p = screenpos(vc, offset, true); old = scr_readw(p); new = old ^ vc->vc_complement_mask; scr_writew(new, p); if (con_should_update(vc)) { oldx = (offset >> 1) % vc->vc_cols; oldy = (offset >> 1) / vc->vc_cols; con_putc(vc, new, oldy, oldx); } notify_update(vc); } } static void insert_char(struct vc_data *vc, unsigned int nr) { unsigned short *p = (unsigned short *) vc->vc_pos; vc_uniscr_insert(vc, nr); scr_memmovew(p + nr, p, (vc->vc_cols - vc->state.x - nr) * 2); scr_memsetw(p, vc->vc_video_erase_char, nr * 2); vc->vc_need_wrap = 0; if (con_should_update(vc)) do_update_region(vc, (unsigned long) p, vc->vc_cols - vc->state.x); } static void delete_char(struct vc_data *vc, unsigned int nr) { unsigned short *p = (unsigned short *) vc->vc_pos; vc_uniscr_delete(vc, nr); scr_memmovew(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2); scr_memsetw(p + vc->vc_cols - vc->state.x - nr, vc->vc_video_erase_char, nr * 2); vc->vc_need_wrap = 0; if (con_should_update(vc)) do_update_region(vc, (unsigned long) p, vc->vc_cols - vc->state.x); } static int softcursor_original = -1; static void add_softcursor(struct vc_data *vc) { int i = scr_readw((u16 *) vc->vc_pos); u32 type = 
vc->vc_cursor_type; if (!(type & CUR_SW)) return; if (softcursor_original != -1) return; softcursor_original = i; i |= CUR_SET(type); i ^= CUR_CHANGE(type); if ((type & CUR_ALWAYS_BG) && (softcursor_original & CUR_BG) == (i & CUR_BG)) i ^= CUR_BG; if ((type & CUR_INVERT_FG_BG) && (i & CUR_FG) == ((i & CUR_BG) >> 4)) i ^= CUR_FG; scr_writew(i, (u16 *)vc->vc_pos); if (con_should_update(vc)) con_putc(vc, i, vc->state.y, vc->state.x); } static void hide_softcursor(struct vc_data *vc) { if (softcursor_original != -1) { scr_writew(softcursor_original, (u16 *)vc->vc_pos); if (con_should_update(vc)) con_putc(vc, softcursor_original, vc->state.y, vc->state.x); softcursor_original = -1; } } static void hide_cursor(struct vc_data *vc) { if (vc_is_sel(vc)) clear_selection(); vc->vc_sw->con_cursor(vc, false); hide_softcursor(vc); } static void set_cursor(struct vc_data *vc) { if (!con_is_fg(vc) || console_blanked || vc->vc_mode == KD_GRAPHICS) return; if (vc->vc_deccm) { if (vc_is_sel(vc)) clear_selection(); add_softcursor(vc); if (CUR_SIZE(vc->vc_cursor_type) != CUR_NONE) vc->vc_sw->con_cursor(vc, true); } else hide_cursor(vc); } static void set_origin(struct vc_data *vc) { WARN_CONSOLE_UNLOCKED(); if (!con_is_visible(vc) || !vc->vc_sw->con_set_origin || !vc->vc_sw->con_set_origin(vc)) vc->vc_origin = (unsigned long)vc->vc_screenbuf; vc->vc_visible_origin = vc->vc_origin; vc->vc_scr_end = vc->vc_origin + vc->vc_screenbuf_size; vc->vc_pos = vc->vc_origin + vc->vc_size_row * vc->state.y + 2 * vc->state.x; } static void save_screen(struct vc_data *vc) { WARN_CONSOLE_UNLOCKED(); if (vc->vc_sw->con_save_screen) vc->vc_sw->con_save_screen(vc); } static void flush_scrollback(struct vc_data *vc) { WARN_CONSOLE_UNLOCKED(); set_origin(vc); if (!con_is_visible(vc)) return; /* * The legacy way for flushing the scrollback buffer is to use a side * effect of the con_switch method. 
We do it only on the foreground * console as background consoles have no scrollback buffers in that * case and we obviously don't want to switch to them. */ hide_cursor(vc); vc->vc_sw->con_switch(vc); set_cursor(vc); } /* * Redrawing of screen */ void clear_buffer_attributes(struct vc_data *vc) { unsigned short *p = (unsigned short *)vc->vc_origin; int count = vc->vc_screenbuf_size / 2; int mask = vc->vc_hi_font_mask | 0xff; for (; count > 0; count--, p++) { scr_writew((scr_readw(p)&mask) | (vc->vc_video_erase_char & ~mask), p); } } void redraw_screen(struct vc_data *vc, int is_switch) { int redraw = 0; WARN_CONSOLE_UNLOCKED(); if (!vc) { /* strange ... */ /* printk("redraw_screen: tty %d not allocated ??\n", new_console+1); */ return; } if (is_switch) { struct vc_data *old_vc = vc_cons[fg_console].d; if (old_vc == vc) return; if (!con_is_visible(vc)) redraw = 1; *vc->vc_display_fg = vc; fg_console = vc->vc_num; hide_cursor(old_vc); if (!con_is_visible(old_vc)) { save_screen(old_vc); set_origin(old_vc); } if (tty0dev) sysfs_notify(&tty0dev->kobj, NULL, "active"); } else { hide_cursor(vc); redraw = 1; } if (redraw) { bool update; int old_was_color = vc->vc_can_do_color; set_origin(vc); update = vc->vc_sw->con_switch(vc); set_palette(vc); /* * If console changed from mono<->color, the best we can do * is to clear the buffer attributes. As it currently stands, * rebuilding new attributes from the old buffer is not doable * without overly complex code. */ if (old_was_color != vc->vc_can_do_color) { update_attr(vc); clear_buffer_attributes(vc); } if (update && vc->vc_mode != KD_GRAPHICS) do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2); } set_cursor(vc); if (is_switch) { vt_set_leds_compute_shiftstate(); notify_update(vc); } } EXPORT_SYMBOL(redraw_screen); /* * Allocation, freeing and resizing of VTs. 
*/ int vc_cons_allocated(unsigned int i) { return (i < MAX_NR_CONSOLES && vc_cons[i].d); } static void visual_init(struct vc_data *vc, int num, bool init) { /* ++Geert: vc->vc_sw->con_init determines console size */ if (vc->vc_sw) module_put(vc->vc_sw->owner); vc->vc_sw = conswitchp; if (con_driver_map[num]) vc->vc_sw = con_driver_map[num]; __module_get(vc->vc_sw->owner); vc->vc_num = num; vc->vc_display_fg = &master_display_fg; if (vc->uni_pagedict_loc) con_free_unimap(vc); vc->uni_pagedict_loc = &vc->uni_pagedict; vc->uni_pagedict = NULL; vc->vc_hi_font_mask = 0; vc->vc_complement_mask = 0; vc->vc_can_do_color = 0; vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS; vc->vc_sw->con_init(vc, init); if (!vc->vc_complement_mask) vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800; vc->vc_s_complement_mask = vc->vc_complement_mask; vc->vc_size_row = vc->vc_cols << 1; vc->vc_screenbuf_size = vc->vc_rows * vc->vc_size_row; } static void visual_deinit(struct vc_data *vc) { vc->vc_sw->con_deinit(vc); module_put(vc->vc_sw->owner); } static void vc_port_destruct(struct tty_port *port) { struct vc_data *vc = container_of(port, struct vc_data, port); kfree(vc); } static const struct tty_port_operations vc_port_ops = { .destruct = vc_port_destruct, }; /* * Change # of rows and columns (0 means unchanged/the size of fg_console) * [this is to be used together with some user program * like resize that changes the hardware videomode] */ #define VC_MAXCOL (32767) #define VC_MAXROW (32767) int vc_allocate(unsigned int currcons) /* return 0 on success */ { struct vt_notifier_param param; struct vc_data *vc; int err; WARN_CONSOLE_UNLOCKED(); if (currcons >= MAX_NR_CONSOLES) return -ENXIO; if (vc_cons[currcons].d) return 0; /* due to the granularity of kmalloc, we waste some memory here */ /* the alloc is done in two steps, to optimize the common situation of a 25x80 console (structsize=216, screenbuf_size=4000) */ /* although the numbers above are not valid since long ago, the 
point is still up-to-date and the comment still has its value even if only as a historical artifact. --mj, July 1998 */ param.vc = vc = kzalloc(sizeof(struct vc_data), GFP_KERNEL); if (!vc) return -ENOMEM; vc_cons[currcons].d = vc; tty_port_init(&vc->port); vc->port.ops = &vc_port_ops; INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); visual_init(vc, currcons, true); if (!*vc->uni_pagedict_loc) con_set_default_unimap(vc); err = -EINVAL; if (vc->vc_cols > VC_MAXCOL || vc->vc_rows > VC_MAXROW || vc->vc_screenbuf_size > KMALLOC_MAX_SIZE || !vc->vc_screenbuf_size) goto err_free; err = -ENOMEM; vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_KERNEL); if (!vc->vc_screenbuf) goto err_free; /* If no drivers have overridden us and the user didn't pass a boot option, default to displaying the cursor */ if (global_cursor_default == -1) global_cursor_default = 1; vc_init(vc, 1); vcs_make_sysfs(currcons); atomic_notifier_call_chain(&vt_notifier_list, VT_ALLOCATE, &param); return 0; err_free: visual_deinit(vc); kfree(vc); vc_cons[currcons].d = NULL; return err; } static inline int resize_screen(struct vc_data *vc, int width, int height, bool from_user) { /* Resizes the resolution of the display adapater */ int err = 0; if (vc->vc_sw->con_resize) err = vc->vc_sw->con_resize(vc, width, height, from_user); return err; } /** * vc_do_resize - resizing method for the tty * @tty: tty being resized * @vc: virtual console private data * @cols: columns * @lines: lines * @from_user: invoked by a user? * * Resize a virtual console, clipping according to the actual constraints. If * the caller passes a tty structure then update the termios winsize * information and perform any necessary signal handling. * * Locking: Caller must hold the console semaphore. Takes the termios rwsem and * ctrl.lock of the tty IFF a tty is passed. 
*/ static int vc_do_resize(struct tty_struct *tty, struct vc_data *vc, unsigned int cols, unsigned int lines, bool from_user) { unsigned long old_origin, new_origin, new_scr_end, rlth, rrem, err = 0; unsigned long end; unsigned int old_rows, old_row_size, first_copied_row; unsigned int new_cols, new_rows, new_row_size, new_screen_size; unsigned short *oldscreen, *newscreen; u32 **new_uniscr = NULL; WARN_CONSOLE_UNLOCKED(); if (cols > VC_MAXCOL || lines > VC_MAXROW) return -EINVAL; new_cols = (cols ? cols : vc->vc_cols); new_rows = (lines ? lines : vc->vc_rows); new_row_size = new_cols << 1; new_screen_size = new_row_size * new_rows; if (new_cols == vc->vc_cols && new_rows == vc->vc_rows) { /* * This function is being called here to cover the case * where the userspace calls the FBIOPUT_VSCREENINFO twice, * passing the same fb_var_screeninfo containing the fields * yres/xres equal to a number non-multiple of vc_font.height * and yres_virtual/xres_virtual equal to number lesser than the * vc_font.height and yres/xres. * In the second call, the struct fb_var_screeninfo isn't * being modified by the underlying driver because of the * if above, and this causes the fbcon_display->vrows to become * negative and it eventually leads to out-of-bound * access by the imageblit function. * To give the correct values to the struct and to not have * to deal with possible errors from the code below, we call * the resize_screen here as well. 
*/ return resize_screen(vc, new_cols, new_rows, from_user); } if (new_screen_size > KMALLOC_MAX_SIZE || !new_screen_size) return -EINVAL; newscreen = kzalloc(new_screen_size, GFP_USER); if (!newscreen) return -ENOMEM; if (vc->vc_uni_lines) { new_uniscr = vc_uniscr_alloc(new_cols, new_rows); if (!new_uniscr) { kfree(newscreen); return -ENOMEM; } } if (vc_is_sel(vc)) clear_selection(); old_rows = vc->vc_rows; old_row_size = vc->vc_size_row; err = resize_screen(vc, new_cols, new_rows, from_user); if (err) { kfree(newscreen); vc_uniscr_free(new_uniscr); return err; } vc->vc_rows = new_rows; vc->vc_cols = new_cols; vc->vc_size_row = new_row_size; vc->vc_screenbuf_size = new_screen_size; rlth = min(old_row_size, new_row_size); rrem = new_row_size - rlth; old_origin = vc->vc_origin; new_origin = (long) newscreen; new_scr_end = new_origin + new_screen_size; if (vc->state.y > new_rows) { if (old_rows - vc->state.y < new_rows) { /* * Cursor near the bottom, copy contents from the * bottom of buffer */ first_copied_row = (old_rows - new_rows); } else { /* * Cursor is in no man's land, copy 1/2 screenful * from the top and bottom of cursor position */ first_copied_row = (vc->state.y - new_rows/2); } old_origin += first_copied_row * old_row_size; } else first_copied_row = 0; end = old_origin + old_row_size * min(old_rows, new_rows); vc_uniscr_copy_area(new_uniscr, new_cols, new_rows, vc->vc_uni_lines, rlth/2, first_copied_row, min(old_rows, new_rows)); vc_uniscr_set(vc, new_uniscr); update_attr(vc); while (old_origin < end) { scr_memcpyw((unsigned short *) new_origin, (unsigned short *) old_origin, rlth); if (rrem) scr_memsetw((void *)(new_origin + rlth), vc->vc_video_erase_char, rrem); old_origin += old_row_size; new_origin += new_row_size; } if (new_scr_end > new_origin) scr_memsetw((void *)new_origin, vc->vc_video_erase_char, new_scr_end - new_origin); oldscreen = vc->vc_screenbuf; vc->vc_screenbuf = newscreen; vc->vc_screenbuf_size = new_screen_size; set_origin(vc); 
kfree(oldscreen); /* do part of a reset_terminal() */ vc->vc_top = 0; vc->vc_bottom = vc->vc_rows; gotoxy(vc, vc->state.x, vc->state.y); save_cur(vc); if (tty) { /* Rewrite the requested winsize data with the actual resulting sizes */ struct winsize ws; memset(&ws, 0, sizeof(ws)); ws.ws_row = vc->vc_rows; ws.ws_col = vc->vc_cols; ws.ws_ypixel = vc->vc_scan_lines; tty_do_resize(tty, &ws); } if (con_is_visible(vc)) update_screen(vc); vt_event_post(VT_EVENT_RESIZE, vc->vc_num, vc->vc_num); notify_update(vc); return err; } /** * __vc_resize - resize a VT * @vc: virtual console * @cols: columns * @rows: rows * @from_user: invoked by a user? * * Resize a virtual console as seen from the console end of things. We use the * common vc_do_resize() method to update the structures. * * Locking: The caller must hold the console sem to protect console internals * and @vc->port.tty. */ int __vc_resize(struct vc_data *vc, unsigned int cols, unsigned int rows, bool from_user) { return vc_do_resize(vc->port.tty, vc, cols, rows, from_user); } EXPORT_SYMBOL(__vc_resize); /** * vt_resize - resize a VT * @tty: tty to resize * @ws: winsize attributes * * Resize a virtual terminal. This is called by the tty layer as we register * our own handler for resizing. The mutual helper does all the actual work. * * Locking: Takes the console sem and the called methods then take the tty * termios_rwsem and the tty ctrl.lock in that order. 
*/ static int vt_resize(struct tty_struct *tty, struct winsize *ws) { struct vc_data *vc = tty->driver_data; guard(console_lock)(); return vc_do_resize(tty, vc, ws->ws_col, ws->ws_row, false); } struct vc_data *vc_deallocate(unsigned int currcons) { struct vc_data *vc = NULL; WARN_CONSOLE_UNLOCKED(); if (vc_cons_allocated(currcons)) { struct vt_notifier_param param; param.vc = vc = vc_cons[currcons].d; atomic_notifier_call_chain(&vt_notifier_list, VT_DEALLOCATE, &param); vcs_remove_sysfs(currcons); visual_deinit(vc); con_free_unimap(vc); put_pid(vc->vt_pid); vc_uniscr_set(vc, NULL); kfree(vc->vc_screenbuf); vc_cons[currcons].d = NULL; if (vc->vc_saved_screen != NULL) { kfree(vc->vc_saved_screen); vc->vc_saved_screen = NULL; } } return vc; } /* * VT102 emulator */ enum { EPecma = 0, EPdec, EPeq, EPgt, EPlt}; #define set_kbd(vc, x) vt_set_kbd_mode_bit((vc)->vc_num, (x)) #define clr_kbd(vc, x) vt_clr_kbd_mode_bit((vc)->vc_num, (x)) #define is_kbd(vc, x) vt_get_kbd_mode_bit((vc)->vc_num, (x)) #define decarm VC_REPEAT #define decckm VC_CKMODE #define kbdapplic VC_APPLIC #define lnm VC_CRLF const unsigned char color_table[] = { 0, 4, 2, 6, 1, 5, 3, 7, 8,12,10,14, 9,13,11,15 }; EXPORT_SYMBOL(color_table); /* the default colour table, for VGA+ colour systems */ unsigned char default_red[] = { 0x00, 0xaa, 0x00, 0xaa, 0x00, 0xaa, 0x00, 0xaa, 0x55, 0xff, 0x55, 0xff, 0x55, 0xff, 0x55, 0xff }; module_param_array(default_red, byte, NULL, S_IRUGO | S_IWUSR); EXPORT_SYMBOL(default_red); unsigned char default_grn[] = { 0x00, 0x00, 0xaa, 0x55, 0x00, 0x00, 0xaa, 0xaa, 0x55, 0x55, 0xff, 0xff, 0x55, 0x55, 0xff, 0xff }; module_param_array(default_grn, byte, NULL, S_IRUGO | S_IWUSR); EXPORT_SYMBOL(default_grn); unsigned char default_blu[] = { 0x00, 0x00, 0x00, 0x00, 0xaa, 0xaa, 0xaa, 0xaa, 0x55, 0x55, 0x55, 0x55, 0xff, 0xff, 0xff, 0xff }; module_param_array(default_blu, byte, NULL, S_IRUGO | S_IWUSR); EXPORT_SYMBOL(default_blu); /* * gotoxy() must verify all boundaries, because the 
arguments * might also be negative. If the given position is out of * bounds, the cursor is placed at the nearest margin. */ static void gotoxy(struct vc_data *vc, int new_x, int new_y) { int min_y, max_y; if (new_x < 0) vc->state.x = 0; else { if (new_x >= vc->vc_cols) vc->state.x = vc->vc_cols - 1; else vc->state.x = new_x; } if (vc->vc_decom) { min_y = vc->vc_top; max_y = vc->vc_bottom; } else { min_y = 0; max_y = vc->vc_rows; } if (new_y < min_y) vc->state.y = min_y; else if (new_y >= max_y) vc->state.y = max_y - 1; else vc->state.y = new_y; vc->vc_pos = vc->vc_origin + vc->state.y * vc->vc_size_row + (vc->state.x << 1); vc->vc_need_wrap = 0; } /* for absolute user moves, when decom is set */ static void gotoxay(struct vc_data *vc, int new_x, int new_y) { gotoxy(vc, new_x, vc->vc_decom ? (vc->vc_top + new_y) : new_y); } void scrollback(struct vc_data *vc) { scrolldelta(-(vc->vc_rows / 2)); } void scrollfront(struct vc_data *vc, int lines) { if (!lines) lines = vc->vc_rows / 2; scrolldelta(lines); } static void lf(struct vc_data *vc) { /* don't scroll if above bottom of scrolling region, or * if below scrolling region */ if (vc->state.y + 1 == vc->vc_bottom) con_scroll(vc, vc->vc_top, vc->vc_bottom, SM_UP, 1); else if (vc->state.y < vc->vc_rows - 1) { vc->state.y++; vc->vc_pos += vc->vc_size_row; } vc->vc_need_wrap = 0; notify_write(vc, '\n'); } static void ri(struct vc_data *vc) { /* don't scroll if below top of scrolling region, or * if above scrolling region */ if (vc->state.y == vc->vc_top) con_scroll(vc, vc->vc_top, vc->vc_bottom, SM_DOWN, 1); else if (vc->state.y > 0) { vc->state.y--; vc->vc_pos -= vc->vc_size_row; } vc->vc_need_wrap = 0; } static inline void cr(struct vc_data *vc) { vc->vc_pos -= vc->state.x << 1; vc->vc_need_wrap = vc->state.x = 0; notify_write(vc, '\r'); } static inline void bs(struct vc_data *vc) { if (vc->state.x) { vc->vc_pos -= 2; vc->state.x--; vc->vc_need_wrap = 0; notify_write(vc, '\b'); } } static inline void del(struct vc_data 
*vc) { /* ignored */ } enum CSI_J { CSI_J_CURSOR_TO_END = 0, CSI_J_START_TO_CURSOR = 1, CSI_J_VISIBLE = 2, CSI_J_FULL = 3, }; static void csi_J(struct vc_data *vc, enum CSI_J vpar) { unsigned short *start; unsigned int count; switch (vpar) { case CSI_J_CURSOR_TO_END: vc_uniscr_clear_line(vc, vc->state.x, vc->vc_cols - vc->state.x); vc_uniscr_clear_lines(vc, vc->state.y + 1, vc->vc_rows - vc->state.y - 1); count = (vc->vc_scr_end - vc->vc_pos) >> 1; start = (unsigned short *)vc->vc_pos; break; case CSI_J_START_TO_CURSOR: vc_uniscr_clear_line(vc, 0, vc->state.x + 1); vc_uniscr_clear_lines(vc, 0, vc->state.y); count = ((vc->vc_pos - vc->vc_origin) >> 1) + 1; start = (unsigned short *)vc->vc_origin; break; case CSI_J_FULL: flush_scrollback(vc); fallthrough; case CSI_J_VISIBLE: vc_uniscr_clear_lines(vc, 0, vc->vc_rows); count = vc->vc_cols * vc->vc_rows; start = (unsigned short *)vc->vc_origin; break; default: return; } scr_memsetw(start, vc->vc_video_erase_char, 2 * count); if (con_should_update(vc)) do_update_region(vc, (unsigned long) start, count); vc->vc_need_wrap = 0; } enum { CSI_K_CURSOR_TO_LINEEND = 0, CSI_K_LINESTART_TO_CURSOR = 1, CSI_K_LINE = 2, }; static void csi_K(struct vc_data *vc) { unsigned int count; unsigned short *start = (unsigned short *)vc->vc_pos; int offset; switch (vc->vc_par[0]) { case CSI_K_CURSOR_TO_LINEEND: offset = 0; count = vc->vc_cols - vc->state.x; break; case CSI_K_LINESTART_TO_CURSOR: offset = -vc->state.x; count = vc->state.x + 1; break; case CSI_K_LINE: offset = -vc->state.x; count = vc->vc_cols; break; default: return; } vc_uniscr_clear_line(vc, vc->state.x + offset, count); scr_memsetw(start + offset, vc->vc_video_erase_char, 2 * count); vc->vc_need_wrap = 0; if (con_should_update(vc)) do_update_region(vc, (unsigned long)(start + offset), count); } /* erase the following count positions */ static void csi_X(struct vc_data *vc) { /* not vt100? 
*/ unsigned int count = clamp(vc->vc_par[0], 1, vc->vc_cols - vc->state.x); vc_uniscr_clear_line(vc, vc->state.x, count); scr_memsetw((unsigned short *)vc->vc_pos, vc->vc_video_erase_char, 2 * count); if (con_should_update(vc)) vc->vc_sw->con_clear(vc, vc->state.y, vc->state.x, count); vc->vc_need_wrap = 0; } static void default_attr(struct vc_data *vc) { vc->state.intensity = VCI_NORMAL; vc->state.italic = false; vc->state.underline = false; vc->state.reverse = false; vc->state.blink = false; vc->state.color = vc->vc_def_color; } struct rgb { u8 r; u8 g; u8 b; }; static void rgb_from_256(unsigned int i, struct rgb *c) { if (i < 8) { /* Standard colours. */ c->r = i&1 ? 0xaa : 0x00; c->g = i&2 ? 0xaa : 0x00; c->b = i&4 ? 0xaa : 0x00; } else if (i < 16) { c->r = i&1 ? 0xff : 0x55; c->g = i&2 ? 0xff : 0x55; c->b = i&4 ? 0xff : 0x55; } else if (i < 232) { /* 6x6x6 colour cube. */ i -= 16; c->b = i % 6 * 255 / 6; i /= 6; c->g = i % 6 * 255 / 6; i /= 6; c->r = i * 255 / 6; } else /* Grayscale ramp. */ c->r = c->g = c->b = i * 10 - 2312; } static void rgb_foreground(struct vc_data *vc, const struct rgb *c) { u8 hue = 0, max = max3(c->r, c->g, c->b); if (c->r > max / 2) hue |= 4; if (c->g > max / 2) hue |= 2; if (c->b > max / 2) hue |= 1; if (hue == 7 && max <= 0x55) { hue = 0; vc->state.intensity = VCI_BOLD; } else if (max > 0xaa) vc->state.intensity = VCI_BOLD; else vc->state.intensity = VCI_NORMAL; vc->state.color = (vc->state.color & 0xf0) | hue; } static void rgb_background(struct vc_data *vc, const struct rgb *c) { /* For backgrounds, err on the dark side. */ vc->state.color = (vc->state.color & 0x0f) | (c->r&0x80) >> 1 | (c->g&0x80) >> 2 | (c->b&0x80) >> 3; } /* * ITU T.416 Higher colour modes. They break the usual properties of SGR codes * and thus need to be detected and ignored by hand. That standard also * wants : rather than ; as separators but sequences containing : are currently * completely ignored by the parser. 
* * Subcommands 3 (CMY) and 4 (CMYK) are so insane there's no point in * supporting them. */ static int vc_t416_color(struct vc_data *vc, int i, void(*set_color)(struct vc_data *vc, const struct rgb *c)) { struct rgb c; i++; if (i > vc->vc_npar) return i; if (vc->vc_par[i] == 5 && i + 1 <= vc->vc_npar) { /* 256 colours */ i++; rgb_from_256(vc->vc_par[i], &c); } else if (vc->vc_par[i] == 2 && i + 3 <= vc->vc_npar) { /* 24 bit */ c.r = vc->vc_par[i + 1]; c.g = vc->vc_par[i + 2]; c.b = vc->vc_par[i + 3]; i += 3; } else return i; set_color(vc, &c); return i; } enum { CSI_m_DEFAULT = 0, CSI_m_BOLD = 1, CSI_m_HALF_BRIGHT = 2, CSI_m_ITALIC = 3, CSI_m_UNDERLINE = 4, CSI_m_BLINK = 5, CSI_m_REVERSE = 7, CSI_m_PRI_FONT = 10, CSI_m_ALT_FONT1 = 11, CSI_m_ALT_FONT2 = 12, CSI_m_DOUBLE_UNDERLINE = 21, CSI_m_NORMAL_INTENSITY = 22, CSI_m_NO_ITALIC = 23, CSI_m_NO_UNDERLINE = 24, CSI_m_NO_BLINK = 25, CSI_m_NO_REVERSE = 27, CSI_m_FG_COLOR_BEG = 30, CSI_m_FG_COLOR_END = 37, CSI_m_FG_COLOR = 38, CSI_m_DEFAULT_FG_COLOR = 39, CSI_m_BG_COLOR_BEG = 40, CSI_m_BG_COLOR_END = 47, CSI_m_BG_COLOR = 48, CSI_m_DEFAULT_BG_COLOR = 49, CSI_m_BRIGHT_FG_COLOR_BEG = 90, CSI_m_BRIGHT_FG_COLOR_END = 97, CSI_m_BRIGHT_FG_COLOR_OFF = CSI_m_BRIGHT_FG_COLOR_BEG - CSI_m_FG_COLOR_BEG, CSI_m_BRIGHT_BG_COLOR_BEG = 100, CSI_m_BRIGHT_BG_COLOR_END = 107, CSI_m_BRIGHT_BG_COLOR_OFF = CSI_m_BRIGHT_BG_COLOR_BEG - CSI_m_BG_COLOR_BEG, }; /* console_lock is held */ static void csi_m(struct vc_data *vc) { int i; for (i = 0; i <= vc->vc_npar; i++) switch (vc->vc_par[i]) { case CSI_m_DEFAULT: /* all attributes off */ default_attr(vc); break; case CSI_m_BOLD: vc->state.intensity = VCI_BOLD; break; case CSI_m_HALF_BRIGHT: vc->state.intensity = VCI_HALF_BRIGHT; break; case CSI_m_ITALIC: vc->state.italic = true; break; case CSI_m_DOUBLE_UNDERLINE: /* * No console drivers support double underline, so * convert it to a single underline. 
*/
		case CSI_m_UNDERLINE:
			vc->state.underline = true;
			break;
		case CSI_m_BLINK:
			vc->state.blink = true;
			break;
		case CSI_m_REVERSE:
			vc->state.reverse = true;
			break;
		case CSI_m_PRI_FONT: /* ANSI X3.64-1979 (SCO-ish?)
			  * Select primary font, don't display control chars if
			  * defined, don't set bit 8 on output.
			  */
			vc->vc_translate = set_translate(vc->state.Gx_charset[vc->state.charset], vc);
			vc->vc_disp_ctrl = 0;
			vc->vc_toggle_meta = 0;
			break;
		case CSI_m_ALT_FONT1: /* ANSI X3.64-1979 (SCO-ish?)
			  * Select first alternate font, lets chars < 32 be
			  * displayed as ROM chars.
			  */
			vc->vc_translate = set_translate(IBMPC_MAP, vc);
			vc->vc_disp_ctrl = 1;
			vc->vc_toggle_meta = 0;
			break;
		case CSI_m_ALT_FONT2: /* ANSI X3.64-1979 (SCO-ish?)
			  * Select second alternate font, toggle high bit
			  * before displaying as ROM char.
			  */
			vc->vc_translate = set_translate(IBMPC_MAP, vc);
			vc->vc_disp_ctrl = 1;
			vc->vc_toggle_meta = 1;
			break;
		case CSI_m_NORMAL_INTENSITY:
			vc->state.intensity = VCI_NORMAL;
			break;
		case CSI_m_NO_ITALIC:
			vc->state.italic = false;
			break;
		case CSI_m_NO_UNDERLINE:
			vc->state.underline = false;
			break;
		case CSI_m_NO_BLINK:
			vc->state.blink = false;
			break;
		case CSI_m_NO_REVERSE:
			vc->state.reverse = false;
			break;
		/*
		 * Extended colours: the helper consumes the sub-parameters
		 * and returns the index of the last one it used.
		 */
		case CSI_m_FG_COLOR:
			i = vc_t416_color(vc, i, rgb_foreground);
			break;
		case CSI_m_BG_COLOR:
			i = vc_t416_color(vc, i, rgb_background);
			break;
		case CSI_m_DEFAULT_FG_COLOR:
			vc->state.color = (vc->vc_def_color & 0x0f) |
				(vc->state.color & 0xf0);
			break;
		case CSI_m_DEFAULT_BG_COLOR:
			vc->state.color = (vc->vc_def_color & 0xf0) |
				(vc->state.color & 0x0f);
			break;
		/*
		 * Bright foreground: force bold, then rebase the parameter
		 * into the normal foreground range and share that case.
		 */
		case CSI_m_BRIGHT_FG_COLOR_BEG ... CSI_m_BRIGHT_FG_COLOR_END:
			vc->state.intensity = VCI_BOLD;
			vc->vc_par[i] -= CSI_m_BRIGHT_FG_COLOR_OFF;
			fallthrough;
		case CSI_m_FG_COLOR_BEG ... CSI_m_FG_COLOR_END:
			vc->vc_par[i] -= CSI_m_FG_COLOR_BEG;
			vc->state.color = color_table[vc->vc_par[i]] |
				(vc->state.color & 0xf0);
			break;
		case CSI_m_BRIGHT_BG_COLOR_BEG ...
CSI_m_BRIGHT_BG_COLOR_END: vc->vc_par[i] -= CSI_m_BRIGHT_BG_COLOR_OFF; fallthrough; case CSI_m_BG_COLOR_BEG ... CSI_m_BG_COLOR_END: vc->vc_par[i] -= CSI_m_BG_COLOR_BEG; vc->state.color = (color_table[vc->vc_par[i]] << 4) | (vc->state.color & 0x0f); break; } update_attr(vc); } static void respond_string(const char *p, size_t len, struct tty_port *port) { tty_insert_flip_string(port, p, len); tty_flip_buffer_push(port); } static void cursor_report(struct vc_data *vc, struct tty_struct *tty) { char buf[40]; int len; len = sprintf(buf, "\033[%d;%dR", vc->state.y + (vc->vc_decom ? vc->vc_top + 1 : 1), vc->state.x + 1); respond_string(buf, len, tty->port); } static inline void status_report(struct tty_struct *tty) { static const char teminal_ok[] = "\033[0n"; respond_string(teminal_ok, strlen(teminal_ok), tty->port); } static inline void respond_ID(struct tty_struct *tty) { /* terminal answer to an ESC-Z or csi0c query. */ static const char vt102_id[] = "\033[?6c"; respond_string(vt102_id, strlen(vt102_id), tty->port); } void mouse_report(struct tty_struct *tty, int butt, int mrx, int mry) { char buf[8]; int len; len = sprintf(buf, "\033[M%c%c%c", (char)(' ' + butt), (char)('!' + mrx), (char)('!' 
+ mry)); respond_string(buf, len, tty->port); } /* invoked via ioctl(TIOCLINUX) and through set_selection_user */ int mouse_reporting(void) { return vc_cons[fg_console].d->vc_report_mouse; } /* invoked via ioctl(TIOCLINUX) */ static int get_bracketed_paste(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; return vc->vc_bracketed_paste; } /* console_lock is held */ static void enter_alt_screen(struct vc_data *vc) { unsigned int size = vc->vc_rows * vc->vc_cols * 2; if (vc->vc_saved_screen != NULL) return; /* Already inside an alt-screen */ vc->vc_saved_screen = kmemdup((u16 *)vc->vc_origin, size, GFP_KERNEL); if (vc->vc_saved_screen == NULL) return; vc->vc_saved_rows = vc->vc_rows; vc->vc_saved_cols = vc->vc_cols; save_cur(vc); /* clear entire screen */ csi_J(vc, CSI_J_FULL); } /* console_lock is held */ static void leave_alt_screen(struct vc_data *vc) { unsigned int rows = min(vc->vc_saved_rows, vc->vc_rows); unsigned int cols = min(vc->vc_saved_cols, vc->vc_cols); u16 *src, *dest; if (vc->vc_saved_screen == NULL) return; /* Not inside an alt-screen */ for (unsigned int r = 0; r < rows; r++) { src = vc->vc_saved_screen + r * vc->vc_saved_cols; dest = ((u16 *)vc->vc_origin) + r * vc->vc_cols; memcpy(dest, src, 2 * cols); } restore_cur(vc); /* Update the entire screen */ if (con_should_update(vc)) do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2); kfree(vc->vc_saved_screen); vc->vc_saved_screen = NULL; } enum { CSI_DEC_hl_CURSOR_KEYS = 1, /* CKM: cursor keys send ^[Ox/^[[x */ CSI_DEC_hl_132_COLUMNS = 3, /* COLM: 80/132 mode switch */ CSI_DEC_hl_REVERSE_VIDEO = 5, /* SCNM */ CSI_DEC_hl_ORIGIN_MODE = 6, /* OM: origin relative/absolute */ CSI_DEC_hl_AUTOWRAP = 7, /* AWM */ CSI_DEC_hl_AUTOREPEAT = 8, /* ARM */ CSI_DEC_hl_MOUSE_X10 = 9, CSI_DEC_hl_SHOW_CURSOR = 25, /* TCEM */ CSI_DEC_hl_MOUSE_VT200 = 1000, CSI_DEC_hl_ALT_SCREEN = 1049, CSI_DEC_hl_BRACKETED_PASTE = 2004, }; /* console_lock is held */ static void csi_DEC_hl(struct vc_data *vc, 
bool on_off) { unsigned int i; for (i = 0; i <= vc->vc_npar; i++) switch (vc->vc_par[i]) { case CSI_DEC_hl_CURSOR_KEYS: if (on_off) set_kbd(vc, decckm); else clr_kbd(vc, decckm); break; case CSI_DEC_hl_132_COLUMNS: /* unimplemented */ #if 0 vc_resize(deccolm ? 132 : 80, vc->vc_rows); /* this alone does not suffice; some user mode utility has to change the hardware regs */ #endif break; case CSI_DEC_hl_REVERSE_VIDEO: if (vc->vc_decscnm != on_off) { vc->vc_decscnm = on_off; invert_screen(vc, 0, vc->vc_screenbuf_size, false); update_attr(vc); } break; case CSI_DEC_hl_ORIGIN_MODE: vc->vc_decom = on_off; gotoxay(vc, 0, 0); break; case CSI_DEC_hl_AUTOWRAP: vc->vc_decawm = on_off; break; case CSI_DEC_hl_AUTOREPEAT: if (on_off) set_kbd(vc, decarm); else clr_kbd(vc, decarm); break; case CSI_DEC_hl_MOUSE_X10: vc->vc_report_mouse = on_off ? 1 : 0; break; case CSI_DEC_hl_SHOW_CURSOR: vc->vc_deccm = on_off; break; case CSI_DEC_hl_MOUSE_VT200: vc->vc_report_mouse = on_off ? 2 : 0; break; case CSI_DEC_hl_BRACKETED_PASTE: vc->vc_bracketed_paste = on_off; break; case CSI_DEC_hl_ALT_SCREEN: if (on_off) enter_alt_screen(vc); else leave_alt_screen(vc); break; } } enum { CSI_hl_DISPLAY_CTRL = 3, /* handle ansi control chars */ CSI_hl_INSERT = 4, /* IRM: insert/replace */ CSI_hl_AUTO_NL = 20, /* LNM: Enter == CrLf/Lf */ }; /* console_lock is held */ static void csi_hl(struct vc_data *vc, bool on_off) { unsigned int i; for (i = 0; i <= vc->vc_npar; i++) switch (vc->vc_par[i]) { /* ANSI modes set/reset */ case CSI_hl_DISPLAY_CTRL: vc->vc_disp_ctrl = on_off; break; case CSI_hl_INSERT: vc->vc_decim = on_off; break; case CSI_hl_AUTO_NL: if (on_off) set_kbd(vc, lnm); else clr_kbd(vc, lnm); break; } } enum CSI_right_square_bracket { CSI_RSB_COLOR_FOR_UNDERLINE = 1, CSI_RSB_COLOR_FOR_HALF_BRIGHT = 2, CSI_RSB_MAKE_CUR_COLOR_DEFAULT = 8, CSI_RSB_BLANKING_INTERVAL = 9, CSI_RSB_BELL_FREQUENCY = 10, CSI_RSB_BELL_DURATION = 11, CSI_RSB_BRING_CONSOLE_TO_FRONT = 12, CSI_RSB_UNBLANK = 13, 
CSI_RSB_VESA_OFF_INTERVAL = 14, CSI_RSB_BRING_PREV_CONSOLE_TO_FRONT = 15, CSI_RSB_CURSOR_BLINK_INTERVAL = 16, }; /* * csi_RSB - csi+] (Right Square Bracket) handler * * These are linux console private sequences. * * console_lock is held */ static void csi_RSB(struct vc_data *vc) { switch (vc->vc_par[0]) { case CSI_RSB_COLOR_FOR_UNDERLINE: if (vc->vc_can_do_color && vc->vc_par[1] < 16) { vc->vc_ulcolor = color_table[vc->vc_par[1]]; if (vc->state.underline) update_attr(vc); } break; case CSI_RSB_COLOR_FOR_HALF_BRIGHT: if (vc->vc_can_do_color && vc->vc_par[1] < 16) { vc->vc_halfcolor = color_table[vc->vc_par[1]]; if (vc->state.intensity == VCI_HALF_BRIGHT) update_attr(vc); } break; case CSI_RSB_MAKE_CUR_COLOR_DEFAULT: vc->vc_def_color = vc->vc_attr; if (vc->vc_hi_font_mask == 0x100) vc->vc_def_color >>= 1; default_attr(vc); update_attr(vc); break; case CSI_RSB_BLANKING_INTERVAL: blankinterval = min(vc->vc_par[1], 60U) * 60; poke_blanked_console(); break; case CSI_RSB_BELL_FREQUENCY: if (vc->vc_npar >= 1) vc->vc_bell_pitch = vc->vc_par[1]; else vc->vc_bell_pitch = DEFAULT_BELL_PITCH; break; case CSI_RSB_BELL_DURATION: if (vc->vc_npar >= 1) vc->vc_bell_duration = (vc->vc_par[1] < 2000) ? 
msecs_to_jiffies(vc->vc_par[1]) : 0; else vc->vc_bell_duration = DEFAULT_BELL_DURATION; break; case CSI_RSB_BRING_CONSOLE_TO_FRONT: if (vc->vc_par[1] >= 1 && vc_cons_allocated(vc->vc_par[1] - 1)) set_console(vc->vc_par[1] - 1); break; case CSI_RSB_UNBLANK: poke_blanked_console(); break; case CSI_RSB_VESA_OFF_INTERVAL: vesa_off_interval = min(vc->vc_par[1], 60U) * 60 * HZ; break; case CSI_RSB_BRING_PREV_CONSOLE_TO_FRONT: set_console(last_console); break; case CSI_RSB_CURSOR_BLINK_INTERVAL: if (vc->vc_npar >= 1 && vc->vc_par[1] >= 50 && vc->vc_par[1] <= USHRT_MAX) vc->vc_cur_blink_ms = vc->vc_par[1]; else vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS; break; } } /* console_lock is held */ static void csi_at(struct vc_data *vc, unsigned int nr) { nr = clamp(nr, 1, vc->vc_cols - vc->state.x); insert_char(vc, nr); } /* console_lock is held */ static void csi_L(struct vc_data *vc) { unsigned int nr = clamp(vc->vc_par[0], 1, vc->vc_rows - vc->state.y); con_scroll(vc, vc->state.y, vc->vc_bottom, SM_DOWN, nr); vc->vc_need_wrap = 0; } /* console_lock is held */ static void csi_P(struct vc_data *vc) { unsigned int nr = clamp(vc->vc_par[0], 1, vc->vc_cols - vc->state.x); delete_char(vc, nr); } /* console_lock is held */ static void csi_M(struct vc_data *vc) { unsigned int nr = clamp(vc->vc_par[0], 1, vc->vc_rows - vc->state.y); con_scroll(vc, vc->state.y, vc->vc_bottom, SM_UP, nr); vc->vc_need_wrap = 0; } /* console_lock is held (except via vc_init->reset_terminal */ static void save_cur(struct vc_data *vc) { memcpy(&vc->saved_state, &vc->state, sizeof(vc->state)); } /* console_lock is held */ static void restore_cur(struct vc_data *vc) { memcpy(&vc->state, &vc->saved_state, sizeof(vc->state)); gotoxy(vc, vc->state.x, vc->state.y); vc->vc_translate = set_translate(vc->state.Gx_charset[vc->state.charset], vc); update_attr(vc); vc->vc_need_wrap = 0; } /** * enum vc_ctl_state - control characters state of a vt * * @ESnormal: initial state, no control characters parsed * @ESesc: 
ESC parsed
 * @ESsquare: CSI parsed -- modifiers/parameters/ctrl chars expected
 * @ESgetpars: CSI parsed -- parameters/ctrl chars expected
 * @ESfunckey: CSI [ parsed
 * @EShash: ESC # parsed
 * @ESsetG0: ESC ( parsed
 * @ESsetG1: ESC ) parsed
 * @ESpercent: ESC % parsed
 * @EScsiignore: CSI [0x20-0x3f] parsed
 * @ESnonstd: OSC parsed
 * @ESpalette: OSC P parsed
 * @ESosc: OSC [0-9] parsed
 * @ESANSI_first: first state for ignoring ansi control sequences
 * @ESapc: ESC _ parsed
 * @ESpm: ESC ^ parsed
 * @ESdcs: ESC P parsed
 * @ESANSI_last: last state for ignoring ansi control sequences
 */
enum vc_ctl_state {
	ESnormal,
	ESesc,
	ESsquare,
	ESgetpars,
	ESfunckey,
	EShash,
	ESsetG0,
	ESsetG1,
	ESpercent,
	EScsiignore,
	ESnonstd,
	ESpalette,
	ESosc,
	ESANSI_first = ESosc,
	ESapc,
	ESpm,
	ESdcs,
	ESANSI_last = ESdcs,
};

/*
 * Put @vc back into its power-on terminal state: full-screen scroll region,
 * parser in ESnormal, default character sets, modes, attributes, bell and
 * cursor-blink settings, cursor at the origin. When @do_clear is non-zero the
 * visible screen is erased as well.
 *
 * console_lock is held (except via vc_init())
 */
static void reset_terminal(struct vc_data *vc, int do_clear)
{
	unsigned int i;

	/* Scroll region spans the whole screen. */
	vc->vc_top = 0;
	vc->vc_bottom = vc->vc_rows;

	/* Escape-sequence parser back to its idle state. */
	vc->vc_state = ESnormal;
	vc->vc_priv = EPecma;

	/* G0 = Latin-1, G1 = graphics, G0 selected. */
	vc->vc_translate = set_translate(LAT1_MAP, vc);
	vc->state.Gx_charset[0] = LAT1_MAP;
	vc->state.Gx_charset[1] = GRAF_MAP;
	vc->state.charset = 0;

	vc->vc_need_wrap = 0;
	vc->vc_report_mouse = 0;
	vc->vc_bracketed_paste = 0;
	vc->vc_utf = default_utf8;
	vc->vc_utf_count = 0;

	vc->vc_disp_ctrl = 0;
	vc->vc_toggle_meta = 0;

	/* DEC private modes; only autowrap defaults to on. */
	vc->vc_decscnm = 0;
	vc->vc_decom = 0;
	vc->vc_decawm = 1;
	vc->vc_deccm = global_cursor_default;
	vc->vc_decim = 0;

	/* Drop any screen copy kept by the alternate-screen mode. */
	if (vc->vc_saved_screen != NULL) {
		kfree(vc->vc_saved_screen);
		vc->vc_saved_screen = NULL;
		vc->vc_saved_rows = 0;
		vc->vc_saved_cols = 0;
	}

	vt_reset_keyboard(vc->vc_num);

	vc->vc_cursor_type = cur_default;
	vc->vc_complement_mask = vc->vc_s_complement_mask;

	default_attr(vc);
	update_attr(vc);

	/* Clear all tab stops, then set one at every 8th column. */
	bitmap_zero(vc->vc_tab_stop, VC_TABSTOPS_COUNT);
	for (i = 0; i < VC_TABSTOPS_COUNT; i += 8)
		set_bit(i, vc->vc_tab_stop);

	vc->vc_bell_pitch = DEFAULT_BELL_PITCH;
	vc->vc_bell_duration = DEFAULT_BELL_DURATION;
	vc->vc_cur_blink_ms = DEFAULT_CURSOR_BLINK_MS;

	/* Home the cursor and make that the saved cursor state too. */
	gotoxy(vc, 0, 0);
	save_cur(vc);
	if (do_clear)
		csi_J(vc, CSI_J_VISIBLE);
}

/*
 * Designate a character set for slot @which (0 = G0, 1 = G1) according to
 * the final byte @c of an "ESC (" / "ESC )" sequence. Unknown bytes leave the
 * slot untouched. If the slot is the currently selected one, the active
 * translation table is refreshed.
 */
static void vc_setGx(struct vc_data *vc, unsigned int which, u8 c)
{
	unsigned char *charset = &vc->state.Gx_charset[which];

	switch (c) {
	case '0':
		*charset = GRAF_MAP;
		break;
	case 'B':
		*charset = LAT1_MAP;
		break;
	case 'U':
		*charset = IBMPC_MAP;
		break;
	case 'K':
		*charset = USER_MAP;
		break;
	}

	if (vc->state.charset == which)
		vc->vc_translate = set_translate(*charset, vc);
}

/* True when @state is one of the ANSI control string states (ESosc..ESdcs). */
static bool ansi_control_string(enum vc_ctl_state state)
{
	return state >= ESANSI_first && state <= ESANSI_last;
}

/*
 * Named values for the control bytes the console interprets.
 * ASCII_IGNORE_FIRST..ASCII_IGNORE_LAST is the range dropped while inside an
 * ANSI control string; ASCII_CSI_IGNORE_FIRST..ASCII_CSI_IGNORE_LAST is the
 * range that sends a CSI sequence into the EScsiignore state.
 */
enum {
	ASCII_NULL = 0,
	ASCII_BELL = 7,
	ASCII_BACKSPACE = 8,
	ASCII_IGNORE_FIRST = ASCII_BACKSPACE,
	ASCII_HTAB = 9,
	ASCII_LINEFEED = 10,
	ASCII_VTAB = 11,
	ASCII_FORMFEED = 12,
	ASCII_CAR_RET = 13,
	ASCII_IGNORE_LAST = ASCII_CAR_RET,
	ASCII_SHIFTOUT = 14,
	ASCII_SHIFTIN = 15,
	ASCII_CANCEL = 24,
	ASCII_SUBSTITUTE = 26,
	ASCII_ESCAPE = 27,
	ASCII_CSI_IGNORE_FIRST = ' ', /* 0x2x, 0x3a and 0x3c - 0x3f */
	ASCII_CSI_IGNORE_LAST = '?',
	ASCII_DEL = 127,
	ASCII_EXT_CSI = 128 + ASCII_ESCAPE,
};

/*
 * Handle ascii characters in control sequences and change states accordingly.
 * E.g. ESC sets the state of vc to ESesc.
 *
 * Returns: true if @c handled.
*/
static bool handle_ascii(struct tty_struct *tty, struct vc_data *vc, u8 c)
{
	switch (c) {
	case ASCII_NULL:
		/* NUL is consumed without any effect. */
		break;

	case ASCII_BELL:
		/*
		 * Inside an ANSI control string BEL acts as the terminator;
		 * anywhere else it rings the bell, provided a non-zero
		 * duration is configured.
		 */
		if (ansi_control_string(vc->vc_state))
			vc->vc_state = ESnormal;
		else if (vc->vc_bell_duration)
			kd_mksound(vc->vc_bell_pitch, vc->vc_bell_duration);
		break;

	case ASCII_BACKSPACE:
		bs(vc);
		break;

	case ASCII_HTAB:
		/*
		 * Rebase vc_pos to the start of the row, advance x to the
		 * next tab stop (or the last column when there is none),
		 * then re-apply the column offset; each cell is two bytes.
		 */
		vc->vc_pos -= (vc->state.x << 1);

		vc->state.x = find_next_bit(vc->vc_tab_stop,
				min(vc->vc_cols - 1, VC_TABSTOPS_COUNT),
				vc->state.x + 1);
		if (vc->state.x >= VC_TABSTOPS_COUNT)
			vc->state.x = vc->vc_cols - 1;

		vc->vc_pos += (vc->state.x << 1);
		notify_write(vc, '\t');
		break;

	case ASCII_LINEFEED:
	case ASCII_VTAB:
	case ASCII_FORMFEED:
		/* A line feed implies a carriage return only in LNM mode. */
		lf(vc);
		if (is_kbd(vc, lnm))
			cr(vc);
		break;

	case ASCII_CAR_RET:
		cr(vc);
		break;

	case ASCII_SHIFTOUT:
	case ASCII_SHIFTIN: {
		/* SO selects G1 and turns disp_ctrl on; SI selects G0, off. */
		const bool shift_out = (c == ASCII_SHIFTOUT);

		vc->state.charset = shift_out ? 1 : 0;
		vc->vc_translate =
			set_translate(vc->state.Gx_charset[shift_out ? 1 : 0], vc);
		vc->vc_disp_ctrl = shift_out ? 1 : 0;
		break;
	}

	case ASCII_CANCEL:
	case ASCII_SUBSTITUTE:
		/* Abort whatever sequence is in progress. */
		vc->vc_state = ESnormal;
		break;

	case ASCII_ESCAPE:
		vc->vc_state = ESesc;
		break;

	case ASCII_DEL:
		del(vc);
		break;

	case ASCII_EXT_CSI:
		/* Single-byte CSI: equivalent to having parsed "ESC [". */
		vc->vc_state = ESsquare;
		break;

	default:
		/* Not one of ours; the caller continues with the state machine. */
		return false;
	}

	return true;
}

/*
 * Handle a character (@c) following an ESC (when @vc is in the ESesc state).
 * E.g. previous ESC with @c == '[' here yields the ESsquare state (that is:
 * CSI).
*/
static void handle_esc(struct tty_struct *tty, struct vc_data *vc, u8 c)
{
	/* Unless @c opens a longer sequence, the escape ends here. */
	vc->vc_state = ESnormal;

	switch (c) {
	/* Introducers: only move the parser to a follow-up state. */
	case '[':
		vc->vc_state = ESsquare;
		break;
	case ']':
		vc->vc_state = ESnonstd;
		break;
	case '_':
		vc->vc_state = ESapc;
		break;
	case '^':
		vc->vc_state = ESpm;
		break;
	case 'P':
		vc->vc_state = ESdcs;
		break;
	case '%':
		vc->vc_state = ESpercent;
		break;
	case '(':
		vc->vc_state = ESsetG0;
		break;
	case ')':
		vc->vc_state = ESsetG1;
		break;
	case '#':
		vc->vc_state = EShash;
		break;

	/* Complete two-byte sequences: act immediately. */
	case 'D':
		lf(vc);
		break;
	case 'E':
		cr(vc);
		lf(vc);
		break;
	case 'M':
		ri(vc);
		break;
	case 'H':
		/* Set a tab stop at the cursor if the column is representable. */
		if (vc->state.x < VC_TABSTOPS_COUNT)
			set_bit(vc->state.x, vc->vc_tab_stop);
		break;
	case 'Z':
		respond_ID(tty);
		break;
	case '7':
		save_cur(vc);
		break;
	case '8':
		restore_cur(vc);
		break;
	case 'c':
		reset_terminal(vc, 1);
		break;
	case '=':	/* Appl. keypad */
		set_kbd(vc, kbdapplic);
		break;
	case '>':	/* Numeric keypad */
		clr_kbd(vc, kbdapplic);
		break;
	}
}

/*
 * Dispatch a DEC private sequence, i.e. "ESC [ ? parameters char". The
 * parameters have been collected in @vc->vc_par; @c is the final character.
 */
static void csi_DEC(struct tty_struct *tty, struct vc_data *vc, u8 c)
{
	switch (c) {
	case 'h':
	case 'l':
		/* 'h' sets the listed modes, 'l' resets them. */
		csi_DEC_hl(vc, c == 'h');
		break;

	case 'c':
		/* A zero first parameter restores the default cursor. */
		if (!vc->vc_par[0])
			vc->vc_cursor_type = cur_default;
		else
			vc->vc_cursor_type = CUR_MAKE(vc->vc_par[0],
						      vc->vc_par[1],
						      vc->vc_par[2]);
		break;

	case 'm':
		clear_selection();
		/* A zero first parameter restores the saved complement mask. */
		vc->vc_complement_mask = vc->vc_par[0] ?
			(vc->vc_par[0] << 8 | vc->vc_par[1]) :
			vc->vc_s_complement_mask;
		break;

	case 'n':
		switch (vc->vc_par[0]) {
		case 5:
			status_report(tty);
			break;
		case 6:
			cursor_report(vc, tty);
			break;
		}
		break;
	}
}

/*
 * Handle Control Sequence Introducer control characters. That is
 * "ESC [ parameters char". Parameters are in @vc->vc_par and the char is in
 * @c here.
*/ static void csi_ECMA(struct tty_struct *tty, struct vc_data *vc, u8 c) { switch (c) { case 'G': case '`': if (vc->vc_par[0]) vc->vc_par[0]--; gotoxy(vc, vc->vc_par[0], vc->state.y); break; case 'A': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, vc->state.x, vc->state.y - vc->vc_par[0]); break; case 'B': case 'e': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, vc->state.x, vc->state.y + vc->vc_par[0]); break; case 'C': case 'a': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, vc->state.x + vc->vc_par[0], vc->state.y); break; case 'D': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, vc->state.x - vc->vc_par[0], vc->state.y); break; case 'E': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, 0, vc->state.y + vc->vc_par[0]); break; case 'F': if (!vc->vc_par[0]) vc->vc_par[0]++; gotoxy(vc, 0, vc->state.y - vc->vc_par[0]); break; case 'd': if (vc->vc_par[0]) vc->vc_par[0]--; gotoxay(vc, vc->state.x ,vc->vc_par[0]); break; case 'H': case 'f': if (vc->vc_par[0]) vc->vc_par[0]--; if (vc->vc_par[1]) vc->vc_par[1]--; gotoxay(vc, vc->vc_par[1], vc->vc_par[0]); break; case 'J': csi_J(vc, vc->vc_par[0]); break; case 'K': csi_K(vc); break; case 'L': csi_L(vc); break; case 'M': csi_M(vc); break; case 'P': csi_P(vc); break; case 'c': if (!vc->vc_par[0]) respond_ID(tty); break; case 'g': if (!vc->vc_par[0] && vc->state.x < VC_TABSTOPS_COUNT) set_bit(vc->state.x, vc->vc_tab_stop); else if (vc->vc_par[0] == 3) bitmap_zero(vc->vc_tab_stop, VC_TABSTOPS_COUNT); break; case 'h': csi_hl(vc, true); break; case 'l': csi_hl(vc, false); break; case 'm': csi_m(vc); break; case 'n': if (vc->vc_par[0] == 5) status_report(tty); else if (vc->vc_par[0] == 6) cursor_report(vc, tty); break; case 'q': /* DECLL - but only 3 leds */ /* map 0,1,2,3 to 0,1,2,4 */ if (vc->vc_par[0] < 4) vt_set_led_state(vc->vc_num, (vc->vc_par[0] < 3) ? 
vc->vc_par[0] : 4); break; case 'r': if (!vc->vc_par[0]) vc->vc_par[0]++; if (!vc->vc_par[1]) vc->vc_par[1] = vc->vc_rows; /* Minimum allowed region is 2 lines */ if (vc->vc_par[0] < vc->vc_par[1] && vc->vc_par[1] <= vc->vc_rows) { vc->vc_top = vc->vc_par[0] - 1; vc->vc_bottom = vc->vc_par[1]; gotoxay(vc, 0, 0); } break; case 's': save_cur(vc); break; case 'u': restore_cur(vc); break; case 'X': csi_X(vc); break; case '@': csi_at(vc, vc->vc_par[0]); break; case ']': csi_RSB(vc); break; } } static void vc_reset_params(struct vc_data *vc) { memset(vc->vc_par, 0, sizeof(vc->vc_par)); vc->vc_npar = 0; } /* console_lock is held */ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, u8 c) { /* * Control characters can be used in the _middle_ * of an escape sequence, aside from ANSI control strings. */ if (ansi_control_string(vc->vc_state) && c >= ASCII_IGNORE_FIRST && c <= ASCII_IGNORE_LAST) return; if (handle_ascii(tty, vc, c)) return; switch(vc->vc_state) { case ESesc: /* ESC */ handle_esc(tty, vc, c); return; case ESnonstd: /* ESC ] aka OSC */ switch (c) { case 'P': /* palette escape sequence */ vc_reset_params(vc); vc->vc_state = ESpalette; return; case 'R': /* reset palette */ reset_palette(vc); break; case '0' ... 
'9': vc->vc_state = ESosc; return; } vc->vc_state = ESnormal; return; case ESpalette: /* ESC ] P aka OSC P */ if (isxdigit(c)) { vc->vc_par[vc->vc_npar++] = hex_to_bin(c); if (vc->vc_npar == 7) { int i = vc->vc_par[0] * 3, j = 1; vc->vc_palette[i] = 16 * vc->vc_par[j++]; vc->vc_palette[i++] += vc->vc_par[j++]; vc->vc_palette[i] = 16 * vc->vc_par[j++]; vc->vc_palette[i++] += vc->vc_par[j++]; vc->vc_palette[i] = 16 * vc->vc_par[j++]; vc->vc_palette[i] += vc->vc_par[j]; set_palette(vc); vc->vc_state = ESnormal; } } else vc->vc_state = ESnormal; return; case ESsquare: /* ESC [ aka CSI, parameters or modifiers expected */ vc_reset_params(vc); vc->vc_state = ESgetpars; switch (c) { case '[': /* Function key */ vc->vc_state = ESfunckey; return; case '?': vc->vc_priv = EPdec; return; case '>': vc->vc_priv = EPgt; return; case '=': vc->vc_priv = EPeq; return; case '<': vc->vc_priv = EPlt; return; } vc->vc_priv = EPecma; fallthrough; case ESgetpars: /* ESC [ aka CSI, parameters expected */ switch (c) { case ';': if (vc->vc_npar < NPAR - 1) { vc->vc_npar++; return; } break; case '0' ... 
'9': vc->vc_par[vc->vc_npar] *= 10; vc->vc_par[vc->vc_npar] += c - '0'; return; } if (c >= ASCII_CSI_IGNORE_FIRST && c <= ASCII_CSI_IGNORE_LAST) { vc->vc_state = EScsiignore; return; } /* parameters done, handle the control char @c */ vc->vc_state = ESnormal; switch (vc->vc_priv) { case EPdec: csi_DEC(tty, vc, c); return; case EPecma: csi_ECMA(tty, vc, c); return; default: return; } case EScsiignore: if (c >= ASCII_CSI_IGNORE_FIRST && c <= ASCII_CSI_IGNORE_LAST) return; vc->vc_state = ESnormal; return; case ESpercent: /* ESC % */ vc->vc_state = ESnormal; switch (c) { case '@': /* defined in ISO 2022 */ vc->vc_utf = 0; return; case 'G': /* prelim official escape code */ case '8': /* retained for compatibility */ vc->vc_utf = 1; return; } return; case ESfunckey: /* ESC [ [ aka CSI [ */ vc->vc_state = ESnormal; return; case EShash: /* ESC # */ vc->vc_state = ESnormal; if (c == '8') { /* DEC screen alignment test. kludge :-) */ vc->vc_video_erase_char = (vc->vc_video_erase_char & 0xff00) | 'E'; csi_J(vc, CSI_J_VISIBLE); vc->vc_video_erase_char = (vc->vc_video_erase_char & 0xff00) | ' '; do_update_region(vc, vc->vc_origin, vc->vc_screenbuf_size / 2); } return; case ESsetG0: /* ESC ( */ vc_setGx(vc, 0, c); vc->vc_state = ESnormal; return; case ESsetG1: /* ESC ) */ vc_setGx(vc, 1, c); vc->vc_state = ESnormal; return; case ESapc: /* ESC _ */ return; case ESosc: /* ESC ] [0-9] aka OSC [0-9] */ return; case ESpm: /* ESC ^ */ return; case ESdcs: /* ESC P */ return; default: vc->vc_state = ESnormal; } } struct vc_draw_region { unsigned long from, to; int x; }; static void con_flush(struct vc_data *vc, struct vc_draw_region *draw) { if (draw->x < 0) return; vc->vc_sw->con_putcs(vc, (u16 *)draw->from, (u16 *)draw->to - (u16 *)draw->from, vc->state.y, draw->x); draw->x = -1; } static inline int vc_translate_ascii(const struct vc_data *vc, int c) { if (IS_ENABLED(CONFIG_CONSOLE_TRANSLATIONS)) { if (vc->vc_toggle_meta) c |= 0x80; return vc->vc_translate[c]; } return c; } /** * 
vc_sanitize_unicode - Replace invalid Unicode code points with ``U+FFFD``
 * @c: the received code point
 */
static inline int vc_sanitize_unicode(const int c)
{
	/* The surrogate range is the only one rejected here. */
	if (c >= 0xd800 && c <= 0xdfff)
		return 0xfffd;

	return c;
}

/**
 * vc_translate_unicode - Combine UTF-8 into Unicode in &vc_data.vc_utf_char
 * @vc: virtual console
 * @c: UTF-8 byte to translate
 * @rescan: set to true iff @c wasn't consumed here and needs to be re-processed
 *
 * * &vc_data.vc_utf_char is the being-constructed Unicode code point.
 * * &vc_data.vc_utf_count is the number of continuation bytes still expected to
 *   arrive.
 * * &vc_data.vc_npar is the number of continuation bytes arrived so far.
 *
 * Return:
 * * %-1 - Input OK so far, @c consumed, further bytes expected.
 * * %0xFFFD - Possibility 1: input invalid, @c may have been consumed (see
 *             desc. of @rescan). Possibility 2: input OK, @c consumed,
 *             ``U+FFFD`` is the resulting code point. ``U+FFFD`` is valid,
 *             ``REPLACEMENT CHARACTER``.
 * * otherwise - Input OK, @c consumed, resulting code point returned.
 */
static int vc_translate_unicode(struct vc_data *vc, int c, bool *rescan)
{
	/* Largest code point encodable in 1, 2, 3 and 4 bytes respectively. */
	static const u32 utf8_length_changes[] = {0x7f, 0x7ff, 0xffff, 0x10ffff};

	/* Continuation byte received */
	if ((c & 0xc0) == 0x80) {
		/* Unexpected continuation byte? */
		if (!vc->vc_utf_count)
			goto bad_sequence;

		vc->vc_utf_char = (vc->vc_utf_char << 6) | (c & 0x3f);
		vc->vc_npar++;
		if (--vc->vc_utf_count)
			goto need_more_bytes;

		/* Got a whole character */
		c = vc->vc_utf_char;
		/* Reject overlong sequences */
		if (c <= utf8_length_changes[vc->vc_npar - 1] ||
				c > utf8_length_changes[vc->vc_npar])
			goto bad_sequence;

		return vc_sanitize_unicode(c);
	}

	/* Single ASCII byte or first byte of a sequence received */
	if (vc->vc_utf_count) {
		/* A continuation byte was expected */
		*rescan = true;
		vc->vc_utf_count = 0;
		goto bad_sequence;
	}

	/* Nothing to do if an ASCII byte was received */
	if (c <= 0x7f)
		return c;

	/* First byte of a multibyte sequence received */
	vc->vc_npar = 0;
	if ((c & 0xe0) == 0xc0) {
		vc->vc_utf_count = 1;
		vc->vc_utf_char = (c & 0x1f);
	} else if ((c & 0xf0) == 0xe0) {
		vc->vc_utf_count = 2;
		vc->vc_utf_char = (c & 0x0f);
	} else if ((c & 0xf8) == 0xf0) {
		vc->vc_utf_count = 3;
		vc->vc_utf_char = (c & 0x07);
	} else {
		goto bad_sequence;
	}

need_more_bytes:
	return -1;

bad_sequence:
	return 0xfffd;
}

/*
 * Translate the input byte *@c into the value to display. In UTF-8 mode
 * (without disp_ctrl) *@c is also overwritten with the decoded result; in
 * all other cases *@c is left alone.
 */
static int vc_translate(struct vc_data *vc, int *c, bool *rescan)
{
	/* Do no translation at all in control states */
	if (vc->vc_state != ESnormal)
		return *c;

	if (vc->vc_utf && !vc->vc_disp_ctrl)
		return *c = vc_translate_unicode(vc, *c, rescan);

	/* no utf or alternate charset mode */
	return vc_translate_ascii(vc, *c);
}

/*
 * Compute the attribute byte used to draw a cell "inverted": toggle bit 3
 * on non-colour consoles, otherwise swap the foreground and background
 * fields (whose bit positions depend on whether a 512-glyph font is in use).
 */
static inline unsigned char vc_invert_attr(const struct vc_data *vc)
{
	if (!vc->vc_can_do_color)
		return vc->vc_attr ^ 0x08;

	if (vc->vc_hi_font_mask == 0x100)
		return   (vc->vc_attr & 0x11) |
			((vc->vc_attr & 0xe0) >> 4) |
			((vc->vc_attr & 0x0e) << 4);

	return   (vc->vc_attr & 0x88) |
		((vc->vc_attr & 0x70) >> 4) |
		((vc->vc_attr & 0x07) << 4);
}

/*
 * Decide whether the input must go through the control-sequence machinery
 * (true) or may be drawn as a glyph (false). @tc is the translated value,
 * @c the code it was derived from.
 */
static bool vc_is_control(struct vc_data *vc, int tc, int c)
{
	/*
	 * A bitmap for codes <32. A bit of 1 indicates that the code
	 * corresponding to that bit number invokes some special action (such
	 * as cursor movement) and should not be displayed as a glyph unless
	 * the disp_ctrl mode is explicitly enabled.
	 */
	static const u32 CTRL_ACTION = BIT(ASCII_NULL) |
		GENMASK(ASCII_SHIFTIN, ASCII_BELL) | BIT(ASCII_CANCEL) |
		BIT(ASCII_SUBSTITUTE) | BIT(ASCII_ESCAPE);
	/* Cannot be overridden by disp_ctrl */
	static const u32 CTRL_ALWAYS = BIT(ASCII_NULL) | BIT(ASCII_BACKSPACE) |
		BIT(ASCII_LINEFEED) | BIT(ASCII_SHIFTIN) | BIT(ASCII_SHIFTOUT) |
		BIT(ASCII_CAR_RET) | BIT(ASCII_FORMFEED) | BIT(ASCII_ESCAPE);

	/* In the middle of an escape sequence everything is control input. */
	if (vc->vc_state != ESnormal)
		return true;

	if (!tc)
		return true;

	/*
	 * If the original code was a control character we only allow a glyph
	 * to be displayed if the code is not normally used (such as for cursor
	 * movement) or if the disp_ctrl mode has been explicitly enabled.
	 * Certain characters (as given by the CTRL_ALWAYS bitmap) are always
	 * displayed as control characters, as the console would be pretty
	 * useless without them; to display an arbitrary font position use the
	 * direct-to-font zone in UTF-8 mode.
	 */
	if (c < BITS_PER_TYPE(CTRL_ALWAYS)) {
		if (vc->vc_disp_ctrl)
			return CTRL_ALWAYS & BIT(c);
		else
			return vc->vc_utf || (CTRL_ACTION & BIT(c));
	}

	if (c == ASCII_DEL && !vc->vc_disp_ctrl)
		return true;

	if (c == ASCII_EXT_CSI)
		return true;

	return false;
}

/*
 * Step the write position back by one cell unless the cursor is in column 0
 * or a wrap is pending; the pending-wrap flag is cleared in either case.
 */
static void vc_con_rewind(struct vc_data *vc)
{
	if (vc->state.x && !vc->vc_need_wrap) {
		vc->vc_pos -= 2;
		vc->state.x--;
	}
	vc->vc_need_wrap = 0;
}

#define UCS_ZWS		0x200b	/* Zero Width Space */
#define UCS_VS16	0xfe0f	/* Variation Selector 16 */
#define UCS_REPLACEMENT	0xfffd	/* Replacement Character */

/*
 * Work out how many columns the code point *@c occupies and, for zero-width
 * code points, whether it can be merged with what is already on screen.
 * May rewrite *@c and *@tc and rewind the cursor.
 *
 * Returns the number of columns to write: 2, 1, or 0 when nothing is to be
 * written.
 */
static int vc_process_ucs(struct vc_data *vc, int *c, int *tc)
{
	u32 prev_c, curr_c = *c;

	if (ucs_is_double_width(curr_c)) {
		/*
		 * The Unicode screen memory is allocated only when
		 * required.  This is one such case as we need to remember
		 * which displayed characters are double-width.
		 */
		vc_uniscr_check(vc);
		return 2;
	}

	if (!ucs_is_zero_width(curr_c))
		return 1;

	/* From here curr_c is known to be zero-width. */

	if (ucs_is_double_width(vc_uniscr_getc(vc, -2))) {
		/*
		 * Let's merge this zero-width code point with the preceding
		 * double-width code point by replacing the existing
		 * zero-width space padding. To do so we rewind one column
		 * and pretend this has a width of 1.
		 * We give the legacy display the same initial space padding.
		 */
		vc_con_rewind(vc);
		*tc = ' ';
		return 1;
	}

	/* From here the preceding character, if any, must be single-width. */
	prev_c = vc_uniscr_getc(vc, -1);

	if (curr_c == UCS_VS16 && prev_c != 0) {
		/*
		 * VS16 (U+FE0F) is special. It typically turns the preceding
		 * single-width character into a double-width one. Let it
		 * have a width of 1 effectively making the combination with
		 * the preceding character double-width.
		 */
		*tc = ' ';
		return 1;
	}

	/* try recomposition */
	prev_c = ucs_recompose(prev_c, curr_c);
	if (prev_c != 0) {
		vc_con_rewind(vc);
		*tc = *c = prev_c;
		return 1;
	}

	/* Otherwise zero-width code points are ignored. */
	return 0;
}

/*
 * Look up the font position for @tc.
 *
 * Returns the glyph index, -1 when there is nothing to display, or whatever
 * conv_uni_to_pc() yields for U+FFFD when no glyph or fallback was found.
 */
static int vc_get_glyph(struct vc_data *vc, int tc)
{
	int glyph = conv_uni_to_pc(vc, tc);
	u16 charmask = vc->vc_hi_font_mask ? 0x1ff : 0xff;

	/* A result within the font's index range is a real glyph. */
	if (!(glyph & ~charmask))
		return glyph;

	if (glyph == -1)
		return -1; /* nothing to display */

	/* Glyph not found */
	if ((!vc->vc_utf || vc->vc_disp_ctrl || tc < 128) && !(tc & ~charmask)) {
		/*
		 * In legacy mode use the glyph we get by a 1:1 mapping.
		 * This would make absolutely no sense with Unicode in mind, but do this for
		 * ASCII characters since a font may lack Unicode mapping info and we don't
		 * want to end up with having question marks only.
		 */
		return tc;
	}

	/*
	 * The Unicode screen memory is allocated only when required.
	 * This is one such case: we're about to "cheat" with the displayed
	 * character meaning the simple screen buffer won't hold the original
	 * information, whereas the Unicode screen buffer always does.
	 */
	vc_uniscr_check(vc);

	/* Try getting a simpler fallback character. */
	tc = ucs_get_fallback(tc);
	if (tc)
		return vc_get_glyph(vc, tc);

	/* Display U+FFFD (Unicode Replacement Character). */
	return conv_uni_to_pc(vc, UCS_REPLACEMENT);
}

/*
 * Write one printable character to the screen buffer at the cursor and
 * extend the pending draw region @draw. @tc is the translated value, @c the
 * code recorded in the Unicode screen and reported to notifiers.
 *
 * Returns -1 when there is nothing to display (no notification is sent in
 * that case), 0 otherwise.
 */
static int vc_con_write_normal(struct vc_data *vc, int tc, int c,
		struct vc_draw_region *draw)
{
	int next_c;
	unsigned char vc_attr = vc->vc_attr;
	u16 himask = vc->vc_hi_font_mask;
	u8 width = 1;
	bool inverse = false;

	if (vc->vc_utf && !vc->vc_disp_ctrl) {
		width = vc_process_ucs(vc, &c, &tc);
		if (!width)
			goto out;
	}

	/* Now try to find out how to display it */
	tc = vc_get_glyph(vc, tc);
	if (tc == -1)
		return -1; /* nothing to display */
	if (tc < 0) {
		/* No usable glyph: show an inverted question mark instead. */
		inverse = true;
		tc = conv_uni_to_pc(vc, '?');
		if (tc < 0)
			tc = '?';

		vc_attr = vc_invert_attr(vc);
		con_flush(vc, draw);
	}

	next_c = c;
	/* One iteration per column occupied by the character. */
	while (1) {
		if (vc->vc_need_wrap || vc->vc_decim)
			con_flush(vc, draw);
		if (vc->vc_need_wrap) {
			cr(vc);
			lf(vc);
		}
		if (vc->vc_decim)
			insert_char(vc, 1);
		vc_uniscr_putc(vc, next_c);

		/* Combine glyph index and attribute into the 16-bit cell. */
		if (himask)
			tc = ((tc & 0x100) ? himask : 0) |
			      (tc &  0xff);
		tc |= (vc_attr << 8) & ~himask;

		scr_writew(tc, (u16 *)vc->vc_pos);

		/* Open a new draw region if none is pending. */
		if (con_should_update(vc) && draw->x < 0) {
			draw->x = vc->state.x;
			draw->from = vc->vc_pos;
		}
		if (vc->state.x == vc->vc_cols - 1) {
			/* Last column: stay put, wrap later if autowrap is on. */
			vc->vc_need_wrap = vc->vc_decawm;
			draw->to = vc->vc_pos + 2;
		} else {
			vc->state.x++;
			draw->to = (vc->vc_pos += 2);
		}

		if (!--width)
			break;

		/* A space is printed in the second column */
		tc = conv_uni_to_pc(vc, ' ');
		if (tc < 0)
			tc = ' ';
		/*
		 * Store a zero-width space in the Unicode screen given that
		 * the previous code point is semantically double width.
		 */
		next_c = UCS_ZWS;
	}

out:
	notify_write(vc, c);

	if (inverse)
		con_flush(vc, draw);

	return 0;
}

/*
 * Write @count bytes from @buf to the console behind @tty, interpreting
 * control sequences on the way. Stops early if the tty gets flow-stopped.
 *
 * Returns the number of bytes consumed; in interrupt context nothing is
 * written and @count is returned.
 *
 * acquires console_lock
 */
static int do_con_write(struct tty_struct *tty, const u8 *buf, int count)
{
	struct vc_draw_region draw = {
		.x = -1,
	};
	int c, tc, n = 0;
	unsigned int currcons;
	struct vc_data *vc = tty->driver_data;
	struct vt_notifier_param param;
	bool rescan;

	if (in_interrupt())
		return count;

	guard(console_lock)();
	currcons = vc->vc_num;
	if (!vc_cons_allocated(currcons)) {
		/* could this happen? */
		pr_warn_once("con_write: tty %d not allocated\n", currcons+1);
		return 0;
	}

	/* undraw cursor first */
	if (con_is_fg(vc))
		hide_cursor(vc);

	param.vc = vc;

	while (!tty->flow.stopped && count) {
		u8 orig = *buf;
		buf++;
		n++;
		count--;
rescan_last_byte:
		c = orig;
		rescan = false;

		tc = vc_translate(vc, &c, &rescan);
		/* -1: byte absorbed into an incomplete UTF-8 sequence. */
		if (tc == -1)
			continue;

		param.c = tc;
		if (atomic_notifier_call_chain(&vt_notifier_list, VT_PREWRITE,
					&param) == NOTIFY_STOP)
			continue;

		if (vc_is_control(vc, tc, c)) {
			con_flush(vc, &draw);
			do_con_trol(tty, vc, orig);
			continue;
		}

		if (vc_con_write_normal(vc, tc, c, &draw) < 0)
			continue;

		/* The byte that broke a UTF-8 sequence is processed again. */
		if (rescan)
			goto rescan_last_byte;
	}
	con_flush(vc, &draw);
	console_conditional_schedule();
	notify_update(vc);

	return n;
}

/*
 * This is the console switching callback.
 *
 * Doing console switching in a process context allows
 * us to do the switches asynchronously (needed when we want
 * to switch due to a keyboard interrupt).  Synchronization
 * with other console code and prevention of re-entrancy is
 * ensured with console_lock.
*/ static void console_callback(struct work_struct *ignored) { guard(console_lock)(); if (want_console >= 0) { if (want_console != fg_console && vc_cons_allocated(want_console)) { hide_cursor(vc_cons[fg_console].d); change_console(vc_cons[want_console].d); /* we only changed when the console had already been allocated - a new console is not created in an interrupt routine */ } want_console = -1; } if (do_poke_blanked_console) { /* do not unblank for a LED change */ do_poke_blanked_console = 0; poke_blanked_console(); } if (scrollback_delta) { struct vc_data *vc = vc_cons[fg_console].d; clear_selection(); if (vc->vc_mode == KD_TEXT && vc->vc_sw->con_scrolldelta) vc->vc_sw->con_scrolldelta(vc, scrollback_delta); scrollback_delta = 0; } if (blank_timer_expired) { do_blank_screen(0); blank_timer_expired = 0; } notify_update(vc_cons[fg_console].d); } int set_console(int nr) { struct vc_data *vc = vc_cons[fg_console].d; if (!vc_cons_allocated(nr) || vt_dont_switch || (vc->vt_mode.mode == VT_AUTO && vc->vc_mode == KD_GRAPHICS)) { /* * Console switch will fail in console_callback() or * change_console() so there is no point scheduling * the callback * * Existing set_console() users don't check the return * value so this shouldn't break anything */ return -EINVAL; } want_console = nr; schedule_console_callback(); return 0; } struct tty_driver *console_driver; #ifdef CONFIG_VT_CONSOLE /** * vt_kmsg_redirect() - sets/gets the kernel message console * @new: the new virtual terminal number or -1 if the console should stay * unchanged * * By default, the kernel messages are always printed on the current virtual * console. However, the user may modify that default with the * %TIOCL_SETKMSGREDIRECT ioctl call. * * This function sets the kernel message console to be @new. It returns the old * virtual console number. The virtual terminal number %0 (both as parameter and * return value) means no redirection (i.e. always printed on the currently * active console). 
*
 * The parameter -1 means that only the current console is returned, but the
 * value is not modified. You may use the macro vt_get_kmsg_redirect() in that
 * case to make the code more understandable.
 *
 * When the kernel is compiled without %CONFIG_VT_CONSOLE, this function ignores
 * the parameter and always returns %0.
 */
int vt_kmsg_redirect(int new)
{
	static int kmsg_con;

	/* xchg() stores @new and hands back the previous value in one step. */
	if (new != -1)
		return xchg(&kmsg_con, new);
	else
		return kmsg_con;
}

/*
 * Console on virtual terminal
 *
 * Writes @count bytes from @b straight into the screen buffer of the
 * foreground console, or of the kmsg-redirect console when one is set and
 * allocated. Only LF, CR and BS are interpreted; output is skipped when the
 * console is not in text mode or another print is already in progress.
 *
 * The console must be locked when we get here.
 */

static void vt_console_print(struct console *co, const char *b, unsigned count)
{
	struct vc_data *vc = vc_cons[fg_console].d;
	unsigned char c;
	static DEFINE_SPINLOCK(printing_lock);
	const ushort *start;
	ushort start_x, cnt;
	int kmsg_console;

	WARN_CONSOLE_UNLOCKED();

	/* this protects against concurrent oops only */
	if (!spin_trylock(&printing_lock))
		return;

	/* The redirect value is 1-based; 0 means "no redirection". */
	kmsg_console = vt_get_kmsg_redirect();
	if (kmsg_console && vc_cons_allocated(kmsg_console - 1))
		vc = vc_cons[kmsg_console - 1].d;

	if (!vc_cons_allocated(fg_console)) {
		/* impossible */
		/* printk("vt_console_print: tty %d not allocated ??\n", currcons+1); */
		goto quit;
	}

	if (vc->vc_mode != KD_TEXT)
		goto quit;

	/* undraw cursor first */
	if (con_is_fg(vc))
		hide_cursor(vc);

	/* start/start_x/cnt describe the run of cells not yet drawn. */
	start = (ushort *)vc->vc_pos;
	start_x = vc->state.x;
	cnt = 0;
	while (count--) {
		c = *b++;
		if (c == ASCII_LINEFEED || c == ASCII_CAR_RET ||
		    c == ASCII_BACKSPACE || vc->vc_need_wrap) {
			/* The cursor is about to jump: draw the run so far. */
			if (cnt && con_is_visible(vc))
				vc->vc_sw->con_putcs(vc, start, cnt, vc->state.y, start_x);
			cnt = 0;
			if (c == ASCII_BACKSPACE) {
				bs(vc);
				start = (ushort *)vc->vc_pos;
				start_x = vc->state.x;
				continue;
			}
			/* LF and a pending wrap both move to the next line. */
			if (c != ASCII_CAR_RET)
				lf(vc);
			cr(vc);
			start = (ushort *)vc->vc_pos;
			start_x = vc->state.x;
			if (c == ASCII_LINEFEED || c == ASCII_CAR_RET)
				continue;
		}
		vc_uniscr_putc(vc, c);
		scr_writew((vc->vc_attr << 8) + c, (unsigned short *)vc->vc_pos);
		notify_write(vc, c);
		cnt++;
		if (vc->state.x == vc->vc_cols - 1) {
			vc->vc_need_wrap = 1;
		} else {
			vc->vc_pos += 2;
			vc->state.x++;
		}
	}
	if (cnt && con_is_visible(vc))
		vc->vc_sw->con_putcs(vc, start, cnt, vc->state.y, start_x);
	set_cursor(vc);
	notify_update(vc);

quit:
	spin_unlock(&printing_lock);
}

/*
 * Report the tty driver behind console @c. @c->index is treated as 1-based;
 * index 0 selects the current foreground console.
 */
static struct tty_driver *vt_console_device(struct console *c, int *index)
{
	*index = c->index ? c->index-1 : fg_console;
	return console_driver;
}

/* Reject console indices at or beyond MAX_NR_CONSOLES; @options is unused. */
static int vt_console_setup(struct console *co, char *options)
{
	return co->index >= MAX_NR_CONSOLES ? -EINVAL : 0;
}

static struct console vt_console_driver = {
	.name		= "tty",
	.setup		= vt_console_setup,
	.write		= vt_console_print,
	.device		= vt_console_device,
	.unblank	= unblank_screen,
	.flags		= CON_PRINTBUFFER,
	.index		= -1,
};
#endif

/*
 *	Handling of Linux-specific VC ioctls
 */

/*
 * Generally a bit racy with respect to console_lock();.
 *
 * There are some functions which don't need it.
 *
 * There are some functions which can sleep for arbitrary periods
 * (paste_selection) but we don't need the lock there anyway.
 *
 * set_selection_user has locking, and definitely needs it
 *
 * @arg is a user pointer whose first byte selects the subcommand. Callers
 * for whom @tty is not the controlling terminal need CAP_SYS_ADMIN.
 */

int tioclinux(struct tty_struct *tty, unsigned long arg)
{
	char type, data;
	char __user *p = (char __user *)arg;
	/* Subcommand payload: either right after the first u32 or first byte. */
	void __user *param_aligned32 = (u32 __user *)arg + 1;
	void __user *param = (void __user *)arg + 1;
	int lines;
	int ret;

	if (current->signal->tty != tty && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (get_user(type, p))
		return -EFAULT;
	ret = 0;

	switch (type) {
	case TIOCL_SETSEL:
		return set_selection_user(param, tty);
	case TIOCL_PASTESEL:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		return paste_selection(tty);
	case TIOCL_UNBLANKSCREEN:
		scoped_guard(console_lock)
			unblank_screen();
		break;
	case TIOCL_SELLOADLUT:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;
		return sel_loadlut(param_aligned32);
	case TIOCL_GETSHIFTSTATE:
		/*
		 * Make it possible to react to Shift+Mousebutton. Note that
		 * 'shift_state' is an undocumented kernel-internal variable;
		 * programs not closely related to the kernel should not use
		 * this.
		 */
		data = vt_get_shift_state();
		return put_user(data, p);
	case TIOCL_GETMOUSEREPORTING:
		scoped_guard(console_lock)	/* May be overkill */
			data = mouse_reporting();
		return put_user(data, p);
	case TIOCL_SETVESABLANK:
		return set_vesa_blanking(param);
	case TIOCL_GETKMSGREDIRECT:
		data = vt_get_kmsg_redirect();
		return put_user(data, p);
	case TIOCL_SETKMSGREDIRECT:
		if (!capable(CAP_SYS_ADMIN))
			return -EPERM;

		/* The new console number is the byte after the subcommand. */
		if (get_user(data, p+1))
			return -EFAULT;

		vt_kmsg_redirect(data);

		break;
	case TIOCL_GETFGCONSOLE:
		/*
		 * No locking needed as this is a transiently correct return
		 * anyway if the caller hasn't disabled switching.
		 */
		return fg_console;
	case TIOCL_SCROLLCONSOLE:
		if (get_user(lines, (s32 __user *)param_aligned32))
			return -EFAULT;

		/*
		 * Needs the console lock here. Note that lots of other calls
		 * need fixing before the lock is actually useful!
		 */
		scoped_guard(console_lock)
			scrollfront(vc_cons[fg_console].d, lines);
		break;
	case TIOCL_BLANKSCREEN:	/* until explicitly unblanked, not only poked */
		scoped_guard(console_lock) {
			ignore_poke = 1;
			do_blank_screen(0);
		}
		break;
	case TIOCL_BLANKEDSCREEN:
		return console_blanked;
	case TIOCL_GETBRACKETEDPASTE:
		return get_bracketed_paste(tty);
	default:
		return -EINVAL;
	}

	return ret;
}

/*
 * /dev/ttyN handling
 */

/* tty write hook: write the buffer, then update the cursor. */
static ssize_t con_write(struct tty_struct *tty, const u8 *buf, size_t count)
{
	int	retval;

	retval = do_con_write(tty, buf, count);
	con_flush_chars(tty);

	return retval;
}

/* tty put_char hook: write a single byte through do_con_write(). */
static int con_put_char(struct tty_struct *tty, u8 ch)
{
	return do_con_write(tty, &ch, 1);
}

/* Report the room available for writing: none while the tty is stopped. */
static unsigned int con_write_room(struct tty_struct *tty)
{
	if (tty->flow.stopped)
		return 0;
	return 32768;		/* No limit, really; we're not buffering */
}

/*
 * con_throttle and con_unthrottle are only used for
 * paste_selection(), which has to stuff in a large number of
 * characters...
*/ static void con_throttle(struct tty_struct *tty) { } static void con_unthrottle(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; wake_up_interruptible(&vc->paste_wait); } /* * Turn the Scroll-Lock LED on when the tty is stopped */ static void con_stop(struct tty_struct *tty) { int console_num; if (!tty) return; console_num = tty->index; if (!vc_cons_allocated(console_num)) return; vt_kbd_con_stop(console_num); } /* * Turn the Scroll-Lock LED off when the console is started */ static void con_start(struct tty_struct *tty) { int console_num; if (!tty) return; console_num = tty->index; if (!vc_cons_allocated(console_num)) return; vt_kbd_con_start(console_num); } static void con_flush_chars(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; if (in_interrupt()) /* from flush_to_ldisc */ return; guard(console_lock)(); set_cursor(vc); } /* * Allocate the console screen memory. */ static int con_install(struct tty_driver *driver, struct tty_struct *tty) { unsigned int currcons = tty->index; struct vc_data *vc; int ret; guard(console_lock)(); ret = vc_allocate(currcons); if (ret) return ret; vc = vc_cons[currcons].d; /* Still being freed */ if (vc->port.tty) return -ERESTARTSYS; ret = tty_port_install(&vc->port, driver, tty); if (ret) return ret; tty->driver_data = vc; vc->port.tty = tty; tty_port_get(&vc->port); if (!tty->winsize.ws_row && !tty->winsize.ws_col) { tty->winsize.ws_row = vc_cons[currcons].d->vc_rows; tty->winsize.ws_col = vc_cons[currcons].d->vc_cols; } if (vc->vc_utf) tty->termios.c_iflag |= IUTF8; else tty->termios.c_iflag &= ~IUTF8; return 0; } static int con_open(struct tty_struct *tty, struct file *filp) { /* everything done in install */ return 0; } static void con_close(struct tty_struct *tty, struct file *filp) { /* Nothing to do - we defer to shutdown */ } static void con_shutdown(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; BUG_ON(vc == NULL); guard(console_lock)(); vc->port.tty = NULL; } static 
void con_cleanup(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; tty_port_put(&vc->port); } /* * We can't deal with anything but the N_TTY ldisc, * because we can sleep in our write() routine. */ static int con_ldisc_ok(struct tty_struct *tty, int ldisc) { return ldisc == N_TTY ? 0 : -EINVAL; } static int default_color = 7; /* white */ static int default_italic_color = 2; // green (ASCII) static int default_underline_color = 3; // cyan (ASCII) module_param_named(color, default_color, int, S_IRUGO | S_IWUSR); module_param_named(italic, default_italic_color, int, S_IRUGO | S_IWUSR); module_param_named(underline, default_underline_color, int, S_IRUGO | S_IWUSR); static void vc_init(struct vc_data *vc, int do_clear) { int j, k ; set_origin(vc); vc->vc_pos = vc->vc_origin; reset_vc(vc); for (j=k=0; j<16; j++) { vc->vc_palette[k++] = default_red[j] ; vc->vc_palette[k++] = default_grn[j] ; vc->vc_palette[k++] = default_blu[j] ; } vc->vc_def_color = default_color; vc->vc_ulcolor = default_underline_color; vc->vc_itcolor = default_italic_color; vc->vc_halfcolor = 0x08; /* grey */ init_waitqueue_head(&vc->paste_wait); reset_terminal(vc, do_clear); } /* * This routine initializes console interrupts, and does nothing * else. If you want the screen to clear, call tty_write with * the appropriate escape-sequence. 
*/ static int __init con_init(void) { const char *display_desc = NULL; struct vc_data *vc; unsigned int currcons = 0, i; console_lock(); if (!conswitchp) conswitchp = &dummy_con; display_desc = conswitchp->con_startup(); if (!display_desc) { fg_console = 0; console_unlock(); return 0; } for (i = 0; i < MAX_NR_CON_DRIVER; i++) { struct con_driver *con_driver = &registered_con_driver[i]; if (con_driver->con == NULL) { con_driver->con = conswitchp; con_driver->desc = display_desc; con_driver->flag = CON_DRIVER_FLAG_INIT; con_driver->first = 0; con_driver->last = MAX_NR_CONSOLES - 1; break; } } for (i = 0; i < MAX_NR_CONSOLES; i++) con_driver_map[i] = conswitchp; if (blankinterval) { blank_state = blank_normal_wait; mod_timer(&console_timer, jiffies + (blankinterval * HZ)); } for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) { vc_cons[currcons].d = vc = kzalloc(sizeof(struct vc_data), GFP_NOWAIT); INIT_WORK(&vc_cons[currcons].SAK_work, vc_SAK); tty_port_init(&vc->port); visual_init(vc, currcons, true); /* Assuming vc->vc_{cols,rows,screenbuf_size} are sane here. */ vc->vc_screenbuf = kzalloc(vc->vc_screenbuf_size, GFP_NOWAIT); vc_init(vc, currcons || !vc->vc_sw->con_save_screen); } currcons = fg_console = 0; master_display_fg = vc = vc_cons[currcons].d; set_origin(vc); save_screen(vc); gotoxy(vc, vc->state.x, vc->state.y); csi_J(vc, CSI_J_CURSOR_TO_END); update_screen(vc); pr_info("Console: %s %s %dx%d\n", vc->vc_can_do_color ? 
"colour" : "mono", display_desc, vc->vc_cols, vc->vc_rows); console_unlock(); #ifdef CONFIG_VT_CONSOLE register_console(&vt_console_driver); #endif return 0; } console_initcall(con_init); static const struct tty_operations con_ops = { .install = con_install, .open = con_open, .close = con_close, .write = con_write, .write_room = con_write_room, .put_char = con_put_char, .flush_chars = con_flush_chars, .ioctl = vt_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = vt_compat_ioctl, #endif .stop = con_stop, .start = con_start, .throttle = con_throttle, .unthrottle = con_unthrottle, .resize = vt_resize, .shutdown = con_shutdown, .cleanup = con_cleanup, .ldisc_ok = con_ldisc_ok, }; static struct cdev vc0_cdev; static ssize_t show_tty_active(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "tty%d\n", fg_console + 1); } static DEVICE_ATTR(active, S_IRUGO, show_tty_active, NULL); static struct attribute *vt_dev_attrs[] = { &dev_attr_active.attr, NULL }; ATTRIBUTE_GROUPS(vt_dev); int __init vty_init(const struct file_operations *console_fops) { cdev_init(&vc0_cdev, console_fops); if (cdev_add(&vc0_cdev, MKDEV(TTY_MAJOR, 0), 1) || register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0) panic("Couldn't register /dev/tty0 driver\n"); tty0dev = device_create_with_groups(&tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, vt_dev_groups, "tty0"); if (IS_ERR(tty0dev)) tty0dev = NULL; vcs_init(); console_driver = tty_alloc_driver(MAX_NR_CONSOLES, TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS); if (IS_ERR(console_driver)) panic("Couldn't allocate console driver\n"); console_driver->name = "tty"; console_driver->name_base = 1; console_driver->major = TTY_MAJOR; console_driver->minor_start = 1; console_driver->type = TTY_DRIVER_TYPE_CONSOLE; console_driver->init_termios = tty_std_termios; if (default_utf8) console_driver->init_termios.c_iflag |= IUTF8; tty_set_operations(console_driver, &con_ops); if (tty_register_driver(console_driver)) panic("Couldn't 
register console driver\n"); kbd_init(); console_map_init(); #ifdef CONFIG_MDA_CONSOLE mda_console_init(); #endif return 0; } static const struct class vtconsole_class = { .name = "vtconsole", }; static int do_bind_con_driver(const struct consw *csw, int first, int last, int deflt) { struct module *owner = csw->owner; const char *desc = NULL; struct con_driver *con_driver; int i, j = -1, k = -1, retval = -ENODEV; if (!try_module_get(owner)) return -ENODEV; WARN_CONSOLE_UNLOCKED(); /* check if driver is registered */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = &registered_con_driver[i]; if (con_driver->con == csw) { desc = con_driver->desc; retval = 0; break; } } if (retval) goto err; if (!(con_driver->flag & CON_DRIVER_FLAG_INIT)) { csw->con_startup(); con_driver->flag |= CON_DRIVER_FLAG_INIT; } if (deflt) { if (conswitchp) module_put(conswitchp->owner); __module_get(owner); conswitchp = csw; } first = max(first, con_driver->first); last = min(last, con_driver->last); for (i = first; i <= last; i++) { int old_was_color; struct vc_data *vc = vc_cons[i].d; if (con_driver_map[i]) module_put(con_driver_map[i]->owner); __module_get(owner); con_driver_map[i] = csw; if (!vc || !vc->vc_sw) continue; j = i; if (con_is_visible(vc)) { k = i; save_screen(vc); } old_was_color = vc->vc_can_do_color; vc->vc_sw->con_deinit(vc); vc->vc_origin = (unsigned long)vc->vc_screenbuf; visual_init(vc, i, false); set_origin(vc); update_attr(vc); /* If the console changed between mono <-> color, then * the attributes in the screenbuf will be wrong. The * following resets all attributes to something sane. */ if (old_was_color != vc->vc_can_do_color) clear_buffer_attributes(vc); } pr_info("Console: switching "); if (!deflt) pr_cont("consoles %d-%d ", first + 1, last + 1); if (j >= 0) { struct vc_data *vc = vc_cons[j].d; pr_cont("to %s %s %dx%d\n", vc->vc_can_do_color ? 
"colour" : "mono", desc, vc->vc_cols, vc->vc_rows); if (k >= 0) { vc = vc_cons[k].d; update_screen(vc); } } else { pr_cont("to %s\n", desc); } retval = 0; err: module_put(owner); return retval; }; #ifdef CONFIG_VT_HW_CONSOLE_BINDING int do_unbind_con_driver(const struct consw *csw, int first, int last, int deflt) { struct module *owner = csw->owner; const struct consw *defcsw = NULL; struct con_driver *con_driver = NULL, *con_back = NULL; int i, retval = -ENODEV; if (!try_module_get(owner)) return -ENODEV; WARN_CONSOLE_UNLOCKED(); /* check if driver is registered and if it is unbindable */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = &registered_con_driver[i]; if (con_driver->con == csw && con_driver->flag & CON_DRIVER_FLAG_MODULE) { retval = 0; break; } } if (retval) goto err; retval = -ENODEV; /* check if backup driver exists */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_back = &registered_con_driver[i]; if (con_back->con && con_back->con != csw) { defcsw = con_back->con; retval = 0; break; } } if (retval) goto err; if (!con_is_bound(csw)) goto err; first = max(first, con_driver->first); last = min(last, con_driver->last); for (i = first; i <= last; i++) { if (con_driver_map[i] == csw) { module_put(csw->owner); con_driver_map[i] = NULL; } } if (!con_is_bound(defcsw)) { const struct consw *defconsw = conswitchp; defcsw->con_startup(); con_back->flag |= CON_DRIVER_FLAG_INIT; /* * vgacon may change the default driver to point * to dummycon, we restore it here... 
*/ conswitchp = defconsw; } if (!con_is_bound(csw)) con_driver->flag &= ~CON_DRIVER_FLAG_INIT; /* ignore return value, binding should not fail */ do_bind_con_driver(defcsw, first, last, deflt); err: module_put(owner); return retval; } EXPORT_SYMBOL_GPL(do_unbind_con_driver); static int vt_bind(struct con_driver *con) { const struct consw *defcsw = NULL, *csw = NULL; int i, more = 1, first = -1, last = -1, deflt = 0; if (!con->con || !(con->flag & CON_DRIVER_FLAG_MODULE)) goto err; csw = con->con; for (i = 0; i < MAX_NR_CON_DRIVER; i++) { struct con_driver *con = &registered_con_driver[i]; if (con->con && !(con->flag & CON_DRIVER_FLAG_MODULE)) { defcsw = con->con; break; } } if (!defcsw) goto err; while (more) { more = 0; for (i = con->first; i <= con->last; i++) { if (con_driver_map[i] == defcsw) { if (first == -1) first = i; last = i; more = 1; } else if (first != -1) break; } if (first == 0 && last == MAX_NR_CONSOLES -1) deflt = 1; if (first != -1) do_bind_con_driver(csw, first, last, deflt); first = -1; last = -1; deflt = 0; } err: return 0; } static int vt_unbind(struct con_driver *con) { const struct consw *csw = NULL; int i, more = 1, first = -1, last = -1, deflt = 0; int ret; if (!con->con || !(con->flag & CON_DRIVER_FLAG_MODULE)) goto err; csw = con->con; while (more) { more = 0; for (i = con->first; i <= con->last; i++) { if (con_driver_map[i] == csw) { if (first == -1) first = i; last = i; more = 1; } else if (first != -1) break; } if (first == 0 && last == MAX_NR_CONSOLES -1) deflt = 1; if (first != -1) { ret = do_unbind_con_driver(csw, first, last, deflt); if (ret != 0) return ret; } first = -1; last = -1; deflt = 0; } err: return 0; } #else static inline int vt_bind(struct con_driver *con) { return 0; } static inline int vt_unbind(struct con_driver *con) { return 0; } #endif /* CONFIG_VT_HW_CONSOLE_BINDING */ static ssize_t store_bind(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct con_driver *con = 
dev_get_drvdata(dev); int bind = simple_strtoul(buf, NULL, 0); guard(console_lock)(); if (bind) vt_bind(con); else vt_unbind(con); return count; } static ssize_t show_bind(struct device *dev, struct device_attribute *attr, char *buf) { struct con_driver *con = dev_get_drvdata(dev); int bind; scoped_guard(console_lock) bind = con_is_bound(con->con); return sysfs_emit(buf, "%i\n", bind); } static ssize_t show_name(struct device *dev, struct device_attribute *attr, char *buf) { struct con_driver *con = dev_get_drvdata(dev); return sysfs_emit(buf, "%s %s\n", (con->flag & CON_DRIVER_FLAG_MODULE) ? "(M)" : "(S)", con->desc); } static DEVICE_ATTR(bind, S_IRUGO|S_IWUSR, show_bind, store_bind); static DEVICE_ATTR(name, S_IRUGO, show_name, NULL); static struct attribute *con_dev_attrs[] = { &dev_attr_bind.attr, &dev_attr_name.attr, NULL }; ATTRIBUTE_GROUPS(con_dev); static int vtconsole_init_device(struct con_driver *con) { con->flag |= CON_DRIVER_FLAG_ATTR; return 0; } static void vtconsole_deinit_device(struct con_driver *con) { con->flag &= ~CON_DRIVER_FLAG_ATTR; } /** * con_is_bound - checks if driver is bound to the console * @csw: console driver * * RETURNS: zero if unbound, nonzero if bound * * Drivers can call this and if zero, they should release * all resources allocated on &consw.con_startup() */ int con_is_bound(const struct consw *csw) { int i, bound = 0; WARN_CONSOLE_UNLOCKED(); for (i = 0; i < MAX_NR_CONSOLES; i++) { if (con_driver_map[i] == csw) { bound = 1; break; } } return bound; } EXPORT_SYMBOL(con_is_bound); /** * con_is_visible - checks whether the current console is visible * @vc: virtual console * * RETURNS: zero if not visible, nonzero if visible */ bool con_is_visible(const struct vc_data *vc) { WARN_CONSOLE_UNLOCKED(); return *vc->vc_display_fg == vc; } EXPORT_SYMBOL(con_is_visible); /** * con_debug_enter - prepare the console for the kernel debugger * @vc: virtual console * * Called when the console is taken over by the kernel debugger, this * 
function needs to save the current console state, then put the console * into a state suitable for the kernel debugger. */ void con_debug_enter(struct vc_data *vc) { saved_fg_console = fg_console; saved_last_console = last_console; saved_want_console = want_console; saved_vc_mode = vc->vc_mode; saved_console_blanked = console_blanked; vc->vc_mode = KD_TEXT; console_blanked = 0; if (vc->vc_sw->con_debug_enter) vc->vc_sw->con_debug_enter(vc); #ifdef CONFIG_KGDB_KDB /* Set the initial LINES variable if it is not already set */ if (vc->vc_rows < 999) { int linecount; char lns[4]; const char *setargs[3] = { "set", "LINES", lns, }; if (kdbgetintenv(setargs[0], &linecount)) { snprintf(lns, 4, "%i", vc->vc_rows); kdb_set(2, setargs); } } if (vc->vc_cols < 999) { int colcount; char cols[4]; const char *setargs[3] = { "set", "COLUMNS", cols, }; if (kdbgetintenv(setargs[0], &colcount)) { snprintf(cols, 4, "%i", vc->vc_cols); kdb_set(2, setargs); } } #endif /* CONFIG_KGDB_KDB */ } EXPORT_SYMBOL_GPL(con_debug_enter); /** * con_debug_leave - restore console state * * Restore the console state to what it was before the kernel debugger * was invoked. 
*/ void con_debug_leave(void) { struct vc_data *vc; fg_console = saved_fg_console; last_console = saved_last_console; want_console = saved_want_console; console_blanked = saved_console_blanked; vc_cons[fg_console].d->vc_mode = saved_vc_mode; vc = vc_cons[fg_console].d; if (vc->vc_sw->con_debug_leave) vc->vc_sw->con_debug_leave(vc); } EXPORT_SYMBOL_GPL(con_debug_leave); static int do_register_con_driver(const struct consw *csw, int first, int last) { struct module *owner = csw->owner; struct con_driver *con_driver; const char *desc; int i, retval; WARN_CONSOLE_UNLOCKED(); if (!try_module_get(owner)) return -ENODEV; for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = &registered_con_driver[i]; /* already registered */ if (con_driver->con == csw) { retval = -EBUSY; goto err; } } desc = csw->con_startup(); if (!desc) { retval = -ENODEV; goto err; } retval = -EINVAL; for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = &registered_con_driver[i]; if (con_driver->con == NULL && !(con_driver->flag & CON_DRIVER_FLAG_ZOMBIE)) { con_driver->con = csw; con_driver->desc = desc; con_driver->node = i; con_driver->flag = CON_DRIVER_FLAG_MODULE | CON_DRIVER_FLAG_INIT; con_driver->first = first; con_driver->last = last; retval = 0; break; } } if (retval) goto err; con_driver->dev = device_create_with_groups(&vtconsole_class, NULL, MKDEV(0, con_driver->node), con_driver, con_dev_groups, "vtcon%i", con_driver->node); if (IS_ERR(con_driver->dev)) { pr_warn("Unable to create device for %s; errno = %ld\n", con_driver->desc, PTR_ERR(con_driver->dev)); con_driver->dev = NULL; } else { vtconsole_init_device(con_driver); } err: module_put(owner); return retval; } /** * do_unregister_con_driver - unregister console driver from console layer * @csw: console driver * * DESCRIPTION: All drivers that registers to the console layer must * call this function upon exit, or if the console driver is in a state * where it won't be able to handle console services, such as the * framebuffer console 
without loaded framebuffer drivers.
 *
 * The driver must unbind first prior to unregistration.
 */
int do_unregister_con_driver(const struct consw *csw)
{
	int i;

	/* cannot unregister a bound driver */
	if (con_is_bound(csw))
		return -EBUSY;

	/* The current default driver cannot be unregistered either. */
	if (csw == conswitchp)
		return -EINVAL;

	for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
		struct con_driver *con_driver = &registered_con_driver[i];

		if (con_driver->con == csw) {
			/*
			 * Defer the removal of the sysfs entries since that
			 * will acquire the kernfs s_active lock and we can't
			 * acquire this lock while holding the console lock:
			 * the unbind sysfs entry imposes already the opposite
			 * order. Reset con already here to prevent any later
			 * lookup to succeed and mark this slot as zombie, so
			 * it won't get reused until we complete the removal
			 * in the deferred work.
			 */
			con_driver->con = NULL;
			con_driver->flag = CON_DRIVER_FLAG_ZOMBIE;
			schedule_work(&con_driver_unregister_work);

			return 0;
		}
	}

	/* @csw was never registered. */
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(do_unregister_con_driver);

/*
 * Deferred half of do_unregister_con_driver(): for every slot marked zombie,
 * destroy its sysfs device and then wipe the slot so it can be reused.
 */
static void con_driver_unregister_callback(struct work_struct *ignored)
{
	int i;

	guard(console_lock)();

	for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
		struct con_driver *con_driver = &registered_con_driver[i];

		if (!(con_driver->flag & CON_DRIVER_FLAG_ZOMBIE))
			continue;

		/*
		 * The sysfs removal must run without the console lock (see
		 * do_unregister_con_driver()); the lock is retaken before the
		 * slot is touched again, so the guard above still releases a
		 * held lock on exit.
		 */
		console_unlock();

		vtconsole_deinit_device(con_driver);
		device_destroy(&vtconsole_class, MKDEV(0, con_driver->node));

		console_lock();

		/* Nobody may have claimed the zombie slot in the meantime. */
		if (WARN_ON_ONCE(con_driver->con))
			con_driver->con = NULL;
		con_driver->desc = NULL;
		con_driver->dev = NULL;
		con_driver->node = 0;
		WARN_ON_ONCE(con_driver->flag != CON_DRIVER_FLAG_ZOMBIE);
		con_driver->flag = 0;
		con_driver->first = 0;
		con_driver->last = 0;
	}
}

/*
 *	If we support more console drivers, this function is used
 *	when a driver wants to take over some existing consoles
 *	and become default driver for newly opened ones.
*
 *	do_take_over_console is basically a register followed by bind
 */
int do_take_over_console(const struct consw *csw, int first, int last, int deflt)
{
	int err;

	err = do_register_con_driver(csw, first, last);
	/*
	 * If we get an busy error we still want to bind the console driver
	 * and return success, as we may have unbound the console driver
	 * but not unregistered it.
	 */
	if (err == -EBUSY)
		err = 0;
	/* The bind result itself is not propagated to the caller. */
	if (!err)
		do_bind_con_driver(csw, first, last, deflt);

	return err;
}
EXPORT_SYMBOL_GPL(do_take_over_console);


/*
 * give_up_console is a wrapper to unregister_con_driver. It will only
 * work if driver is fully unbound.
 */
void give_up_console(const struct consw *csw)
{
	guard(console_lock)();
	do_unregister_con_driver(csw);
}
EXPORT_SYMBOL(give_up_console);

/*
 * Register the "vtconsole" class and create sysfs devices for the drivers
 * that were registered before the class existed.  Failures are only warned
 * about; the initcall always returns 0.
 */
static int __init vtconsole_class_init(void)
{
	int i;

	i = class_register(&vtconsole_class);
	if (i)
		pr_warn("Unable to create vt console class; errno = %d\n", i);

	/* Add system drivers to sysfs */
	for (i = 0; i < MAX_NR_CON_DRIVER; i++) {
		struct con_driver *con = &registered_con_driver[i];

		if (con->con && !con->dev) {
			con->dev =
				device_create_with_groups(&vtconsole_class, NULL,
							  MKDEV(0, con->node),
							  con, con_dev_groups,
							  "vtcon%i", con->node);

			if (IS_ERR(con->dev)) {
				pr_warn("Unable to create device for %s; errno = %ld\n",
					con->desc, PTR_ERR(con->dev));
				con->dev = NULL;
			} else {
				vtconsole_init_device(con);
			}
		}
	}

	return 0;
}
postcore_initcall(vtconsole_class_init);

/*
 *	Screen blanking
 */

/*
 * Read one byte from user space and store it as the VESA blank mode.
 * Values above VESA_BLANK_MAX are replaced by VESA_NO_BLANKING.
 */
static int set_vesa_blanking(u8 __user *mode_user)
{
	u8 mode;

	if (get_user(mode, mode_user))
		return -EFAULT;

	guard(console_lock)();
	vesa_blank_mode = (mode <= VESA_BLANK_MAX) ? mode : VESA_NO_BLANKING;

	return 0;
}

/*
 * Blank the foreground console.  @entering_gfx is non-zero when the caller
 * is switching to graphics mode.  Console lock must be held; may sleep.
 */
void do_blank_screen(int entering_gfx)
{
	struct vc_data *vc = vc_cons[fg_console].d;
	int i;

	might_sleep();

	WARN_CONSOLE_UNLOCKED();

	if (console_blanked) {
		/* Already blanked: only the delayed VESA stage is left. */
		if (blank_state == blank_vesa_wait) {
			blank_state = blank_off;
			vc->vc_sw->con_blank(vc, vesa_blank_mode + 1, 0);
		}
		return;
	}

	/* entering graphics mode? */
	if (entering_gfx) {
		hide_cursor(vc);
		save_screen(vc);
		vc->vc_sw->con_blank(vc, VESA_VSYNC_SUSPEND, 1);
		/* console_blanked holds the blanked console number + 1. */
		console_blanked = fg_console + 1;
		blank_state = blank_off;
		set_origin(vc);
		return;
	}

	blank_state = blank_off;

	/* don't blank graphics */
	if (vc->vc_mode != KD_TEXT) {
		console_blanked = fg_console + 1;
		return;
	}

	hide_cursor(vc);
	timer_delete_sync(&console_timer);
	blank_timer_expired = 0;

	save_screen(vc);
	/* In case we need to reset origin, blanking hook returns 1 */
	i = vc->vc_sw->con_blank(vc, vesa_off_interval ? VESA_VSYNC_SUSPEND :
			(vesa_blank_mode + 1), 0);
	console_blanked = fg_console + 1;
	if (i)
		set_origin(vc);

	/* A hook returning non-zero skips the VESA timer and the event. */
	if (console_blank_hook && console_blank_hook(1))
		return;

	/* Arm the second stage: power down after vesa_off_interval. */
	if (vesa_off_interval && vesa_blank_mode) {
		blank_state = blank_vesa_wait;
		mod_timer(&console_timer, jiffies + vesa_off_interval);
	}
	vt_event_post(VT_EVENT_BLANK, vc->vc_num, vc->vc_num);
}
EXPORT_SYMBOL(do_blank_screen);

/*
 * Called by timer as well as from vt_console_driver
 *
 * Undo do_blank_screen() on the foreground console.  @leaving_gfx is passed
 * through to the driver's con_blank() hook.
 */
void do_unblank_screen(int leaving_gfx)
{
	struct vc_data *vc;

	/* This should now always be called from a "sane" (read: can schedule)
	 * context for the sake of the low level drivers, except in the special
	 * case of oops_in_progress
	 */
	if (!oops_in_progress)
		might_sleep();

	WARN_CONSOLE_UNLOCKED();

	/* An explicit unblank re-enables poke-triggered unblanking. */
	ignore_poke = 0;
	if (!console_blanked)
		return;
	if (!vc_cons_allocated(fg_console)) {
		/* impossible */
		pr_warn("unblank_screen: tty %d not allocated ??\n",
			fg_console + 1);
		return;
	}
	vc = vc_cons[fg_console].d;
	if (vc->vc_mode != KD_TEXT)
		return; /* but leave console_blanked != 0 */

	/* Restart the inactivity timer for the next blank. */
	if (blankinterval) {
		mod_timer(&console_timer, jiffies + (blankinterval * HZ));
		blank_state = blank_normal_wait;
	}

	console_blanked = 0;
	if (vc->vc_sw->con_blank(vc, VESA_NO_BLANKING, leaving_gfx))
		/* Low-level driver cannot restore -> do it ourselves */
		update_screen(vc);
	if (console_blank_hook)
		console_blank_hook(0);
	set_palette(vc);
	set_cursor(vc);
	vt_event_post(VT_EVENT_UNBLANK, vc->vc_num, vc->vc_num);
	notify_update(vc);
}
EXPORT_SYMBOL(do_unblank_screen); /* * This is called by the outside world to cause a forced unblank, mostly for * oopses. Currently, I just call do_unblank_screen(0), but we could eventually * call it with 1 as an argument and so force a mode restore... that may kill * X or at least garbage the screen but would also make the Oops visible... */ static void unblank_screen(void) { do_unblank_screen(0); } /* * We defer the timer blanking to work queue so it can take the console mutex * (console operations can still happen at irq time, but only from printk which * has the console mutex. Not perfect yet, but better than no locking */ static void blank_screen_t(struct timer_list *unused) { blank_timer_expired = 1; schedule_work(&console_work); } void poke_blanked_console(void) { WARN_CONSOLE_UNLOCKED(); /* Add this so we quickly catch whoever might call us in a non * safe context. Nowadays, unblank_screen() isn't to be called in * atomic contexts and is allowed to schedule (with the special case * of oops_in_progress, but that isn't of any concern for this * function. --BenH. */ might_sleep(); /* This isn't perfectly race free, but a race here would be mostly harmless, * at worst, we'll do a spurious blank and it's unlikely */ timer_delete(&console_timer); blank_timer_expired = 0; if (ignore_poke || !vc_cons[fg_console].d || vc_cons[fg_console].d->vc_mode == KD_GRAPHICS) return; if (console_blanked) unblank_screen(); else if (blankinterval) { mod_timer(&console_timer, jiffies + (blankinterval * HZ)); blank_state = blank_normal_wait; } } /* * Palettes */ static void set_palette(struct vc_data *vc) { WARN_CONSOLE_UNLOCKED(); if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_set_palette) vc->vc_sw->con_set_palette(vc, color_table); } /* * Load palette into the DAC registers. arg points to a colour * map, 3 bytes per colour, 16 colours, range from 0 to 255. 
*/ int con_set_cmap(unsigned char __user *arg) { int i, j, k; unsigned char colormap[3*16]; if (copy_from_user(colormap, arg, sizeof(colormap))) return -EFAULT; guard(console_lock)(); for (i = k = 0; i < 16; i++) { default_red[i] = colormap[k++]; default_grn[i] = colormap[k++]; default_blu[i] = colormap[k++]; } for (i = 0; i < MAX_NR_CONSOLES; i++) { if (!vc_cons_allocated(i)) continue; for (j = k = 0; j < 16; j++) { vc_cons[i].d->vc_palette[k++] = default_red[j]; vc_cons[i].d->vc_palette[k++] = default_grn[j]; vc_cons[i].d->vc_palette[k++] = default_blu[j]; } set_palette(vc_cons[i].d); } return 0; } int con_get_cmap(unsigned char __user *arg) { int i, k; unsigned char colormap[3*16]; scoped_guard(console_lock) for (i = k = 0; i < 16; i++) { colormap[k++] = default_red[i]; colormap[k++] = default_grn[i]; colormap[k++] = default_blu[i]; } if (copy_to_user(arg, colormap, sizeof(colormap))) return -EFAULT; return 0; } void reset_palette(struct vc_data *vc) { int j, k; for (j=k=0; j<16; j++) { vc->vc_palette[k++] = default_red[j]; vc->vc_palette[k++] = default_grn[j]; vc->vc_palette[k++] = default_blu[j]; } set_palette(vc); } /* * Font switching * * Currently we only support fonts up to 128 pixels wide, at a maximum height * of 128 pixels. Userspace fontdata may have to be stored with 32 bytes * (shorts/ints, depending on width) reserved for each character which is * kinda wasty, but this is done in order to maintain compatibility with the * EGA/VGA fonts. It is up to the actual low-level console-driver convert data * into its favorite format (maybe we should add a `fontoffset' field to the * `display' structure so we won't have to convert the fontdata all the time. 
*  /Jes
 */

/* Upper bounds accepted from user space for a console font. */
#define max_font_width	64
#define max_font_height	128
#define max_font_glyphs	512
#define max_font_size	(max_font_glyphs*max_font_width*max_font_height)

/*
 * Fetch the current font from the driver and report it through @op.
 *
 * The per-glyph row pitch is op->height for KD_FONT_OP_GET_TALL and 32
 * otherwise.  With op->data NULL only the dimensions and glyph count are
 * reported.  Returns -ENOSPC when the font does not fit the dimensions or
 * glyph count supplied by the caller.
 */
static int con_font_get(struct vc_data *vc, struct console_font_op *op)
{
	struct console_font font;
	int c;
	unsigned int vpitch = op->op == KD_FONT_OP_GET_TALL ? op->height : 32;

	if (vpitch > max_font_height)
		return -EINVAL;

	/* Freed automatically on every return path below. */
	void *font_data __free(kvfree) = NULL;
	if (op->data) {
		font.data = font_data = kvzalloc(max_font_size, GFP_KERNEL);
		if (!font.data)
			return -ENOMEM;
	} else
		font.data = NULL;

	scoped_guard(console_lock) {
		if (vc->vc_mode != KD_TEXT)
			return -EINVAL;
		if (!vc->vc_sw->con_font_get)
			return -ENOSYS;

		int ret = vc->vc_sw->con_font_get(vc, &font, vpitch);
		if (ret)
			return ret;
	}

	/* Bytes to copy out: bytes per row * rows per glyph * glyphs. */
	c = (font.width+7)/8 * vpitch * font.charcount;

	if (op->data && font.charcount > op->charcount)
		return -ENOSPC;
	if (font.width > op->width || font.height > op->height)
		return -ENOSPC;

	op->height = font.height;
	op->width = font.width;
	op->charcount = font.charcount;

	if (op->data && copy_to_user(op->data, font.data, c))
		return -EFAULT;

	return 0;
}

/*
 * Validate a user-supplied font and hand it to the driver.
 *
 * The per-glyph row pitch is op->height for KD_FONT_OP_SET_TALL and 32
 * otherwise; it must not be smaller than the font height.
 */
static int con_font_set(struct vc_data *vc, const struct console_font_op *op)
{
	struct console_font font;
	int size;
	unsigned int vpitch = op->op == KD_FONT_OP_SET_TALL ? op->height : 32;

	if (!op->data)
		return -EINVAL;
	if (op->charcount > max_font_glyphs)
		return -EINVAL;
	if (op->width <= 0 || op->width > max_font_width || !op->height ||
	    op->height > max_font_height)
		return -EINVAL;
	if (vpitch < op->height)
		return -EINVAL;
	size = (op->width+7)/8 * vpitch * op->charcount;
	if (size > max_font_size)
		return -ENOSPC;

	/* The user copy is freed automatically on every return path. */
	void *font_data __free(kfree) = font.data = memdup_user(op->data, size);
	if (IS_ERR(font.data))
		return PTR_ERR(font.data);

	font.charcount = op->charcount;
	font.width = op->width;
	font.height = op->height;

	guard(console_lock)();

	if (vc->vc_mode != KD_TEXT)
		return -EINVAL;
	if (!vc->vc_sw->con_font_set)
		return -ENOSYS;

	/* Drop a selection on this console before changing its font. */
	if (vc_is_sel(vc))
		clear_selection();

	return vc->vc_sw->con_font_set(vc, &font, vpitch, op->flags);
}

/*
 * Ask the driver to load a default font, optionally chosen by the name in
 * op->data, and report the resulting width and height through @op.
 */
static int con_font_default(struct vc_data *vc, struct console_font_op *op)
{
	struct console_font font = {.width = op->width, .height = op->height};
	char name[MAX_FONT_NAME];
	char *s = name;

	/* No name supplied: the driver is called with a NULL name. */
	if (!op->data)
		s = NULL;
	else if (strncpy_from_user(name, op->data, MAX_FONT_NAME - 1) < 0)
		return -EFAULT;
	else
		name[MAX_FONT_NAME - 1] = 0;

	scoped_guard(console_lock) {
		if (vc->vc_mode != KD_TEXT)
			return -EINVAL;
		if (!vc->vc_sw->con_font_default)
			return -ENOSYS;

		if (vc_is_sel(vc))
			clear_selection();
		int ret = vc->vc_sw->con_font_default(vc, &font, s);
		if (ret)
			return ret;
	}

	op->width = font.width;
	op->height = font.height;

	return 0;
}

/* Dispatch a font operation; unknown operations return -ENOSYS. */
int con_font_op(struct vc_data *vc, struct console_font_op *op)
{
	switch (op->op) {
	case KD_FONT_OP_SET:
	case KD_FONT_OP_SET_TALL:
		return con_font_set(vc, op);
	case KD_FONT_OP_GET:
	case KD_FONT_OP_GET_TALL:
		return con_font_get(vc, op);
	case KD_FONT_OP_SET_DEFAULT:
		return con_font_default(vc, op);
	case KD_FONT_OP_COPY:
		/* was buggy and never really used */
		return -EINVAL;
	}
	return -ENOSYS;
}

/*
 *	Interface exported to selection and vcs.
*/ /* used by selection */ u16 screen_glyph(const struct vc_data *vc, int offset) { u16 w = scr_readw(screenpos(vc, offset, true)); u16 c = w & 0xff; if (w & vc->vc_hi_font_mask) c |= 0x100; return c; } EXPORT_SYMBOL_GPL(screen_glyph); u32 screen_glyph_unicode(const struct vc_data *vc, int n) { u32 **uni_lines = vc->vc_uni_lines; if (uni_lines) return uni_lines[n / vc->vc_cols][n % vc->vc_cols]; return inverse_translate(vc, screen_glyph(vc, n * 2), true); } EXPORT_SYMBOL_GPL(screen_glyph_unicode); /* used by vcs - note the word offset */ unsigned short *screen_pos(const struct vc_data *vc, int w_offset, bool viewed) { return screenpos(vc, 2 * w_offset, viewed); } EXPORT_SYMBOL_GPL(screen_pos); void getconsxy(const struct vc_data *vc, unsigned char xy[static 2]) { /* clamp values if they don't fit */ xy[0] = min(vc->state.x, 0xFFu); xy[1] = min(vc->state.y, 0xFFu); } void putconsxy(struct vc_data *vc, unsigned char xy[static const 2]) { hide_cursor(vc); gotoxy(vc, xy[0], xy[1]); set_cursor(vc); } u16 vcs_scr_readw(const struct vc_data *vc, const u16 *org) { if ((unsigned long)org == vc->vc_pos && softcursor_original != -1) return softcursor_original; return scr_readw(org); } void vcs_scr_writew(struct vc_data *vc, u16 val, u16 *org) { scr_writew(val, org); if ((unsigned long)org == vc->vc_pos) { softcursor_original = -1; add_softcursor(vc); } } void vcs_scr_updated(struct vc_data *vc) { notify_update(vc); }
2 2 1 7 2 2 7 1 3 7 5 4 5 3 3 3 2 7 6 1 5 3 2 2 2 2 7 7 6 5 5 4 5 4 1 4 1 4 5 4 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 
1005 1006 1007 1008 1009 1010 // SPDX-License-Identifier: GPL-2.0 /* * Quota code necessary even when VFS quota support is not compiled * into the kernel. The interesting stuff is over in dquot.c, here * we have symbols for initial quotactl(2) handling, the sysctl(2) * variables, etc - things needed even when quota support disabled. */ #include <linux/fs.h> #include <linux/namei.h> #include <linux/slab.h> #include <asm/current.h> #include <linux/blkdev.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/capability.h> #include <linux/quotaops.h> #include <linux/types.h> #include <linux/mount.h> #include <linux/writeback.h> #include <linux/nospec.h> #include "compat.h" #include "../internal.h" static int check_quotactl_permission(struct super_block *sb, int type, int cmd, qid_t id) { switch (cmd) { /* these commands do not require any special privilegues */ case Q_GETFMT: case Q_SYNC: case Q_GETINFO: case Q_XGETQSTAT: case Q_XGETQSTATV: case Q_XQUOTASYNC: break; /* allow to query information for dquots we "own" */ case Q_GETQUOTA: case Q_XGETQUOTA: if ((type == USRQUOTA && uid_eq(current_euid(), make_kuid(current_user_ns(), id))) || (type == GRPQUOTA && in_egroup_p(make_kgid(current_user_ns(), id)))) break; fallthrough; default: if (!capable(CAP_SYS_ADMIN)) return -EPERM; } return security_quotactl(cmd, type, id, sb); } static void quota_sync_one(struct super_block *sb, void *arg) { int type = *(int *)arg; if (sb->s_qcop && sb->s_qcop->quota_sync && (sb->s_quota_types & (1 << type))) sb->s_qcop->quota_sync(sb, type); } static int quota_sync_all(int type) { int ret; ret = security_quotactl(Q_SYNC, type, 0, NULL); if (!ret) iterate_supers(quota_sync_one, &type); return ret; } unsigned int qtype_enforce_flag(int type) { switch (type) { case USRQUOTA: return FS_QUOTA_UDQ_ENFD; case GRPQUOTA: return FS_QUOTA_GDQ_ENFD; case PRJQUOTA: return FS_QUOTA_PDQ_ENFD; } return 0; } static int 
quota_quotaon(struct super_block *sb, int type, qid_t id, const struct path *path) { if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable) return -ENOSYS; if (sb->s_qcop->quota_enable) return sb->s_qcop->quota_enable(sb, qtype_enforce_flag(type)); if (IS_ERR(path)) return PTR_ERR(path); return sb->s_qcop->quota_on(sb, type, id, path); } static int quota_quotaoff(struct super_block *sb, int type) { if (!sb->s_qcop->quota_off && !sb->s_qcop->quota_disable) return -ENOSYS; if (sb->s_qcop->quota_disable) return sb->s_qcop->quota_disable(sb, qtype_enforce_flag(type)); return sb->s_qcop->quota_off(sb, type); } static int quota_getfmt(struct super_block *sb, int type, void __user *addr) { __u32 fmt; if (!sb_has_quota_active(sb, type)) return -ESRCH; fmt = sb_dqopt(sb)->info[type].dqi_format->qf_fmt_id; if (copy_to_user(addr, &fmt, sizeof(fmt))) return -EFAULT; return 0; } static int quota_getinfo(struct super_block *sb, int type, void __user *addr) { struct qc_state state; struct qc_type_state *tstate; struct if_dqinfo uinfo; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; ret = sb->s_qcop->get_state(sb, &state); if (ret) return ret; tstate = state.s_state + type; if (!(tstate->flags & QCI_ACCT_ENABLED)) return -ESRCH; memset(&uinfo, 0, sizeof(uinfo)); uinfo.dqi_bgrace = tstate->spc_timelimit; uinfo.dqi_igrace = tstate->ino_timelimit; if (tstate->flags & QCI_SYSFILE) uinfo.dqi_flags |= DQF_SYS_FILE; if (tstate->flags & QCI_ROOT_SQUASH) uinfo.dqi_flags |= DQF_ROOT_SQUASH; uinfo.dqi_valid = IIF_ALL; if (copy_to_user(addr, &uinfo, sizeof(uinfo))) return -EFAULT; return 0; } static int quota_setinfo(struct super_block *sb, int type, void __user *addr) { struct if_dqinfo info; struct qc_info qinfo; if (copy_from_user(&info, addr, sizeof(info))) return -EFAULT; if (!sb->s_qcop->set_info) return -ENOSYS; if (info.dqi_valid & ~(IIF_FLAGS | IIF_BGRACE | IIF_IGRACE)) return -EINVAL; memset(&qinfo, 0, sizeof(qinfo)); if (info.dqi_valid & IIF_FLAGS) { if (info.dqi_flags & 
~DQF_SETINFO_MASK) return -EINVAL; if (info.dqi_flags & DQF_ROOT_SQUASH) qinfo.i_flags |= QCI_ROOT_SQUASH; qinfo.i_fieldmask |= QC_FLAGS; } if (info.dqi_valid & IIF_BGRACE) { qinfo.i_spc_timelimit = info.dqi_bgrace; qinfo.i_fieldmask |= QC_SPC_TIMER; } if (info.dqi_valid & IIF_IGRACE) { qinfo.i_ino_timelimit = info.dqi_igrace; qinfo.i_fieldmask |= QC_INO_TIMER; } return sb->s_qcop->set_info(sb, type, &qinfo); } static inline qsize_t qbtos(qsize_t blocks) { return blocks << QIF_DQBLKSIZE_BITS; } static inline qsize_t stoqb(qsize_t space) { return (space + QIF_DQBLKSIZE - 1) >> QIF_DQBLKSIZE_BITS; } static void copy_to_if_dqblk(struct if_dqblk *dst, struct qc_dqblk *src) { memset(dst, 0, sizeof(*dst)); dst->dqb_bhardlimit = stoqb(src->d_spc_hardlimit); dst->dqb_bsoftlimit = stoqb(src->d_spc_softlimit); dst->dqb_curspace = src->d_space; dst->dqb_ihardlimit = src->d_ino_hardlimit; dst->dqb_isoftlimit = src->d_ino_softlimit; dst->dqb_curinodes = src->d_ino_count; dst->dqb_btime = src->d_spc_timer; dst->dqb_itime = src->d_ino_timer; dst->dqb_valid = QIF_ALL; } static int quota_getquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct kqid qid; struct qc_dqblk fdq; struct if_dqblk idq; int ret; if (!sb->s_qcop->get_dqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; ret = sb->s_qcop->get_dqblk(sb, qid, &fdq); if (ret) return ret; copy_to_if_dqblk(&idq, &fdq); if (compat_need_64bit_alignment_fixup()) { struct compat_if_dqblk __user *compat_dqblk = addr; if (copy_to_user(compat_dqblk, &idq, sizeof(*compat_dqblk))) return -EFAULT; if (put_user(idq.dqb_valid, &compat_dqblk->dqb_valid)) return -EFAULT; } else { if (copy_to_user(addr, &idq, sizeof(idq))) return -EFAULT; } return 0; } /* * Return quota for next active quota >= this id, if any exists, * otherwise return -ENOENT via ->get_nextdqblk */ static int quota_getnextquota(struct super_block *sb, int type, qid_t id, void 
__user *addr) { struct kqid qid; struct qc_dqblk fdq; struct if_nextdqblk idq; int ret; if (!sb->s_qcop->get_nextdqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq); if (ret) return ret; /* struct if_nextdqblk is a superset of struct if_dqblk */ copy_to_if_dqblk((struct if_dqblk *)&idq, &fdq); idq.dqb_id = from_kqid(current_user_ns(), qid); if (copy_to_user(addr, &idq, sizeof(idq))) return -EFAULT; return 0; } static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src) { dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit); dst->d_spc_softlimit = qbtos(src->dqb_bsoftlimit); dst->d_space = src->dqb_curspace; dst->d_ino_hardlimit = src->dqb_ihardlimit; dst->d_ino_softlimit = src->dqb_isoftlimit; dst->d_ino_count = src->dqb_curinodes; dst->d_spc_timer = src->dqb_btime; dst->d_ino_timer = src->dqb_itime; dst->d_fieldmask = 0; if (src->dqb_valid & QIF_BLIMITS) dst->d_fieldmask |= QC_SPC_SOFT | QC_SPC_HARD; if (src->dqb_valid & QIF_SPACE) dst->d_fieldmask |= QC_SPACE; if (src->dqb_valid & QIF_ILIMITS) dst->d_fieldmask |= QC_INO_SOFT | QC_INO_HARD; if (src->dqb_valid & QIF_INODES) dst->d_fieldmask |= QC_INO_COUNT; if (src->dqb_valid & QIF_BTIME) dst->d_fieldmask |= QC_SPC_TIMER; if (src->dqb_valid & QIF_ITIME) dst->d_fieldmask |= QC_INO_TIMER; } static int quota_setquota(struct super_block *sb, int type, qid_t id, void __user *addr) { struct qc_dqblk fdq; struct if_dqblk idq; struct kqid qid; if (compat_need_64bit_alignment_fixup()) { struct compat_if_dqblk __user *compat_dqblk = addr; if (copy_from_user(&idq, compat_dqblk, sizeof(*compat_dqblk)) || get_user(idq.dqb_valid, &compat_dqblk->dqb_valid)) return -EFAULT; } else { if (copy_from_user(&idq, addr, sizeof(idq))) return -EFAULT; } if (!sb->s_qcop->set_dqblk) return -ENOSYS; qid = make_kqid(current_user_ns(), type, id); if (!qid_has_mapping(sb->s_user_ns, qid)) return -EINVAL; 
copy_from_if_dqblk(&fdq, &idq); return sb->s_qcop->set_dqblk(sb, qid, &fdq); } static int quota_enable(struct super_block *sb, void __user *addr) { __u32 flags; if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; if (!sb->s_qcop->quota_enable) return -ENOSYS; return sb->s_qcop->quota_enable(sb, flags); } static int quota_disable(struct super_block *sb, void __user *addr) { __u32 flags; if (copy_from_user(&flags, addr, sizeof(flags))) return -EFAULT; if (!sb->s_qcop->quota_disable) return -ENOSYS; return sb->s_qcop->quota_disable(sb, flags); } static int quota_state_to_flags(struct qc_state *state) { int flags = 0; if (state->s_state[USRQUOTA].flags & QCI_ACCT_ENABLED) flags |= FS_QUOTA_UDQ_ACCT; if (state->s_state[USRQUOTA].flags & QCI_LIMITS_ENFORCED) flags |= FS_QUOTA_UDQ_ENFD; if (state->s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED) flags |= FS_QUOTA_GDQ_ACCT; if (state->s_state[GRPQUOTA].flags & QCI_LIMITS_ENFORCED) flags |= FS_QUOTA_GDQ_ENFD; if (state->s_state[PRJQUOTA].flags & QCI_ACCT_ENABLED) flags |= FS_QUOTA_PDQ_ACCT; if (state->s_state[PRJQUOTA].flags & QCI_LIMITS_ENFORCED) flags |= FS_QUOTA_PDQ_ENFD; return flags; } static int quota_getstate(struct super_block *sb, int type, struct fs_quota_stat *fqs) { struct qc_state state; int ret; memset(&state, 0, sizeof (struct qc_state)); ret = sb->s_qcop->get_state(sb, &state); if (ret < 0) return ret; memset(fqs, 0, sizeof(*fqs)); fqs->qs_version = FS_QSTAT_VERSION; fqs->qs_flags = quota_state_to_flags(&state); /* No quota enabled? 
*/ if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit; fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit; /* Inodes may be allocated even if inactive; copy out if present */ if (state.s_state[USRQUOTA].ino) { fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino; fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks; fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents; } if (state.s_state[GRPQUOTA].ino) { fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino; fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks; fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents; } if (state.s_state[PRJQUOTA].ino) { /* * Q_XGETQSTAT doesn't have room for both group and project * quotas. So, allow the project quota values to be copied out * only if there is no group quota information available. 
*/ if (!(state.s_state[GRPQUOTA].flags & QCI_ACCT_ENABLED)) { fqs->qs_gquota.qfs_ino = state.s_state[PRJQUOTA].ino; fqs->qs_gquota.qfs_nblks = state.s_state[PRJQUOTA].blocks; fqs->qs_gquota.qfs_nextents = state.s_state[PRJQUOTA].nextents; } } return 0; } static int compat_copy_fs_qfilestat(struct compat_fs_qfilestat __user *to, struct fs_qfilestat *from) { if (copy_to_user(to, from, sizeof(*to)) || put_user(from->qfs_nextents, &to->qfs_nextents)) return -EFAULT; return 0; } static int compat_copy_fs_quota_stat(struct compat_fs_quota_stat __user *to, struct fs_quota_stat *from) { if (put_user(from->qs_version, &to->qs_version) || put_user(from->qs_flags, &to->qs_flags) || put_user(from->qs_pad, &to->qs_pad) || compat_copy_fs_qfilestat(&to->qs_uquota, &from->qs_uquota) || compat_copy_fs_qfilestat(&to->qs_gquota, &from->qs_gquota) || put_user(from->qs_incoredqs, &to->qs_incoredqs) || put_user(from->qs_btimelimit, &to->qs_btimelimit) || put_user(from->qs_itimelimit, &to->qs_itimelimit) || put_user(from->qs_rtbtimelimit, &to->qs_rtbtimelimit) || put_user(from->qs_bwarnlimit, &to->qs_bwarnlimit) || put_user(from->qs_iwarnlimit, &to->qs_iwarnlimit)) return -EFAULT; return 0; } static int quota_getxstate(struct super_block *sb, int type, void __user *addr) { struct fs_quota_stat fqs; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; ret = quota_getstate(sb, type, &fqs); if (ret) return ret; if (compat_need_64bit_alignment_fixup()) return compat_copy_fs_quota_stat(addr, &fqs); if (copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return 0; } static int quota_getstatev(struct super_block *sb, int type, struct fs_quota_statv *fqs) { struct qc_state state; int ret; memset(&state, 0, sizeof (struct qc_state)); ret = sb->s_qcop->get_state(sb, &state); if (ret < 0) return ret; memset(fqs, 0, sizeof(*fqs)); fqs->qs_version = FS_QSTAT_VERSION; fqs->qs_flags = quota_state_to_flags(&state); /* No quota enabled? 
*/ if (!fqs->qs_flags) return -ENOSYS; fqs->qs_incoredqs = state.s_incoredqs; fqs->qs_btimelimit = state.s_state[type].spc_timelimit; fqs->qs_itimelimit = state.s_state[type].ino_timelimit; fqs->qs_rtbtimelimit = state.s_state[type].rt_spc_timelimit; fqs->qs_bwarnlimit = state.s_state[type].spc_warnlimit; fqs->qs_iwarnlimit = state.s_state[type].ino_warnlimit; fqs->qs_rtbwarnlimit = state.s_state[type].rt_spc_warnlimit; /* Inodes may be allocated even if inactive; copy out if present */ if (state.s_state[USRQUOTA].ino) { fqs->qs_uquota.qfs_ino = state.s_state[USRQUOTA].ino; fqs->qs_uquota.qfs_nblks = state.s_state[USRQUOTA].blocks; fqs->qs_uquota.qfs_nextents = state.s_state[USRQUOTA].nextents; } if (state.s_state[GRPQUOTA].ino) { fqs->qs_gquota.qfs_ino = state.s_state[GRPQUOTA].ino; fqs->qs_gquota.qfs_nblks = state.s_state[GRPQUOTA].blocks; fqs->qs_gquota.qfs_nextents = state.s_state[GRPQUOTA].nextents; } if (state.s_state[PRJQUOTA].ino) { fqs->qs_pquota.qfs_ino = state.s_state[PRJQUOTA].ino; fqs->qs_pquota.qfs_nblks = state.s_state[PRJQUOTA].blocks; fqs->qs_pquota.qfs_nextents = state.s_state[PRJQUOTA].nextents; } return 0; } static int quota_getxstatev(struct super_block *sb, int type, void __user *addr) { struct fs_quota_statv fqs; int ret; if (!sb->s_qcop->get_state) return -ENOSYS; memset(&fqs, 0, sizeof(fqs)); if (copy_from_user(&fqs, addr, 1)) /* Just read qs_version */ return -EFAULT; /* If this kernel doesn't support user specified version, fail */ switch (fqs.qs_version) { case FS_QSTATV_VERSION1: break; default: return -EINVAL; } ret = quota_getstatev(sb, type, &fqs); if (!ret && copy_to_user(addr, &fqs, sizeof(fqs))) return -EFAULT; return ret; } /* * XFS defines BBTOB and BTOBB macros inside fs/xfs/ and we cannot move them * out of there as xfsprogs rely on definitions being in that header file. So * just define same functions here for quota purposes. 
*/
#define XFS_BB_SHIFT 9

/* Convert a count of basic blocks (1 << XFS_BB_SHIFT bytes each) to bytes. */
static inline u64 quota_bbtob(u64 blocks)
{
	return blocks << XFS_BB_SHIFT;
}

/* Convert bytes to basic blocks, rounding up. */
static inline u64 quota_btobb(u64 bytes)
{
	return (bytes + (1 << XFS_BB_SHIFT) - 1) >> XFS_BB_SHIFT;
}

/*
 * Decode a timer from the user-supplied fs_disk_quota: when FS_DQ_BIGTIME
 * is set in d_fieldmask the value is @timer_hi:@timer (high byte above the
 * 32 low bits), otherwise just @timer.
 */
static inline s64 copy_from_xfs_dqblk_ts(const struct fs_disk_quota *d,
		__s32 timer, __s8 timer_hi)
{
	if (d->d_fieldmask & FS_DQ_BIGTIME)
		return (u32)timer | (s64)timer_hi << 32;
	return timer;
}

/*
 * Fill the in-kernel struct qc_dqblk from a user-supplied XFS-style
 * struct fs_disk_quota.  Block quantities are converted from basic blocks
 * to bytes and each FS_DQ_* bit in the source field mask is translated to
 * the corresponding QC_* bit.
 */
static void copy_from_xfs_dqblk(struct qc_dqblk *dst, struct fs_disk_quota *src)
{
	dst->d_spc_hardlimit = quota_bbtob(src->d_blk_hardlimit);
	dst->d_spc_softlimit = quota_bbtob(src->d_blk_softlimit);
	dst->d_ino_hardlimit = src->d_ino_hardlimit;
	dst->d_ino_softlimit = src->d_ino_softlimit;
	dst->d_space = quota_bbtob(src->d_bcount);
	dst->d_ino_count = src->d_icount;
	dst->d_ino_timer = copy_from_xfs_dqblk_ts(src, src->d_itimer,
						  src->d_itimer_hi);
	dst->d_spc_timer = copy_from_xfs_dqblk_ts(src, src->d_btimer,
						  src->d_btimer_hi);
	dst->d_ino_warns = src->d_iwarns;
	dst->d_spc_warns = src->d_bwarns;
	dst->d_rt_spc_hardlimit = quota_bbtob(src->d_rtb_hardlimit);
	dst->d_rt_spc_softlimit = quota_bbtob(src->d_rtb_softlimit);
	dst->d_rt_space = quota_bbtob(src->d_rtbcount);
	dst->d_rt_spc_timer = copy_from_xfs_dqblk_ts(src, src->d_rtbtimer,
						     src->d_rtbtimer_hi);
	dst->d_rt_spc_warns = src->d_rtbwarns;
	dst->d_fieldmask = 0;
	if (src->d_fieldmask & FS_DQ_ISOFT)
		dst->d_fieldmask |= QC_INO_SOFT;
	if (src->d_fieldmask & FS_DQ_IHARD)
		dst->d_fieldmask |= QC_INO_HARD;
	if (src->d_fieldmask & FS_DQ_BSOFT)
		dst->d_fieldmask |= QC_SPC_SOFT;
	if (src->d_fieldmask & FS_DQ_BHARD)
		dst->d_fieldmask |= QC_SPC_HARD;
	if (src->d_fieldmask & FS_DQ_RTBSOFT)
		dst->d_fieldmask |= QC_RT_SPC_SOFT;
	if (src->d_fieldmask & FS_DQ_RTBHARD)
		dst->d_fieldmask |= QC_RT_SPC_HARD;
	if (src->d_fieldmask & FS_DQ_BTIMER)
		dst->d_fieldmask |= QC_SPC_TIMER;
	if (src->d_fieldmask & FS_DQ_ITIMER)
		dst->d_fieldmask |= QC_INO_TIMER;
	if (src->d_fieldmask & FS_DQ_RTBTIMER)
		dst->d_fieldmask |= QC_RT_SPC_TIMER;
	if (src->d_fieldmask & FS_DQ_BWARNS)
		dst->d_fieldmask |= QC_SPC_WARNS;
	if (src->d_fieldmask & FS_DQ_IWARNS)
		dst->d_fieldmask |= QC_INO_WARNS;
	if (src->d_fieldmask & FS_DQ_RTBWARNS)
		dst->d_fieldmask |= QC_RT_SPC_WARNS;
	if (src->d_fieldmask & FS_DQ_BCOUNT)
		dst->d_fieldmask |= QC_SPACE;
	if (src->d_fieldmask & FS_DQ_ICOUNT)
		dst->d_fieldmask |= QC_INO_COUNT;
	if (src->d_fieldmask & FS_DQ_RTBCOUNT)
		dst->d_fieldmask |= QC_RT_SPACE;
}

/*
 * Build a struct qc_info (filesystem-wide timer and warning limits) from
 * the timer/warning fields of a user-supplied struct fs_disk_quota.  Only
 * the timer and warning bits of the field mask are carried over.
 */
static void copy_qcinfo_from_xfs_dqblk(struct qc_info *dst,
				       struct fs_disk_quota *src)
{
	memset(dst, 0, sizeof(*dst));
	dst->i_spc_timelimit = src->d_btimer;
	dst->i_ino_timelimit = src->d_itimer;
	dst->i_rt_spc_timelimit = src->d_rtbtimer;
	dst->i_ino_warnlimit = src->d_iwarns;
	dst->i_spc_warnlimit = src->d_bwarns;
	dst->i_rt_spc_warnlimit = src->d_rtbwarns;
	if (src->d_fieldmask & FS_DQ_BWARNS)
		dst->i_fieldmask |= QC_SPC_WARNS;
	if (src->d_fieldmask & FS_DQ_IWARNS)
		dst->i_fieldmask |= QC_INO_WARNS;
	if (src->d_fieldmask & FS_DQ_RTBWARNS)
		dst->i_fieldmask |= QC_RT_SPC_WARNS;
	if (src->d_fieldmask & FS_DQ_BTIMER)
		dst->i_fieldmask |= QC_SPC_TIMER;
	if (src->d_fieldmask & FS_DQ_ITIMER)
		dst->i_fieldmask |= QC_INO_TIMER;
	if (src->d_fieldmask & FS_DQ_RTBTIMER)
		dst->i_fieldmask |= QC_RT_SPC_TIMER;
}

/*
 * Q_XSETQLIM: read a struct fs_disk_quota from user space and apply it via
 * ->set_dqblk.  For id 0 with timer/warning bits set, those fields are
 * first applied through ->set_info and then removed from the field mask.
 * Returns -EFAULT on copy failure, -ENOSYS without ->set_dqblk, -EINVAL
 * when @id has no mapping in the superblock's user namespace or when
 * ->set_info is needed but missing.
 */
static int quota_setxquota(struct super_block *sb, int type, qid_t id,
			   void __user *addr)
{
	struct fs_disk_quota fdq;
	struct qc_dqblk qdq;
	struct kqid qid;

	if (copy_from_user(&fdq, addr, sizeof(fdq)))
		return -EFAULT;
	if (!sb->s_qcop->set_dqblk)
		return -ENOSYS;
	qid = make_kqid(current_user_ns(), type, id);
	if (!qid_has_mapping(sb->s_user_ns, qid))
		return -EINVAL;
	/* Are we actually setting timer / warning limits for all users? */
	if (from_kqid(sb->s_user_ns, qid) == 0 &&
	    fdq.d_fieldmask & (FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK)) {
		struct qc_info qinfo;
		int ret;

		if (!sb->s_qcop->set_info)
			return -EINVAL;
		copy_qcinfo_from_xfs_dqblk(&qinfo, &fdq);
		ret = sb->s_qcop->set_info(sb, type, &qinfo);
		if (ret)
			return ret;
		/* These are already done */
		fdq.d_fieldmask &= ~(FS_DQ_WARNS_MASK | FS_DQ_TIMER_MASK);
	}
	copy_from_xfs_dqblk(&qdq, &fdq);
	return sb->s_qcop->set_dqblk(sb, qid, &qdq);
}

/*
 * Encode @timer into the low/high fields of an fs_disk_quota.  The high
 * part is only stored when FS_DQ_BIGTIME is set in @d's field mask.
 */
static inline void copy_to_xfs_dqblk_ts(const struct fs_disk_quota *d,
		__s32 *timer_lo, __s8 *timer_hi, s64 timer)
{
	*timer_lo = timer;
	if (d->d_fieldmask & FS_DQ_BIGTIME)
		*timer_hi = timer >> 32;
}

/* True when @timer does not fit in a signed 32-bit value. */
static inline bool want_bigtime(s64 timer)
{
	return timer > S32_MAX || timer < S32_MIN;
}

/*
 * Fill a user-visible struct fs_disk_quota from the in-kernel
 * struct qc_dqblk.  FS_DQ_BIGTIME is set when any of the three timers needs
 * more than 32 bits; byte quantities are converted to basic blocks.  Any
 * @type other than USRQUOTA or PRJQUOTA is reported as FS_GROUP_QUOTA.
 */
static void copy_to_xfs_dqblk(struct fs_disk_quota *dst, struct qc_dqblk *src,
			      int type, qid_t id)
{
	memset(dst, 0, sizeof(*dst));
	/* must be decided before the copy_to_xfs_dqblk_ts() calls below */
	if (want_bigtime(src->d_ino_timer) || want_bigtime(src->d_spc_timer) ||
	    want_bigtime(src->d_rt_spc_timer))
		dst->d_fieldmask |= FS_DQ_BIGTIME;
	dst->d_version = FS_DQUOT_VERSION;
	dst->d_id = id;
	if (type == USRQUOTA)
		dst->d_flags = FS_USER_QUOTA;
	else if (type == PRJQUOTA)
		dst->d_flags = FS_PROJ_QUOTA;
	else
		dst->d_flags = FS_GROUP_QUOTA;
	dst->d_blk_hardlimit = quota_btobb(src->d_spc_hardlimit);
	dst->d_blk_softlimit = quota_btobb(src->d_spc_softlimit);
	dst->d_ino_hardlimit = src->d_ino_hardlimit;
	dst->d_ino_softlimit = src->d_ino_softlimit;
	dst->d_bcount = quota_btobb(src->d_space);
	dst->d_icount = src->d_ino_count;
	copy_to_xfs_dqblk_ts(dst, &dst->d_itimer, &dst->d_itimer_hi,
			     src->d_ino_timer);
	copy_to_xfs_dqblk_ts(dst, &dst->d_btimer, &dst->d_btimer_hi,
			     src->d_spc_timer);
	dst->d_iwarns = src->d_ino_warns;
	dst->d_bwarns = src->d_spc_warns;
	dst->d_rtb_hardlimit = quota_btobb(src->d_rt_spc_hardlimit);
	dst->d_rtb_softlimit = quota_btobb(src->d_rt_spc_softlimit);
	dst->d_rtbcount = quota_btobb(src->d_rt_space);
	copy_to_xfs_dqblk_ts(dst, &dst->d_rtbtimer, &dst->d_rtbtimer_hi,
			     src->d_rt_spc_timer);
	dst->d_rtbwarns = src->d_rt_spc_warns;
}

/*
 * Q_XGETQUOTA: fetch the quota block for @id and copy it to user space in
 * struct fs_disk_quota form.
 * Returns -ENOSYS without ->get_dqblk, -EINVAL when @id has no mapping in
 * the superblock's user namespace, -EFAULT when the copy fails.
 */
static int quota_getxquota(struct super_block *sb, int type, qid_t id,
			   void __user *addr)
{
	struct fs_disk_quota fdq;
	struct qc_dqblk qdq;
	struct kqid qid;
	int ret;

	if (!sb->s_qcop->get_dqblk)
		return -ENOSYS;
	qid = make_kqid(current_user_ns(), type, id);
	if (!qid_has_mapping(sb->s_user_ns, qid))
		return -EINVAL;
	ret = sb->s_qcop->get_dqblk(sb, qid, &qdq);
	if (ret)
		return ret;
	copy_to_xfs_dqblk(&fdq, &qdq, type, id);
	if (copy_to_user(addr, &fdq, sizeof(fdq)))
		return -EFAULT;
	/* ret is 0 here: a non-zero value returned above */
	return ret;
}

/*
 * Return quota for next active quota >= this id, if any exists,
 * otherwise return -ENOENT via ->get_nextdqblk.
 */
static int quota_getnextxquota(struct super_block *sb, int type, qid_t id,
			    void __user *addr)
{
	struct fs_disk_quota fdq;
	struct qc_dqblk qdq;
	struct kqid qid;
	qid_t id_out;
	int ret;

	if (!sb->s_qcop->get_nextdqblk)
		return -ENOSYS;
	qid = make_kqid(current_user_ns(), type, id);
	if (!qid_has_mapping(sb->s_user_ns, qid))
		return -EINVAL;
	/* qid is passed by reference and converted back to an id below */
	ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq);
	if (ret)
		return ret;
	id_out = from_kqid(current_user_ns(), qid);
	copy_to_xfs_dqblk(&fdq, &qdq, type, id_out);
	if (copy_to_user(addr, &fdq, sizeof(fdq)))
		return -EFAULT;
	/* ret is 0 here: a non-zero value returned above */
	return ret;
}

/*
 * Q_XQUOTARM: read a __u32 flag word from user space and pass it to
 * ->rm_xquota.  -EFAULT on copy failure, -ENOSYS without the method.
 */
static int quota_rmxquota(struct super_block *sb, void __user *addr)
{
	__u32 flags;

	if (copy_from_user(&flags, addr, sizeof(flags)))
		return -EFAULT;
	if (!sb->s_qcop->rm_xquota)
		return -ENOSYS;
	return sb->s_qcop->rm_xquota(sb, flags);
}

/* Copy parameters and call proper function */
static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
		       void __user *addr, const struct path *path)
{
	int ret;

	/* both syscall entry points reject type >= MAXQUOTAS before this */
	type = array_index_nospec(type, MAXQUOTAS);
	/*
	 * Quota not supported on this fs? Check this before s_quota_types
	 * since they needn't be set if quota is not supported at all.
	 */
	if (!sb->s_qcop)
		return -ENOSYS;
	if (!(sb->s_quota_types & (1 << type)))
		return -EINVAL;

	ret = check_quotactl_permission(sb, type, cmd, id);
	if (ret < 0)
		return ret;

	switch (cmd) {
	case Q_QUOTAON:
		return quota_quotaon(sb, type, id, path);
	case Q_QUOTAOFF:
		return quota_quotaoff(sb, type);
	case Q_GETFMT:
		return quota_getfmt(sb, type, addr);
	case Q_GETINFO:
		return quota_getinfo(sb, type, addr);
	case Q_SETINFO:
		return quota_setinfo(sb, type, addr);
	case Q_GETQUOTA:
		return quota_getquota(sb, type, id, addr);
	case Q_GETNEXTQUOTA:
		return quota_getnextquota(sb, type, id, addr);
	case Q_SETQUOTA:
		return quota_setquota(sb, type, id, addr);
	case Q_SYNC:
		if (!sb->s_qcop->quota_sync)
			return -ENOSYS;
		return sb->s_qcop->quota_sync(sb, type);
	case Q_XQUOTAON:
		return quota_enable(sb, addr);
	case Q_XQUOTAOFF:
		return quota_disable(sb, addr);
	case Q_XQUOTARM:
		return quota_rmxquota(sb, addr);
	case Q_XGETQSTAT:
		return quota_getxstate(sb, type, addr);
	case Q_XGETQSTATV:
		return quota_getxstatev(sb, type, addr);
	case Q_XSETQLIM:
		return quota_setxquota(sb, type, id, addr);
	case Q_XGETQUOTA:
		return quota_getxquota(sb, type, id, addr);
	case Q_XGETNEXTQUOTA:
		return quota_getnextxquota(sb, type, id, addr);
	case Q_XQUOTASYNC:
		if (sb_rdonly(sb))
			return -EROFS;
		/* XFS quotas are fully coherent now, making this call a noop */
		return 0;
	default:
		return -EINVAL;
	}
}

/* Return 1 if 'cmd' will block on frozen filesystem */
static int quotactl_cmd_write(int cmd)
{
	/*
	 * We cannot allow Q_GETQUOTA and Q_GETNEXTQUOTA without write access
	 * as dquot_acquire() may allocate space for new structure and OCFS2
	 * needs to increment on-disk use count.
	 */
	switch (cmd) {
	case Q_GETFMT:
	case Q_GETINFO:
	case Q_SYNC:
	case Q_XGETQSTAT:
	case Q_XGETQSTATV:
	case Q_XGETQUOTA:
	case Q_XGETNEXTQUOTA:
	case Q_XQUOTASYNC:
		return 0;
	}
	return 1;
}

/* Return true if quotactl command is manipulating quota on/off state */
static bool quotactl_cmd_onoff(int cmd)
{
	return (cmd == Q_QUOTAON) || (cmd == Q_QUOTAOFF) ||
		 (cmd == Q_XQUOTAON) || (cmd == Q_XQUOTAOFF);
}

/*
 * look up a superblock on which quota ops will be performed
 * - use the name of a block device to find the superblock thereon
 *
 * On/off commands ask user_get_super() for exclusive access; on/off and
 * write commands additionally wait until the superblock is unfrozen.
 * Returns an ERR_PTR on failure (-ENODEV when no superblock is found or
 * when CONFIG_BLOCK is disabled).
 */
static struct super_block *quotactl_block(const char __user *special, int cmd)
{
#ifdef CONFIG_BLOCK
	struct super_block *sb;
	struct filename *tmp = getname(special);
	bool excl = false, thawed = false;
	int error;
	dev_t dev;

	if (IS_ERR(tmp))
		return ERR_CAST(tmp);
	error = lookup_bdev(tmp->name, &dev);
	putname(tmp);
	if (error)
		return ERR_PTR(error);

	if (quotactl_cmd_onoff(cmd)) {
		excl = true;
		thawed = true;
	} else if (quotactl_cmd_write(cmd)) {
		thawed = true;
	}

retry:
	sb = user_get_super(dev, excl);
	if (!sb)
		return ERR_PTR(-ENODEV);
	if (thawed && sb->s_writers.frozen != SB_UNFROZEN) {
		/* release s_umount in the mode it was taken before waiting */
		if (excl)
			up_write(&sb->s_umount);
		else
			up_read(&sb->s_umount);
		/* Wait for sb to unfreeze */
		sb_start_write(sb);
		sb_end_write(sb);
		put_super(sb);
		goto retry;
	}
	return sb;

#else
	return ERR_PTR(-ENODEV);
#endif
}

/*
 * This is the system call interface. This communicates with
 * the user-level programs. Currently this only supports diskquota
 * calls. Maybe we need to add the process quotas etc. in the future,
 * but we probably should use rlimits for that.
 */
SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
		qid_t, id, void __user *, addr)
{
	uint cmds, type;
	struct super_block *sb = NULL;
	struct path path, *pathp = NULL;
	int ret;

	/* @cmd packs the sub-command above SUBCMDSHIFT and the type below */
	cmds = cmd >> SUBCMDSHIFT;
	type = cmd & SUBCMDMASK;

	if (type >= MAXQUOTAS)
		return -EINVAL;

	/*
	 * As a special case Q_SYNC can be called without a specific device.
	 * It will iterate all superblocks that have quota enabled and call
	 * the sync action on each of them.
	 */
	if (!special) {
		if (cmds == Q_SYNC)
			return quota_sync_all(type);
		return -ENODEV;
	}

	/*
	 * Path for quotaon has to be resolved before grabbing superblock
	 * because that gets s_umount sem which is also possibly needed by path
	 * resolution (think about autofs) and thus deadlocks could arise.
	 */
	if (cmds == Q_QUOTAON) {
		ret = user_path_at(AT_FDCWD, addr,
				   LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT, &path);
		/* a lookup error is deferred: quota_quotaon() reports it */
		if (ret)
			pathp = ERR_PTR(ret);
		else
			pathp = &path;
	}

	sb = quotactl_block(special, cmds);
	if (IS_ERR(sb)) {
		ret = PTR_ERR(sb);
		goto out;
	}

	ret = do_quotactl(sb, type, cmds, id, addr, pathp);

	/* drop the superblock in the mode quotactl_block() acquired it */
	if (!quotactl_cmd_onoff(cmds))
		drop_super(sb);
	else
		drop_super_exclusive(sb);
out:
	if (pathp && !IS_ERR(pathp))
		path_put(pathp);
	return ret;
}

/*
 * quotactl_fd(2): like quotactl(2), but the filesystem is identified by an
 * open file descriptor instead of a block device name.  The path argument
 * handed to do_quotactl() is always ERR_PTR(-EINVAL).
 * Returns -EBADF for an empty descriptor and -EINVAL for a quota type
 * >= MAXQUOTAS.
 */
SYSCALL_DEFINE4(quotactl_fd, unsigned int, fd, unsigned int, cmd,
		qid_t, id, void __user *, addr)
{
	struct super_block *sb;
	unsigned int cmds = cmd >> SUBCMDSHIFT;
	unsigned int type = cmd & SUBCMDMASK;
	CLASS(fd_raw, f)(fd);
	int ret;

	if (fd_empty(f))
		return -EBADF;

	if (type >= MAXQUOTAS)
		return -EINVAL;

	if (quotactl_cmd_write(cmds)) {
		ret = mnt_want_write(fd_file(f)->f_path.mnt);
		if (ret)
			return ret;
	}

	sb = fd_file(f)->f_path.mnt->mnt_sb;
	/* on/off commands take s_umount for writing, all others for reading */
	if (quotactl_cmd_onoff(cmds))
		down_write(&sb->s_umount);
	else
		down_read(&sb->s_umount);

	ret = do_quotactl(sb, type, cmds, id, addr, ERR_PTR(-EINVAL));

	if (quotactl_cmd_onoff(cmds))
		up_write(&sb->s_umount);
	else
		up_read(&sb->s_umount);

	if (quotactl_cmd_write(cmds))
		mnt_drop_write(fd_file(f)->f_path.mnt);
	return ret;
}
1401 1255 6 5 6 168 169 15 17 307 307 157 67 67 67 6 62 66 157 2976 2976 1325 307 3516 3518 3521 2180 2176 2172 1994 936 1476 1474 473 1404 1466 1464 554 541 552 550 554 554 554 553 100 552 100 100 519 519 522 522 52 505 500 14 505 466 506 507 466 548 545 1197 269 1476 2613 2613 722 300 724 303 81 81 79 79 79 82 82 3308 12 12 22 22 22 21 22 20 20 20 20 20 20 101 70 14 17 17 17 100 13 13 13 13 13 13 13 13 13 13 13 13 13 323 325 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 
420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 
920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 // SPDX-License-Identifier: GPL-2.0-only /* * Integrity Measurement Architecture * * Copyright (C) 2005,2006,2007,2008 IBM Corporation * * Authors: * Reiner Sailer <sailer@watson.ibm.com> * Serge Hallyn <serue@us.ibm.com> * Kylene 
Hall <kylene@us.ibm.com> * Mimi Zohar <zohar@us.ibm.com> * * File: ima_main.c * implements the IMA hooks: ima_bprm_check, ima_file_mmap, * and ima_file_check. */ #include <linux/module.h> #include <linux/file.h> #include <linux/binfmts.h> #include <linux/kernel_read_file.h> #include <linux/mount.h> #include <linux/mman.h> #include <linux/slab.h> #include <linux/xattr.h> #include <linux/ima.h> #include <linux/fs.h> #include <linux/iversion.h> #include <linux/evm.h> #include <linux/crash_dump.h> #include "ima.h" #ifdef CONFIG_IMA_APPRAISE int ima_appraise = IMA_APPRAISE_ENFORCE; #else int ima_appraise; #endif int __ro_after_init ima_hash_algo = HASH_ALGO_SHA1; static int hash_setup_done; static int ima_disabled __ro_after_init; static struct notifier_block ima_lsm_policy_notifier = { .notifier_call = ima_lsm_policy_change, }; static int __init ima_setup(char *str) { if (!is_kdump_kernel()) { pr_info("Warning: ima setup option only permitted in kdump"); return 1; } if (strncmp(str, "off", 3) == 0) ima_disabled = 1; else if (strncmp(str, "on", 2) == 0) ima_disabled = 0; else pr_err("Invalid ima setup option: \"%s\" , please specify ima=on|off.", str); return 1; } __setup("ima=", ima_setup); static int __init hash_setup(char *str) { struct ima_template_desc *template_desc = ima_template_desc_current(); int i; if (hash_setup_done) return 1; if (strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) == 0) { if (strncmp(str, "sha1", 4) == 0) { ima_hash_algo = HASH_ALGO_SHA1; } else if (strncmp(str, "md5", 3) == 0) { ima_hash_algo = HASH_ALGO_MD5; } else { pr_err("invalid hash algorithm \"%s\" for template \"%s\"", str, IMA_TEMPLATE_IMA_NAME); return 1; } goto out; } i = match_string(hash_algo_name, HASH_ALGO__LAST, str); if (i < 0) { pr_err("invalid hash algorithm \"%s\"", str); return 1; } ima_hash_algo = i; out: hash_setup_done = 1; return 1; } __setup("ima_hash=", hash_setup); enum hash_algo ima_get_current_hash_algo(void) { return ima_hash_algo; } /* Prevent mmap'ing a 
file execute that is already mmap'ed write */ static int mmap_violation_check(enum ima_hooks func, struct file *file, char **pathbuf, const char **pathname, char *filename) { struct inode *inode; int rc = 0; if ((func == MMAP_CHECK || func == MMAP_CHECK_REQPROT) && mapping_writably_mapped(file->f_mapping)) { rc = -ETXTBSY; inode = file_inode(file); if (!*pathbuf) /* ima_rdwr_violation possibly pre-fetched */ *pathname = ima_d_path(&file->f_path, pathbuf, filename); integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, *pathname, "mmap_file", "mmapped_writers", rc, 0); } return rc; } /* * ima_rdwr_violation_check * * Only invalidate the PCR for measured files: * - Opening a file for write when already open for read, * results in a time of measure, time of use (ToMToU) error. * - Opening a file for read when already open for write, * could result in a file measurement error. * */ static void ima_rdwr_violation_check(struct file *file, struct ima_iint_cache *iint, int must_measure, char **pathbuf, const char **pathname, char *filename) { struct inode *inode = file_inode(file); fmode_t mode = file->f_mode; bool send_tomtou = false, send_writers = false; if (mode & FMODE_WRITE) { if (atomic_read(&inode->i_readcount) && IS_IMA(inode)) { if (!iint) iint = ima_iint_find(inode); /* IMA_MEASURE is set from reader side */ if (iint && test_and_clear_bit(IMA_MAY_EMIT_TOMTOU, &iint->atomic_flags)) send_tomtou = true; } } else { if (must_measure) set_bit(IMA_MAY_EMIT_TOMTOU, &iint->atomic_flags); /* Limit number of open_writers violations */ if (inode_is_open_for_write(inode) && must_measure) { if (!test_and_set_bit(IMA_EMITTED_OPENWRITERS, &iint->atomic_flags)) send_writers = true; } } if (!send_tomtou && !send_writers) return; *pathname = ima_d_path(&file->f_path, pathbuf, filename); if (send_tomtou) ima_add_violation(file, *pathname, iint, "invalid_pcr", "ToMToU"); if (send_writers) ima_add_violation(file, *pathname, iint, "invalid_pcr", "open_writers"); } static void 
ima_check_last_writer(struct ima_iint_cache *iint, struct inode *inode, struct file *file) { fmode_t mode = file->f_mode; bool update; if (!(mode & FMODE_WRITE)) return; mutex_lock(&iint->mutex); if (atomic_read(&inode->i_writecount) == 1) { struct kstat stat; clear_bit(IMA_EMITTED_OPENWRITERS, &iint->atomic_flags); update = test_and_clear_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); if ((iint->flags & IMA_NEW_FILE) || vfs_getattr_nosec(&file->f_path, &stat, STATX_CHANGE_COOKIE, AT_STATX_SYNC_AS_STAT) || !(stat.result_mask & STATX_CHANGE_COOKIE) || stat.change_cookie != iint->real_inode.version) { iint->flags &= ~(IMA_DONE_MASK | IMA_NEW_FILE); iint->measured_pcrs = 0; if (update) ima_update_xattr(iint, file); } } mutex_unlock(&iint->mutex); } /** * ima_file_free - called on __fput() * @file: pointer to file structure being freed * * Flag files that changed, based on i_version */ static void ima_file_free(struct file *file) { struct inode *inode = file_inode(file); struct ima_iint_cache *iint; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return; iint = ima_iint_find(inode); if (!iint) return; ima_check_last_writer(iint, inode, file); } static int process_measurement(struct file *file, const struct cred *cred, struct lsm_prop *prop, char *buf, loff_t size, int mask, enum ima_hooks func) { struct inode *real_inode, *inode = file_inode(file); struct ima_iint_cache *iint = NULL; struct ima_template_desc *template_desc = NULL; struct inode *metadata_inode; char *pathbuf = NULL; char filename[NAME_MAX]; const char *pathname = NULL; int rc = 0, action, must_appraise = 0; int pcr = CONFIG_IMA_MEASURE_PCR_IDX; struct evm_ima_xattr_data *xattr_value = NULL; struct modsig *modsig = NULL; int xattr_len = 0; bool violation_check; enum hash_algo hash_algo; unsigned int allowed_algos = 0; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return 0; /* Return an IMA_MEASURE, IMA_APPRAISE, IMA_AUDIT action * bitmask based on the appraise/audit/measurement policy. 
* Included is the appraise submask. */ action = ima_get_action(file_mnt_idmap(file), inode, cred, prop, mask, func, &pcr, &template_desc, NULL, &allowed_algos); violation_check = ((func == FILE_CHECK || func == MMAP_CHECK || func == MMAP_CHECK_REQPROT) && (ima_policy_flag & IMA_MEASURE) && ((action & IMA_MEASURE) || (file->f_mode & FMODE_WRITE))); if (!action && !violation_check) return 0; must_appraise = action & IMA_APPRAISE; /* Is the appraise rule hook specific? */ if (action & IMA_FILE_APPRAISE) func = FILE_CHECK; inode_lock(inode); if (action) { iint = ima_inode_get(inode); if (!iint) rc = -ENOMEM; } if (!rc && violation_check) ima_rdwr_violation_check(file, iint, action & IMA_MEASURE, &pathbuf, &pathname, filename); inode_unlock(inode); if (rc) goto out; if (!action) goto out; mutex_lock(&iint->mutex); if (test_and_clear_bit(IMA_CHANGE_ATTR, &iint->atomic_flags)) /* * Reset appraisal flags (action and non-action rule-specific) * if ima_inode_post_setattr was called. */ iint->flags &= ~(IMA_APPRAISE | IMA_APPRAISED | IMA_APPRAISE_SUBMASK | IMA_APPRAISED_SUBMASK | IMA_NONACTION_RULE_FLAGS); /* * Re-evaulate the file if either the xattr has changed or the * kernel has no way of detecting file change on the filesystem. * (Limited to privileged mounted filesystems.) */ if (test_and_clear_bit(IMA_CHANGE_XATTR, &iint->atomic_flags) || ((inode->i_sb->s_iflags & SB_I_IMA_UNVERIFIABLE_SIGNATURE) && !(inode->i_sb->s_iflags & SB_I_UNTRUSTED_MOUNTER) && !(action & IMA_FAIL_UNVERIFIABLE_SIGS))) { iint->flags &= ~IMA_DONE_MASK; iint->measured_pcrs = 0; } /* * On stacked filesystems, detect and re-evaluate file data and * metadata changes. */ real_inode = d_real_inode(file_dentry(file)); if (real_inode != inode && (action & IMA_DO_MASK) && (iint->flags & IMA_DONE_MASK)) { if (!IS_I_VERSION(real_inode) || integrity_inode_attrs_changed(&iint->real_inode, real_inode)) { iint->flags &= ~IMA_DONE_MASK; iint->measured_pcrs = 0; } /* * Reset the EVM status when metadata changed. 
*/ metadata_inode = d_inode(d_real(file_dentry(file), D_REAL_METADATA)); if (evm_metadata_changed(inode, metadata_inode)) iint->flags &= ~(IMA_APPRAISED | IMA_APPRAISED_SUBMASK); } /* Determine if already appraised/measured based on bitmask * (IMA_MEASURE, IMA_MEASURED, IMA_XXXX_APPRAISE, IMA_XXXX_APPRAISED, * IMA_AUDIT, IMA_AUDITED) */ iint->flags |= action; action &= IMA_DO_MASK; action &= ~((iint->flags & (IMA_DONE_MASK ^ IMA_MEASURED)) >> 1); /* If target pcr is already measured, unset IMA_MEASURE action */ if ((action & IMA_MEASURE) && (iint->measured_pcrs & (0x1 << pcr))) action ^= IMA_MEASURE; /* HASH sets the digital signature and update flags, nothing else */ if ((action & IMA_HASH) && !(test_bit(IMA_DIGSIG, &iint->atomic_flags))) { xattr_len = ima_read_xattr(file_dentry(file), &xattr_value, xattr_len); if ((xattr_value && xattr_len > 2) && (xattr_value->type == EVM_IMA_XATTR_DIGSIG)) set_bit(IMA_DIGSIG, &iint->atomic_flags); iint->flags |= IMA_HASHED; action ^= IMA_HASH; set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); } /* Nothing to do, just return existing appraised status */ if (!action) { if (must_appraise) { rc = mmap_violation_check(func, file, &pathbuf, &pathname, filename); if (!rc) rc = ima_get_cache_status(iint, func); } goto out_locked; } if ((action & IMA_APPRAISE_SUBMASK) || strcmp(template_desc->name, IMA_TEMPLATE_IMA_NAME) != 0) { /* read 'security.ima' */ xattr_len = ima_read_xattr(file_dentry(file), &xattr_value, xattr_len); /* * Read the appended modsig if allowed by the policy, and allow * an additional measurement list entry, if needed, based on the * template format and whether the file was already measured. 
*/ if (iint->flags & IMA_MODSIG_ALLOWED) { rc = ima_read_modsig(func, buf, size, &modsig); if (!rc && ima_template_has_modsig(template_desc) && iint->flags & IMA_MEASURED) action |= IMA_MEASURE; } } hash_algo = ima_get_hash_algo(xattr_value, xattr_len); rc = ima_collect_measurement(iint, file, buf, size, hash_algo, modsig); if (rc != 0 && rc != -EBADF && rc != -EINVAL) goto out_locked; if (!pathbuf) /* ima_rdwr_violation possibly pre-fetched */ pathname = ima_d_path(&file->f_path, &pathbuf, filename); if (action & IMA_MEASURE) ima_store_measurement(iint, file, pathname, xattr_value, xattr_len, modsig, pcr, template_desc); if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) { rc = ima_check_blacklist(iint, modsig, pcr); if (rc != -EPERM) { inode_lock(inode); rc = ima_appraise_measurement(func, iint, file, pathname, xattr_value, xattr_len, modsig); inode_unlock(inode); } if (!rc) rc = mmap_violation_check(func, file, &pathbuf, &pathname, filename); } if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); if ((file->f_flags & O_DIRECT) && (iint->flags & IMA_PERMIT_DIRECTIO)) rc = 0; /* Ensure the digest was generated using an allowed algorithm */ if (rc == 0 && must_appraise && allowed_algos != 0 && (allowed_algos & (1U << hash_algo)) == 0) { rc = -EACCES; integrity_audit_msg(AUDIT_INTEGRITY_DATA, file_inode(file), pathname, "collect_data", "denied-hash-algorithm", rc, 0); } out_locked: if ((mask & MAY_WRITE) && test_bit(IMA_DIGSIG, &iint->atomic_flags) && !(iint->flags & IMA_NEW_FILE)) rc = -EACCES; mutex_unlock(&iint->mutex); kfree(xattr_value); ima_free_modsig(modsig); out: if (pathbuf) __putname(pathbuf); if (must_appraise) { if (rc && (ima_appraise & IMA_APPRAISE_ENFORCE)) return -EACCES; if (file->f_mode & FMODE_WRITE) set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); } return 0; } /** * ima_file_mmap - based on policy, collect/store measurement. 
* @file: pointer to the file to be measured (May be NULL) * @reqprot: protection requested by the application * @prot: protection that will be applied by the kernel * @flags: operational flags * * Measure files being mmapped executable based on the ima_must_measure() * policy decision. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_file_mmap(struct file *file, unsigned long reqprot, unsigned long prot, unsigned long flags) { struct lsm_prop prop; int ret; if (!file) return 0; security_current_getlsmprop_subj(&prop); if (reqprot & PROT_EXEC) { ret = process_measurement(file, current_cred(), &prop, NULL, 0, MAY_EXEC, MMAP_CHECK_REQPROT); if (ret) return ret; } if (prot & PROT_EXEC) return process_measurement(file, current_cred(), &prop, NULL, 0, MAY_EXEC, MMAP_CHECK); return 0; } /** * ima_file_mprotect - based on policy, limit mprotect change * @vma: vm_area_struct protection is set to * @reqprot: protection requested by the application * @prot: protection that will be applied by the kernel * * Files can be mmap'ed read/write and later changed to execute to circumvent * IMA's mmap appraisal policy rules. Due to locking issues (mmap semaphore * would be taken before i_mutex), files can not be measured or appraised at * this point. Eliminate this integrity gap by denying the mprotect * PROT_EXECUTE change, if an mmap appraise policy rule exists. * * On mprotect change success, return 0. On failure, return -EACESS. */ static int ima_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot) { struct ima_template_desc *template = NULL; struct file *file; char filename[NAME_MAX]; char *pathbuf = NULL; const char *pathname = NULL; struct inode *inode; struct lsm_prop prop; int result = 0; int action; int pcr; /* Is mprotect making an mmap'ed file executable? 
*/ if (!(ima_policy_flag & IMA_APPRAISE) || !vma->vm_file || !(prot & PROT_EXEC) || (vma->vm_flags & VM_EXEC)) return 0; security_current_getlsmprop_subj(&prop); inode = file_inode(vma->vm_file); action = ima_get_action(file_mnt_idmap(vma->vm_file), inode, current_cred(), &prop, MAY_EXEC, MMAP_CHECK, &pcr, &template, NULL, NULL); action |= ima_get_action(file_mnt_idmap(vma->vm_file), inode, current_cred(), &prop, MAY_EXEC, MMAP_CHECK_REQPROT, &pcr, &template, NULL, NULL); /* Is the mmap'ed file in policy? */ if (!(action & (IMA_MEASURE | IMA_APPRAISE_SUBMASK))) return 0; if (action & IMA_APPRAISE_SUBMASK) result = -EPERM; file = vma->vm_file; pathname = ima_d_path(&file->f_path, &pathbuf, filename); integrity_audit_msg(AUDIT_INTEGRITY_DATA, inode, pathname, "collect_data", "failed-mprotect", result, 0); if (pathbuf) __putname(pathbuf); return result; } /** * ima_bprm_check - based on policy, collect/store measurement. * @bprm: contains the linux_binprm structure * * The OS protects against an executable file, already open for write, * from being executed in deny_write_access() and an executable file, * already open for execute, from being modified in get_write_access(). * So we can be certain that what we verify and measure here is actually * what is being executed. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_bprm_check(struct linux_binprm *bprm) { int ret; struct lsm_prop prop; security_current_getlsmprop_subj(&prop); ret = process_measurement(bprm->file, current_cred(), &prop, NULL, 0, MAY_EXEC, BPRM_CHECK); if (ret) return ret; security_cred_getlsmprop(bprm->cred, &prop); return process_measurement(bprm->file, bprm->cred, &prop, NULL, 0, MAY_EXEC, CREDS_CHECK); } /** * ima_bprm_creds_for_exec - collect/store/appraise measurement. 
* @bprm: contains the linux_binprm structure
 *
 * Based on the IMA policy and the execveat(2) AT_EXECVE_CHECK flag, measure
 * and appraise the integrity of a file to be executed by script interpreters.
 * Unlike any of the other LSM hooks where the kernel enforces file integrity,
 * enforcing file integrity is left up to the discretion of the script
 * interpreter (userspace).
 *
 * On success return 0.  On integrity appraisal error, assuming the file
 * is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
 */
static int ima_bprm_creds_for_exec(struct linux_binprm *bprm)
{
	/*
	 * As security_bprm_check() is called multiple times, both
	 * the script and the shebang interpreter are measured, appraised,
	 * and audited. Limit usage of this LSM hook to just measuring,
	 * appraising, and auditing the indirect script execution
	 * (e.g. ./sh example.sh).
	 */
	return bprm->is_check ? ima_bprm_check(bprm) : 0;
}

/**
 * ima_file_check - based on policy, collect/store measurement.
 * @file: pointer to the file to be measured
 * @mask: contains MAY_READ, MAY_WRITE, MAY_EXEC or MAY_APPEND
 *
 * Measure files based on the ima_must_measure() policy decision.
 *
 * On success return 0.  On integrity appraisal error, assuming the file
 * is in policy and IMA-appraisal is in enforcing mode, return -EACCES.
*/
static int ima_file_check(struct file *file, int mask)
{
	/* Only the access bits below are forwarded to the policy lookup. */
	int access = mask & (MAY_READ | MAY_WRITE | MAY_EXEC | MAY_APPEND);
	struct lsm_prop prop;

	security_current_getlsmprop_subj(&prop);
	return process_measurement(file, current_cred(), &prop, NULL, 0,
				   access, FILE_CHECK);
}

/*
 * Common backend of ima_file_hash() and ima_inode_hash().
 *
 * Uses the cached iint of @inode when it already holds a collected hash.
 * Otherwise, if @file is given, the hash is computed into an on-stack iint
 * whose ima_hash is freed before returning. Up to @buf_size bytes of the
 * digest are copied to @buf when it is non-NULL.
 *
 * Returns the hash algorithm on success, -EOPNOTSUPP when no hash is
 * available.
 */
static int __ima_inode_hash(struct inode *inode, struct file *file, char *buf,
			    size_t buf_size)
{
	struct ima_iint_cache *entry = NULL, scratch;
	int err, algo;

	if (ima_policy_flag) {
		entry = ima_iint_find(inode);
		if (entry)
			mutex_lock(&entry->mutex);
	}

	if (file && (!entry || !(entry->flags & IMA_COLLECTED))) {
		/* The cached entry is of no use; release it and hash afresh. */
		if (entry)
			mutex_unlock(&entry->mutex);

		memset(&scratch, 0, sizeof(scratch));
		mutex_init(&scratch.mutex);

		err = ima_collect_measurement(&scratch, file, NULL, 0,
					      ima_hash_algo, NULL);
		if (err < 0) {
			/* ima_hash could be allocated in case of failure. */
			if (err != -ENOMEM)
				kfree(scratch.ima_hash);

			return -EOPNOTSUPP;
		}

		entry = &scratch;
		mutex_lock(&entry->mutex);
	}

	if (!entry)
		return -EOPNOTSUPP;

	/*
	 * ima_file_hash can be called when ima_collect_measurement has still
	 * not been called, we might not always have a hash.
	 */
	if (!entry->ima_hash || !(entry->flags & IMA_COLLECTED)) {
		mutex_unlock(&entry->mutex);
		return -EOPNOTSUPP;
	}

	if (buf)
		memcpy(buf, entry->ima_hash->digest,
		       min_t(size_t, entry->ima_hash->length, buf_size));

	algo = entry->ima_hash->algo;
	mutex_unlock(&entry->mutex);

	if (entry == &scratch)
		kfree(entry->ima_hash);

	return algo;
}

/**
 * ima_file_hash - return a measurement of the file
 * @file: pointer to the file
 * @buf: buffer in which to store the hash
 * @buf_size: length of the buffer
 *
 * On success, return the hash algorithm (as defined in the enum hash_algo).
 * If buf is not NULL, this function also outputs the hash into buf.
 * If the hash is larger than buf_size, then only buf_size bytes will be copied.
 * It generally just makes sense to pass a buffer capable of holding the largest
 * possible hash: IMA_MAX_DIGEST_SIZE.
* The file hash returned is based on the entire file, including the appended * signature. * * If the measurement cannot be performed, return -EOPNOTSUPP. * If the parameters are incorrect, return -EINVAL. */ int ima_file_hash(struct file *file, char *buf, size_t buf_size) { if (!file) return -EINVAL; return __ima_inode_hash(file_inode(file), file, buf, buf_size); } EXPORT_SYMBOL_GPL(ima_file_hash); /** * ima_inode_hash - return the stored measurement if the inode has been hashed * and is in the iint cache. * @inode: pointer to the inode * @buf: buffer in which to store the hash * @buf_size: length of the buffer * * On success, return the hash algorithm (as defined in the enum hash_algo). * If buf is not NULL, this function also outputs the hash into buf. * If the hash is larger than buf_size, then only buf_size bytes will be copied. * It generally just makes sense to pass a buffer capable of holding the largest * possible hash: IMA_MAX_DIGEST_SIZE. * The hash returned is based on the entire contents, including the appended * signature. * * If IMA is disabled or if no measurement is available, return -EOPNOTSUPP. * If the parameters are incorrect, return -EINVAL. */ int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size) { if (!inode) return -EINVAL; return __ima_inode_hash(inode, NULL, buf, buf_size); } EXPORT_SYMBOL_GPL(ima_inode_hash); /** * ima_post_create_tmpfile - mark newly created tmpfile as new * @idmap: idmap of the mount the inode was found from * @inode: inode of the newly created tmpfile * * No measuring, appraising or auditing of newly created tmpfiles is needed. * Skip calling process_measurement(), but indicate which newly, created * tmpfiles are in policy. 
*/ static void ima_post_create_tmpfile(struct mnt_idmap *idmap, struct inode *inode) { struct ima_iint_cache *iint; int must_appraise; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return; must_appraise = ima_must_appraise(idmap, inode, MAY_ACCESS, FILE_CHECK); if (!must_appraise) return; /* Nothing to do if we can't allocate memory */ iint = ima_inode_get(inode); if (!iint) return; /* needed for writing the security xattrs */ set_bit(IMA_UPDATE_XATTR, &iint->atomic_flags); iint->ima_file_status = INTEGRITY_PASS; } /** * ima_post_path_mknod - mark as a new inode * @idmap: idmap of the mount the inode was found from * @dentry: newly created dentry * * Mark files created via the mknodat syscall as new, so that the * file data can be written later. */ static void ima_post_path_mknod(struct mnt_idmap *idmap, struct dentry *dentry) { struct ima_iint_cache *iint; struct inode *inode = dentry->d_inode; int must_appraise; if (!ima_policy_flag || !S_ISREG(inode->i_mode)) return; must_appraise = ima_must_appraise(idmap, inode, MAY_ACCESS, FILE_CHECK); if (!must_appraise) return; /* Nothing to do if we can't allocate memory */ iint = ima_inode_get(inode); if (!iint) return; /* needed for re-opening empty files */ iint->flags |= IMA_NEW_FILE; } /** * ima_read_file - pre-measure/appraise hook decision based on policy * @file: pointer to the file to be measured/appraised/audit * @read_id: caller identifier * @contents: whether a subsequent call will be made to ima_post_read_file() * * Permit reading a file based on policy. The policy rules are written * in terms of the policy identifier. Appraising the integrity of * a file requires a file descriptor. * * For permission return 0, otherwise return -EACCES. 
*/ static int ima_read_file(struct file *file, enum kernel_read_file_id read_id, bool contents) { enum ima_hooks func; struct lsm_prop prop; /* * Do devices using pre-allocated memory run the risk of the * firmware being accessible to the device prior to the completion * of IMA's signature verification any more than when using two * buffers? It may be desirable to include the buffer address * in this API and walk all the dma_map_single() mappings to check. */ /* * There will be a call made to ima_post_read_file() with * a filled buffer, so we don't need to perform an extra * read early here. */ if (contents) return 0; /* Read entire file for all partial reads. */ func = read_idmap[read_id] ?: FILE_CHECK; security_current_getlsmprop_subj(&prop); return process_measurement(file, current_cred(), &prop, NULL, 0, MAY_READ, func); } const int read_idmap[READING_MAX_ID] = { [READING_FIRMWARE] = FIRMWARE_CHECK, [READING_MODULE] = MODULE_CHECK, [READING_KEXEC_IMAGE] = KEXEC_KERNEL_CHECK, [READING_KEXEC_INITRAMFS] = KEXEC_INITRAMFS_CHECK, [READING_POLICY] = POLICY_CHECK }; /** * ima_post_read_file - in memory collect/appraise/audit measurement * @file: pointer to the file to be measured/appraised/audit * @buf: pointer to in memory file contents * @size: size of in memory file contents * @read_id: caller identifier * * Measure/appraise/audit in memory file based on policy. Policy rules * are written in terms of a policy identifier. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. 
*/ static int ima_post_read_file(struct file *file, char *buf, loff_t size, enum kernel_read_file_id read_id) { enum ima_hooks func; struct lsm_prop prop; /* permit signed certs */ if (!file && read_id == READING_X509_CERTIFICATE) return 0; if (!file || !buf || size == 0) { /* should never happen */ if (ima_appraise & IMA_APPRAISE_ENFORCE) return -EACCES; return 0; } func = read_idmap[read_id] ?: FILE_CHECK; security_current_getlsmprop_subj(&prop); return process_measurement(file, current_cred(), &prop, buf, size, MAY_READ, func); } /** * ima_load_data - appraise decision based on policy * @id: kernel load data caller identifier * @contents: whether the full contents will be available in a later * call to ima_post_load_data(). * * Callers of this LSM hook can not measure, appraise, or audit the * data provided by userspace. Enforce policy rules requiring a file * signature (eg. kexec'ed kernel image). * * For permission return 0, otherwise return -EACCES. */ static int ima_load_data(enum kernel_load_data_id id, bool contents) { bool ima_enforce, sig_enforce; ima_enforce = (ima_appraise & IMA_APPRAISE_ENFORCE) == IMA_APPRAISE_ENFORCE; switch (id) { case LOADING_KEXEC_IMAGE: if (IS_ENABLED(CONFIG_KEXEC_SIG) && arch_ima_get_secureboot()) { pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n"); return -EACCES; } if (ima_enforce && (ima_appraise & IMA_APPRAISE_KEXEC)) { pr_err("impossible to appraise a kernel image without a file descriptor; try using kexec_file_load syscall.\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } break; case LOADING_FIRMWARE: if (ima_enforce && (ima_appraise & IMA_APPRAISE_FIRMWARE) && !contents) { pr_err("Prevent firmware sysfs fallback loading.\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } break; case LOADING_MODULE: sig_enforce = is_module_sig_enforced(); if (ima_enforce && (!sig_enforce && (ima_appraise & IMA_APPRAISE_MODULES))) { pr_err("impossible to appraise a module without a 
file descriptor. sig_enforce kernel parameter might help\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } break; default: break; } return 0; } /** * ima_post_load_data - appraise decision based on policy * @buf: pointer to in memory file contents * @size: size of in memory file contents * @load_id: kernel load data caller identifier * @description: @load_id-specific description of contents * * Measure/appraise/audit in memory buffer based on policy. Policy rules * are written in terms of a policy identifier. * * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ static int ima_post_load_data(char *buf, loff_t size, enum kernel_load_data_id load_id, char *description) { if (load_id == LOADING_FIRMWARE) { if ((ima_appraise & IMA_APPRAISE_FIRMWARE) && (ima_appraise & IMA_APPRAISE_ENFORCE)) { pr_err("Prevent firmware loading_store.\n"); return -EACCES; /* INTEGRITY_UNKNOWN */ } return 0; } /* * Measure the init_module syscall buffer containing the ELF image. */ if (load_id == LOADING_MODULE) ima_measure_critical_data("modules", "init_module", buf, size, true, NULL, 0); return 0; } /** * process_buffer_measurement - Measure the buffer or the buffer data hash * @idmap: idmap of the mount the inode was found from * @inode: inode associated with the object being measured (NULL for KEY_CHECK) * @buf: pointer to the buffer that needs to be added to the log. * @size: size of buffer(in bytes). * @eventname: event name to be used for the buffer entry. 
* @func: IMA hook
 * @pcr: pcr to extend the measurement
 * @func_data: func specific data, may be NULL
 * @buf_hash: measure buffer data hash
 * @digest: buffer digest will be written to
 * @digest_len: buffer length
 *
 * Based on policy, either the buffer data or buffer data hash is measured
 *
 * Return: 0 if the buffer has been successfully measured, 1 if the digest
 * has been written to the passed location but not added to a measurement entry,
 * a negative value otherwise.
 */
int process_buffer_measurement(struct mnt_idmap *idmap,
			       struct inode *inode, const void *buf, int size,
			       const char *eventname, enum ima_hooks func,
			       int pcr, const char *func_data,
			       bool buf_hash, u8 *digest, size_t digest_len)
{
	int ret = 0;
	const char *audit_cause = "ENOMEM";
	struct ima_template_entry *entry = NULL;
	struct ima_iint_cache iint = {};
	struct ima_event_data event_data = {.iint = &iint,
					    .filename = eventname,
					    .buf = buf,
					    .buf_len = size};
	struct ima_template_desc *template;
	struct ima_max_digest_data hash;
	struct ima_digest_data *hash_hdr = container_of(&hash.hdr,
						struct ima_digest_data, hdr);
	char digest_hash[IMA_MAX_DIGEST_SIZE];
	int digest_hash_len = hash_digest_size[ima_hash_algo];
	int violation = 0;
	int action = 0;
	struct lsm_prop prop;

	/*
	 * A caller-supplied @digest buffer must be able to hold one full
	 * digest of the currently selected hash algorithm.
	 */
	if (digest && digest_len < digest_hash_len)
		return -EINVAL;

	/*
	 * With ima_policy_flag clear there is nothing to log, so the call is
	 * only useful when the caller wants the digest back.
	 */
	if (!ima_policy_flag && !digest)
		return -ENOENT;

	template = ima_template_desc_buf();
	if (!template) {
		ret = -EINVAL;
		audit_cause = "ima_template_desc_buf";
		goto out;
	}

	/*
	 * Both LSM hooks and auxiliary based buffer measurements are
	 * based on policy.  To avoid code duplication, differentiate
	 * between the LSM hooks and auxiliary buffer measurements,
	 * retrieving the policy rule information only for the LSM hook
	 * buffer measurements.
	 */
	if (func) {
		security_current_getlsmprop_subj(&prop);
		action = ima_get_action(idmap, inode, current_cred(),
					&prop, 0, func, &pcr, &template,
					func_data, NULL);
		/* Plain return: this outcome bypasses the audit at "out". */
		if (!(action & IMA_MEASURE) && !digest)
			return -ENOENT;
	}

	/* A zero @pcr selects the build-time default PCR index. */
	if (!pcr)
		pcr = CONFIG_IMA_MEASURE_PCR_IDX;

	/* Hash into the on-stack digest storage reached through hash_hdr. */
	iint.ima_hash = hash_hdr;
	iint.ima_hash->algo = ima_hash_algo;
	iint.ima_hash->length = hash_digest_size[ima_hash_algo];

	ret = ima_calc_buffer_hash(buf, size, iint.ima_hash);
	if (ret < 0) {
		audit_cause = "hashing_error";
		goto out;
	}

	if (buf_hash) {
		/*
		 * Log the digest of @buf instead of @buf itself: keep a copy
		 * of the first digest as the event data and hash that copy
		 * again into iint.ima_hash.
		 */
		memcpy(digest_hash, hash_hdr->digest, digest_hash_len);

		ret = ima_calc_buffer_hash(digest_hash, digest_hash_len,
					   iint.ima_hash);
		if (ret < 0) {
			audit_cause = "hashing_error";
			goto out;
		}

		event_data.buf = digest_hash;
		event_data.buf_len = digest_hash_len;
	}

	/* Hand back whatever iint.ima_hash holds at this point. */
	if (digest)
		memcpy(digest, iint.ima_hash->digest, digest_hash_len);

	/* Digest delivered, but no measurement entry is to be added. */
	if (!ima_policy_flag || (func && !(action & IMA_MEASURE)))
		return 1;

	ret = ima_alloc_init_template(&event_data, &entry, template);
	if (ret < 0) {
		audit_cause = "alloc_entry";
		goto out;
	}

	ret = ima_store_template(entry, violation, NULL, event_data.buf, pcr);
	if (ret < 0) {
		audit_cause = "store_entry";
		/* The entry was not stored, so it is released here. */
		ima_free_template_entry(entry);
	}

out:
	/* Only negative results are reported to the audit subsystem. */
	if (ret < 0)
		integrity_audit_message(AUDIT_INTEGRITY_PCR, NULL, eventname,
					func_measure_str(func),
					audit_cause, ret, 0, ret);

	return ret;
}

/**
 * ima_kexec_cmdline - measure kexec cmdline boot args
 * @kernel_fd: file descriptor of the kexec kernel being loaded
 * @buf: pointer to buffer
 * @size: size of buffer
 *
 * Buffers can only be measured, not appraised.
*/ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size) { if (!buf || !size) return; CLASS(fd, f)(kernel_fd); if (fd_empty(f)) return; process_buffer_measurement(file_mnt_idmap(fd_file(f)), file_inode(fd_file(f)), buf, size, "kexec-cmdline", KEXEC_CMDLINE, 0, NULL, false, NULL, 0); } /** * ima_measure_critical_data - measure kernel integrity critical data * @event_label: unique event label for grouping and limiting critical data * @event_name: event name for the record in the IMA measurement list * @buf: pointer to buffer data * @buf_len: length of buffer data (in bytes) * @hash: measure buffer data hash * @digest: buffer digest will be written to * @digest_len: buffer length * * Measure data critical to the integrity of the kernel into the IMA log * and extend the pcr. Examples of critical data could be various data * structures, policies, and states stored in kernel memory that can * impact the integrity of the system. * * Return: 0 if the buffer has been successfully measured, 1 if the digest * has been written to the passed location but not added to a measurement entry, * a negative value otherwise. */ int ima_measure_critical_data(const char *event_label, const char *event_name, const void *buf, size_t buf_len, bool hash, u8 *digest, size_t digest_len) { if (!event_name || !event_label || !buf || !buf_len) return -ENOPARAM; return process_buffer_measurement(&nop_mnt_idmap, NULL, buf, buf_len, event_name, CRITICAL_DATA, 0, event_label, hash, digest, digest_len); } EXPORT_SYMBOL_GPL(ima_measure_critical_data); #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS /** * ima_kernel_module_request - Prevent crypto-pkcs1(rsa,*) requests * @kmod_name: kernel module name * * Avoid a verification loop where verifying the signature of the modprobe * binary requires executing modprobe itself. 
Since the modprobe iint->mutex * is already held when the signature verification is performed, a deadlock * occurs as soon as modprobe is executed within the critical region, since * the same lock cannot be taken again. * * This happens when public_key_verify_signature(), in case of RSA algorithm, * use alg_name to store internal information in order to construct an * algorithm on the fly, but crypto_larval_lookup() will try to use alg_name * in order to load a kernel module with same name. * * Since we don't have any real "crypto-pkcs1(rsa,*)" kernel modules, * we are safe to fail such module request from crypto_larval_lookup(), and * avoid the verification loop. * * Return: Zero if it is safe to load the kernel module, -EINVAL otherwise. */ static int ima_kernel_module_request(char *kmod_name) { if (strncmp(kmod_name, "crypto-pkcs1(rsa,", 17) == 0) return -EINVAL; return 0; } #endif /* CONFIG_INTEGRITY_ASYMMETRIC_KEYS */ static int __init init_ima(void) { int error; /*Note that turning IMA off is intentionally limited to kdump kernel.*/ if (ima_disabled && is_kdump_kernel()) { pr_info("IMA functionality is disabled"); return 0; } ima_appraise_parse_cmdline(); ima_init_template_list(); hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); if (error && strcmp(hash_algo_name[ima_hash_algo], CONFIG_IMA_DEFAULT_HASH) != 0) { pr_info("Allocating %s failed, going to use default hash algorithm %s\n", hash_algo_name[ima_hash_algo], CONFIG_IMA_DEFAULT_HASH); hash_setup_done = 0; hash_setup(CONFIG_IMA_DEFAULT_HASH); error = ima_init(); } if (error) return error; error = register_blocking_lsm_notifier(&ima_lsm_policy_notifier); if (error) pr_warn("Couldn't register LSM notifier, error %d\n", error); if (!error) ima_update_policy_flags(); return error; } static struct security_hook_list ima_hooks[] __ro_after_init = { LSM_HOOK_INIT(bprm_check_security, ima_bprm_check), LSM_HOOK_INIT(bprm_creds_for_exec, ima_bprm_creds_for_exec), LSM_HOOK_INIT(file_post_open, 
ima_file_check), LSM_HOOK_INIT(inode_post_create_tmpfile, ima_post_create_tmpfile), LSM_HOOK_INIT(file_release, ima_file_free), LSM_HOOK_INIT(mmap_file, ima_file_mmap), LSM_HOOK_INIT(file_mprotect, ima_file_mprotect), LSM_HOOK_INIT(kernel_load_data, ima_load_data), LSM_HOOK_INIT(kernel_post_load_data, ima_post_load_data), LSM_HOOK_INIT(kernel_read_file, ima_read_file), LSM_HOOK_INIT(kernel_post_read_file, ima_post_read_file), LSM_HOOK_INIT(path_post_mknod, ima_post_path_mknod), #ifdef CONFIG_IMA_MEASURE_ASYMMETRIC_KEYS LSM_HOOK_INIT(key_post_create_or_update, ima_post_key_create_or_update), #endif #ifdef CONFIG_INTEGRITY_ASYMMETRIC_KEYS LSM_HOOK_INIT(kernel_module_request, ima_kernel_module_request), #endif LSM_HOOK_INIT(inode_free_security_rcu, ima_inode_free_rcu), }; static const struct lsm_id ima_lsmid = { .name = "ima", .id = LSM_ID_IMA, }; static int __init init_ima_lsm(void) { ima_iintcache_init(); security_add_hooks(ima_hooks, ARRAY_SIZE(ima_hooks), &ima_lsmid); init_ima_appraise_lsm(&ima_lsmid); return 0; } struct lsm_blob_sizes ima_blob_sizes __ro_after_init = { .lbs_inode = sizeof(struct ima_iint_cache *), }; DEFINE_LSM(ima) = { .name = "ima", .init = init_ima_lsm, .order = LSM_ORDER_LAST, .blobs = &ima_blob_sizes, }; late_initcall(init_ima); /* Start IMA after the TPM is available */
315 367 406 367 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 /* SPDX-License-Identifier: GPL-2.0 * * Network memory * * Author: Mina Almasry <almasrymina@google.com> */ #ifndef _NET_NETMEM_H #define _NET_NETMEM_H #include <linux/dma-mapping.h> #include <linux/mm.h> #include <net/net_debug.h> /* These fields in struct page are used by the page_pool and net stack: * * struct { * unsigned long pp_magic; * struct page_pool *pp; * unsigned long _pp_mapping_pad; * unsigned long 
dma_addr;
 *	atomic_long_t pp_ref_count;
 * };
 *
 * We mirror the page_pool fields here so the page_pool can access these
 * fields without worrying whether the underlying fields belong to a
 * page or netmem_desc.
 *
 * CAUTION: Do not update the fields in netmem_desc without also
 * updating the anonymous aliasing union in struct net_iov.
 */
struct netmem_desc {
	unsigned long _flags;
	unsigned long pp_magic;
	struct page_pool *pp;
	unsigned long _pp_mapping_pad;
	unsigned long dma_addr;
	atomic_long_t pp_ref_count;
};

/*
 * Build-time check that member @desc of struct netmem_desc sits at the same
 * offset as member @pg of struct page.
 */
#define NETMEM_DESC_ASSERT_OFFSET(pg, desc)        \
	static_assert(offsetof(struct page, pg) == \
		      offsetof(struct netmem_desc, desc))
NETMEM_DESC_ASSERT_OFFSET(flags, _flags);
NETMEM_DESC_ASSERT_OFFSET(pp_magic, pp_magic);
NETMEM_DESC_ASSERT_OFFSET(pp, pp);
NETMEM_DESC_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad);
NETMEM_DESC_ASSERT_OFFSET(dma_addr, dma_addr);
NETMEM_DESC_ASSERT_OFFSET(pp_ref_count, pp_ref_count);
#undef NETMEM_DESC_ASSERT_OFFSET

/*
 * Since struct netmem_desc uses the space in struct page, the size
 * should be checked, until struct netmem_desc has its own instance from
 * slab, to avoid conflicting with other members within struct page.
 */
static_assert(sizeof(struct netmem_desc) <= offsetof(struct page, _refcount));

/* net_iov */

DECLARE_STATIC_KEY_FALSE(page_pool_mem_providers);

/*  We overload the LSB of the struct page pointer to indicate whether it's
 *  a page or net_iov.
 */
#define NET_IOV 0x01UL

/* Kinds of memory a net_iov can describe; stored in net_iov::type. */
enum net_iov_type {
	NET_IOV_DMABUF,
	NET_IOV_IOURING,

	/* Force size to unsigned long to make the NET_IOV_ASSERTS below pass.
	 */
	NET_IOV_MAX = ULONG_MAX
};

/* A memory descriptor representing abstract networking I/O vectors,
 * generally for non-pages memory that doesn't have its corresponding
 * struct page and needs to be explicitly allocated through slab.
 *
 * net_iovs are allocated and used by networking code, and the size of
 * the chunk is PAGE_SIZE.
 *
 * This memory can be any form of non-struct paged memory.
Examples
 * include imported dmabuf memory and imported io_uring memory. See
 * net_iov_type for all the supported types.
 *
 * @pp_magic:	pp field, similar to the one in struct page/struct
 *		netmem_desc.
 * @pp:		the pp this net_iov belongs to, if any.
 * @dma_addr:	the dma addrs of the net_iov. Needed for the network
 *		card to send/receive this net_iov.
 * @pp_ref_count: the pp ref count of this net_iov, exactly the same
 *		usage as struct page/struct netmem_desc.
 * @owner:	the net_iov_area this net_iov belongs to, if any.
 * @type:	the type of the memory.  Different types of net_iovs are
 *		supported.
 */
struct net_iov {
	union {
		struct netmem_desc desc;

		/* XXX: The following part should be removed once all
		 * the references to them are converted so as to be
		 * accessed via netmem_desc e.g. niov->desc.pp instead
		 * of niov->pp.
		 */
		struct {
			unsigned long _flags;
			unsigned long pp_magic;
			struct page_pool *pp;
			unsigned long _pp_mapping_pad;
			unsigned long dma_addr;
			atomic_long_t pp_ref_count;
		};
	};
	struct net_iov_area *owner;
	enum net_iov_type type;
};

/* A group of net_iovs sharing one owner; see net_iov::owner. */
struct net_iov_area {
	/* Array of net_iovs for this area. */
	struct net_iov *niovs;
	/* presumably the number of entries in @niovs - TODO confirm */
	size_t num_niovs;

	/* Offset into the dma-buf where this chunk starts.  */
	unsigned long base_virtual;
};

/* net_iov is union'ed with struct netmem_desc mirroring struct page, so
 * the page_pool can access these fields without worrying whether the
 * underlying fields are accessed via netmem_desc or directly via
 * net_iov, until all the references to them are converted so as to be
 * accessed via netmem_desc e.g. niov->desc.pp instead of niov->pp.
 *
 * The non-net stack fields of struct page are private to the mm stack
 * and must never be mirrored to net_iov.
*/ #define NET_IOV_ASSERT_OFFSET(desc, iov) \ static_assert(offsetof(struct netmem_desc, desc) == \ offsetof(struct net_iov, iov)) NET_IOV_ASSERT_OFFSET(_flags, _flags); NET_IOV_ASSERT_OFFSET(pp_magic, pp_magic); NET_IOV_ASSERT_OFFSET(pp, pp); NET_IOV_ASSERT_OFFSET(_pp_mapping_pad, _pp_mapping_pad); NET_IOV_ASSERT_OFFSET(dma_addr, dma_addr); NET_IOV_ASSERT_OFFSET(pp_ref_count, pp_ref_count); #undef NET_IOV_ASSERT_OFFSET static inline struct net_iov_area *net_iov_owner(const struct net_iov *niov) { return niov->owner; } static inline unsigned int net_iov_idx(const struct net_iov *niov) { return niov - net_iov_owner(niov)->niovs; } /* netmem */ /** * typedef netmem_ref - a nonexistent type marking a reference to generic * network memory. * * A netmem_ref can be a struct page* or a struct net_iov* underneath. * * Use the supplied helpers to obtain the underlying memory pointer and fields. */ typedef unsigned long __bitwise netmem_ref; static inline bool netmem_is_net_iov(const netmem_ref netmem) { return (__force unsigned long)netmem & NET_IOV; } /** * __netmem_to_page - unsafely get pointer to the &page backing @netmem * @netmem: netmem reference to convert * * Unsafe version of netmem_to_page(). When @netmem is always page-backed, * e.g. when it's a header buffer, performs faster and generates smaller * object code (no check for the LSB, no WARN). When @netmem points to IOV, * provokes undefined behaviour. * * Return: pointer to the &page (garbage if @netmem is not page-backed). 
*/ static inline struct page *__netmem_to_page(netmem_ref netmem) { return (__force struct page *)netmem; } static inline struct page *netmem_to_page(netmem_ref netmem) { if (WARN_ON_ONCE(netmem_is_net_iov(netmem))) return NULL; return __netmem_to_page(netmem); } static inline struct net_iov *netmem_to_net_iov(netmem_ref netmem) { if (netmem_is_net_iov(netmem)) return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV); DEBUG_NET_WARN_ON_ONCE(true); return NULL; } static inline netmem_ref net_iov_to_netmem(struct net_iov *niov) { return (__force netmem_ref)((unsigned long)niov | NET_IOV); } #define page_to_netmem(p) (_Generic((p), \ const struct page * : (__force const netmem_ref)(p), \ struct page * : (__force netmem_ref)(p))) /** * virt_to_netmem - convert virtual memory pointer to a netmem reference * @data: host memory pointer to convert * * Return: netmem reference to the &page backing this virtual address. */ static inline netmem_ref virt_to_netmem(const void *data) { return page_to_netmem(virt_to_page(data)); } static inline int netmem_ref_count(netmem_ref netmem) { /* The non-pp refcount of net_iov is always 1. On net_iov, we only * support pp refcounting which uses the pp_ref_count field. */ if (netmem_is_net_iov(netmem)) return 1; return page_ref_count(netmem_to_page(netmem)); } static inline unsigned long netmem_pfn_trace(netmem_ref netmem) { if (netmem_is_net_iov(netmem)) return 0; return page_to_pfn(netmem_to_page(netmem)); } /** * __netmem_to_nmdesc - unsafely get pointer to the &netmem_desc backing * @netmem * @netmem: netmem reference to convert * * Unsafe version that can be used only when @netmem is always backed by * system memory, performs faster and generates smaller object code (no * check for the LSB, no WARN). When @netmem points to IOV, provokes * undefined behaviour. * * Return: pointer to the &netmem_desc (garbage if @netmem is not backed * by system memory). 
*/
static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem)
{
	/* Plain cast: the NET_IOV tag bit is neither tested nor cleared. */
	return (__force struct netmem_desc *)netmem;
}

/* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to
 * common fields.
 * @netmem: netmem reference to extract as net_iov.
 *
 * All the sub types of netmem_ref (page, net_iov) have the same pp, pp_magic,
 * dma_addr, and pp_ref_count fields at the same offsets. Thus, we can access
 * these fields without a type check to make sure that the underlying mem is
 * net_iov or page.
 *
 * The resulting value of this function can only be used to access the fields
 * that are NET_IOV_ASSERT_OFFSET'd. Accessing any other fields will result in
 * undefined behavior.
 *
 * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
 */
static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
{
	/* Masks out only the NET_IOV bit; all other bits pass through. */
	return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
}

/* XXX: How to extract netmem_desc from page must be changed, once
 * netmem_desc no longer overlays on page and will be allocated through
 * slab.
 */
/* The _Generic selection carries the const qualifier of @p to the result. */
#define __pp_page_to_nmdesc(p)	(_Generic((p),				\
	const struct page * :	(const struct netmem_desc *)(p),	\
	struct page * :		(struct netmem_desc *)(p)))

/* CAUTION: Check if the page is a pp page before calling this helper or
 * know it's a pp page.
 */
#define pp_page_to_nmdesc(p)						\
({									\
	DEBUG_NET_WARN_ON_ONCE(!page_pool_page_is_pp(p));		\
	__pp_page_to_nmdesc(p);						\
})

/**
 * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem
 * @netmem: netmem reference to get the pointer from
 *
 * Unsafe version of netmem_get_pp(). When @netmem is always page-backed,
 * e.g. when it's a header buffer, performs faster and generates smaller
 * object code (avoids clearing the LSB). When @netmem points to IOV,
 * provokes invalid memory access.
 *
 * Return: pointer to the &page_pool (garbage if @netmem is not page-backed).
*/ static inline struct page_pool *__netmem_get_pp(netmem_ref netmem) { return __netmem_to_nmdesc(netmem)->pp; } static inline struct page_pool *netmem_get_pp(netmem_ref netmem) { return __netmem_clear_lsb(netmem)->pp; } static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem) { return &__netmem_clear_lsb(netmem)->pp_ref_count; } static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid) { /* NUMA node preference only makes sense if we're allocating * system memory. Memory providers (which give us net_iovs) * choose for us. */ if (netmem_is_net_iov(netmem)) return true; return page_to_nid(netmem_to_page(netmem)) == pref_nid; } static inline netmem_ref netmem_compound_head(netmem_ref netmem) { /* niov are never compounded */ if (netmem_is_net_iov(netmem)) return netmem; return page_to_netmem(compound_head(netmem_to_page(netmem))); } /** * __netmem_address - unsafely get pointer to the memory backing @netmem * @netmem: netmem reference to get the pointer for * * Unsafe version of netmem_address(). When @netmem is always page-backed, * e.g. when it's a header buffer, performs faster and generates smaller * object code (no check for the LSB). When @netmem points to IOV, provokes * undefined behaviour. * * Return: pointer to the memory (garbage if @netmem is not page-backed). */ static inline void *__netmem_address(netmem_ref netmem) { return page_address(__netmem_to_page(netmem)); } static inline void *netmem_address(netmem_ref netmem) { if (netmem_is_net_iov(netmem)) return NULL; return __netmem_address(netmem); } /** * netmem_is_pfmemalloc - check if @netmem was allocated under memory pressure * @netmem: netmem reference to check * * Return: true if @netmem is page-backed and the page was allocated under * memory pressure, false otherwise. 
*/ static inline bool netmem_is_pfmemalloc(netmem_ref netmem) { if (netmem_is_net_iov(netmem)) return false; return page_is_pfmemalloc(netmem_to_page(netmem)); } static inline unsigned long netmem_get_dma_addr(netmem_ref netmem) { return __netmem_clear_lsb(netmem)->dma_addr; } void get_netmem(netmem_ref netmem); void put_netmem(netmem_ref netmem); #define netmem_dma_unmap_addr_set(NETMEM, PTR, ADDR_NAME, VAL) \ do { \ if (!netmem_is_net_iov(NETMEM)) \ dma_unmap_addr_set(PTR, ADDR_NAME, VAL); \ else \ dma_unmap_addr_set(PTR, ADDR_NAME, 0); \ } while (0) static inline void netmem_dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { if (!addr) return; dma_unmap_page_attrs(dev, addr, size, dir, attrs); } #endif /* _NET_NETMEM_H */
8 11 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 /* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM mctp #if !defined(_TRACE_MCTP_H) || defined(TRACE_HEADER_MULTI_READ) #define _TRACE_MCTP_H #include <linux/tracepoint.h> #ifndef __TRACE_MCTP_ENUMS #define __TRACE_MCTP_ENUMS enum { MCTP_TRACE_KEY_TIMEOUT, MCTP_TRACE_KEY_REPLIED, MCTP_TRACE_KEY_INVALIDATED, MCTP_TRACE_KEY_CLOSED, MCTP_TRACE_KEY_DROPPED, }; #endif /* __TRACE_MCTP_ENUMS */ TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_TIMEOUT); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_REPLIED); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_INVALIDATED); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_CLOSED); TRACE_DEFINE_ENUM(MCTP_TRACE_KEY_DROPPED); TRACE_EVENT(mctp_key_acquire, TP_PROTO(const struct mctp_sk_key *key), TP_ARGS(key), TP_STRUCT__entry( __field(__u8, paddr) __field(__u8, laddr) __field(__u8, tag) ), TP_fast_assign( __entry->paddr = key->peer_addr; __entry->laddr = key->local_addr; __entry->tag = key->tag; ), TP_printk("local %d, peer %d, tag %1x", __entry->laddr, __entry->paddr, __entry->tag ) ); TRACE_EVENT(mctp_key_release, TP_PROTO(const struct mctp_sk_key *key, int reason), TP_ARGS(key, reason), TP_STRUCT__entry( __field(__u8, paddr) __field(__u8, laddr) __field(__u8, tag) __field(int, reason) ), TP_fast_assign( __entry->paddr = key->peer_addr; __entry->laddr = key->local_addr; __entry->tag = key->tag; __entry->reason = reason; ), TP_printk("local %d, peer %d, tag %1x %s", __entry->laddr, __entry->paddr, __entry->tag, __print_symbolic(__entry->reason, { MCTP_TRACE_KEY_TIMEOUT, "timeout" }, { MCTP_TRACE_KEY_REPLIED, "replied" }, { MCTP_TRACE_KEY_INVALIDATED, "invalidated" }, { MCTP_TRACE_KEY_CLOSED, "closed" }, { MCTP_TRACE_KEY_DROPPED, "dropped" }) ) ); #endif #include <trace/define_trace.h>
1 1 1 1 1 1 1 1 1 1 1 1 1 1 43 43 43 43 5 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 
517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) ST-Ericsson AB 2010 * Authors: Sjur Brendeland * Daniel Martensson */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s(): " fmt, __func__ #include <linux/fs.h> #include <linux/init.h> #include <linux/module.h> #include <linux/netdevice.h> #include <linux/if_ether.h> #include <linux/ip.h> #include <linux/sched.h> #include <linux/sockios.h> #include <linux/caif/if_caif.h> #include <net/rtnetlink.h> #include <net/caif/caif_layer.h> #include <net/caif/cfpkt.h> #include <net/caif/caif_dev.h> /* GPRS PDP connection has MTU to 1500 */ #define GPRS_PDP_MTU 1500 /* 5 sec. connect timeout */ #define CONNECT_TIMEOUT (5 * HZ) #define CAIF_NET_DEFAULT_QUEUE_LEN 500 #define UNDEF_CONNID 0xffffffff /*This list is protected by the rtnl lock. */ static LIST_HEAD(chnl_net_list); MODULE_DESCRIPTION("ST-Ericsson CAIF modem protocol GPRS network device"); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("caif"); enum caif_states { CAIF_CONNECTED = 1, CAIF_CONNECTING, CAIF_DISCONNECTED, CAIF_SHUTDOWN }; struct chnl_net { struct cflayer chnl; struct caif_connect_request conn_req; struct list_head list_field; struct net_device *netdev; wait_queue_head_t netmgmt_wq; /* Flow status to remember and control the transmission. */ bool flowenabled; enum caif_states state; }; static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct sk_buff *skb; struct chnl_net *priv; int pktlen; const u8 *ip_version; u8 buf; priv = container_of(layr, struct chnl_net, chnl); skb = (struct sk_buff *) cfpkt_tonative(pkt); /* Get length of CAIF packet. */ pktlen = skb->len; /* Pass some minimum information and * send the packet to the net stack. 
*/
	skb->dev = priv->netdev;

	/* check the version of IP */
	ip_version = skb_header_pointer(skb, 0, 1, &buf);
	if (!ip_version) {
		kfree_skb(skb);
		return -EINVAL;
	}

	/* The upper nibble of the first byte selects IPv4 or IPv6. */
	switch (*ip_version >> 4) {
	case 4:
		skb->protocol = htons(ETH_P_IP);
		break;
	case 6:
		skb->protocol = htons(ETH_P_IPV6);
		break;
	default:
		kfree_skb(skb);
		priv->netdev->stats.rx_errors++;
		return -EINVAL;
	}

	/* If we change the header in loop mode, the checksum is corrupted. */
	if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	else
		skb->ip_summed = CHECKSUM_NONE;

	netif_rx(skb);

	/* Update statistics. */
	/* pktlen was sampled before the skb was handed to netif_rx(). */
	priv->netdev->stats.rx_packets++;
	priv->netdev->stats.rx_bytes += pktlen;

	return 0;
}

/*
 * Unregister the net_device attached to @dev, if any. The caller must hold
 * the RTNL lock. Always returns 0.
 */
static int delete_device(struct chnl_net *dev)
{
	ASSERT_RTNL();
	if (dev->netdev)
		unregister_netdevice(dev->netdev);
	return 0;
}

/*
 * Work handler: under the RTNL lock, close every device on chnl_net_list
 * whose state is CAIF_SHUTDOWN.
 */
static void close_work(struct work_struct *work)
{
	struct chnl_net *dev = NULL;
	struct list_head *list_node;
	struct list_head *_tmp;

	rtnl_lock();
	list_for_each_safe(list_node, _tmp, &chnl_net_list) {
		dev = list_entry(list_node, struct chnl_net, list_field);
		if (dev->state == CAIF_SHUTDOWN)
			dev_close(dev->netdev);
	}
	rtnl_unlock();
}
static DECLARE_WORK(close_worker, close_work);

/* Take a reference on the net_device that owns @lyr. */
static void chnl_hold(struct cflayer *lyr)
{
	struct chnl_net *priv = container_of(lyr, struct chnl_net, chnl);
	dev_hold(priv->netdev);
}

/* Drop a reference on the net_device that owns @lyr. */
static void chnl_put(struct cflayer *lyr)
{
	struct chnl_net *priv = container_of(lyr, struct chnl_net, chnl);
	dev_put(priv->netdev);
}

/*
 * Control callback of the CAIF layer: translate flow-control and connection
 * indications into channel state, queue state and wake-ups of waiters on
 * netmgmt_wq. @phyid is not used.
 */
static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
			     int phyid)
{
	struct chnl_net *priv = container_of(layr, struct chnl_net, chnl);
	pr_debug("NET flowctrl func called flow: %s\n",
		flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
		flow == CAIF_CTRLCMD_INIT_RSP ? "INIT" :
		flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
		flow == CAIF_CTRLCMD_DEINIT_RSP ? "CLOSE/DEINIT" :
		flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "OPEN_FAIL" :
		flow == CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ?
		 "REMOTE_SHUTDOWN" : "UNKNOWN CTRL COMMAND");

	switch (flow) {
	case CAIF_CTRLCMD_FLOW_OFF_IND:
		priv->flowenabled = false;
		netif_stop_queue(priv->netdev);
		break;
	case CAIF_CTRLCMD_DEINIT_RSP:
		priv->state = CAIF_DISCONNECTED;
		break;
	case CAIF_CTRLCMD_INIT_FAIL_RSP:
		priv->state = CAIF_DISCONNECTED;
		/* Release a chnl_net_open() caller waiting for the connect. */
		wake_up_interruptible(&priv->netmgmt_wq);
		break;
	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
		priv->state = CAIF_SHUTDOWN;
		netif_tx_disable(priv->netdev);
		/* close_work() takes the RTNL lock and closes the device. */
		schedule_work(&close_worker);
		break;
	case CAIF_CTRLCMD_FLOW_ON_IND:
		priv->flowenabled = true;
		netif_wake_queue(priv->netdev);
		break;
	case CAIF_CTRLCMD_INIT_RSP:
		caif_client_register_refcnt(&priv->chnl, chnl_hold, chnl_put);
		priv->state = CAIF_CONNECTED;
		priv->flowenabled = true;
		netif_wake_queue(priv->netdev);
		wake_up_interruptible(&priv->netmgmt_wq);
		break;
	default:
		break;
	}
}

/*
 * ndo_start_xmit handler: pass @skb down the CAIF stack.
 *
 * Every path returns NETDEV_TX_OK. Oversized packets and packets sent while
 * flow is off are freed and counted here; a failed transmit is only counted
 * as dropped.
 */
static netdev_tx_t chnl_net_start_xmit(struct sk_buff *skb,
				       struct net_device *dev)
{
	struct chnl_net *priv;
	struct cfpkt *pkt = NULL;
	int len;
	int result = -1;
	/* Get our private data. */
	priv = netdev_priv(dev);

	if (skb->len > priv->netdev->mtu) {
		pr_warn("Size of skb exceeded MTU\n");
		kfree_skb(skb);
		dev->stats.tx_errors++;
		return NETDEV_TX_OK;
	}

	if (!priv->flowenabled) {
		pr_debug("dropping packets flow off\n");
		kfree_skb(skb);
		dev->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	/* In loop mode the packet goes out with source and destination swapped. */
	if (priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP)
		swap(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

	/* Store original SKB length. */
	len = skb->len;

	pkt = cfpkt_fromnative(CAIF_DIR_OUT, (void *) skb);

	/* Send the packet down the stack. */
	result = priv->chnl.dn->transmit(priv->chnl.dn, pkt);
	if (result) {
		/*
		 * NOTE(review): the skb is not freed here; presumably the
		 * lower layer consumed it - confirm.
		 */
		dev->stats.tx_dropped++;
		return NETDEV_TX_OK;
	}

	/* Update statistics.
*/
	dev->stats.tx_packets++;
	dev->stats.tx_bytes += len;

	return NETDEV_TX_OK;
}

/*
 * ndo_open handler: connect the CAIF channel and wait up to CONNECT_TIMEOUT
 * for the connect to finish.
 *
 * Called with the RTNL lock held; the lock is released while waiting and
 * taken again afterwards. Every failure after the initial priv check goes
 * through the "error" label, which disconnects the client and marks the
 * channel CAIF_DISCONNECTED.
 */
static int chnl_net_open(struct net_device *dev)
{
	struct chnl_net *priv = NULL;
	int result = -1;
	int llifindex, headroom, tailroom, mtu;
	struct net_device *lldev;
	ASSERT_RTNL();
	priv = netdev_priv(dev);
	if (!priv) {
		pr_debug("chnl_net_open: no priv\n");
		return -ENODEV;
	}

	/* A connect that is already in progress is not started again. */
	if (priv->state != CAIF_CONNECTING) {
		priv->state = CAIF_CONNECTING;
		result = caif_connect_client(dev_net(dev), &priv->conn_req,
						&priv->chnl, &llifindex,
						&headroom, &tailroom);
		if (result != 0) {
				pr_debug("err: "
					 "Unable to register and open device,"
					 " Err:%d\n",
					 result);
				goto error;
		}

		/* Look up the link-layer device reported by the connect call. */
		lldev = __dev_get_by_index(dev_net(dev), llifindex);

		if (lldev == NULL) {
			pr_debug("no interface?\n");
			result = -ENODEV;
			goto error;
		}

		dev->needed_tailroom = tailroom + lldev->needed_tailroom;
		dev->hard_header_len = headroom + lldev->hard_header_len +
			lldev->needed_tailroom;

		/*
		 * MTU, head-room etc is not know before we have a
		 * CAIF link layer device available. MTU calculation may
		 * override initial RTNL configuration.
		 * MTU is minimum of current mtu, link layer mtu pluss
		 * CAIF head and tail, and PDP GPRS contexts max MTU.
*/ mtu = min_t(int, dev->mtu, lldev->mtu - (headroom + tailroom)); mtu = min_t(int, GPRS_PDP_MTU, mtu); dev_set_mtu(dev, mtu); if (mtu < 100) { pr_warn("CAIF Interface MTU too small (%d)\n", mtu); result = -ENODEV; goto error; } } rtnl_unlock(); /* Release RTNL lock during connect wait */ result = wait_event_interruptible_timeout(priv->netmgmt_wq, priv->state != CAIF_CONNECTING, CONNECT_TIMEOUT); rtnl_lock(); if (result == -ERESTARTSYS) { pr_debug("wait_event_interruptible woken by a signal\n"); result = -ERESTARTSYS; goto error; } if (result == 0) { pr_debug("connect timeout\n"); result = -ETIMEDOUT; goto error; } if (priv->state != CAIF_CONNECTED) { pr_debug("connect failed\n"); result = -ECONNREFUSED; goto error; } pr_debug("CAIF Netdevice connected\n"); return 0; error: caif_disconnect_client(dev_net(dev), &priv->chnl); priv->state = CAIF_DISCONNECTED; pr_debug("state disconnected\n"); return result; } static int chnl_net_stop(struct net_device *dev) { struct chnl_net *priv; ASSERT_RTNL(); priv = netdev_priv(dev); priv->state = CAIF_DISCONNECTED; caif_disconnect_client(dev_net(dev), &priv->chnl); return 0; } static int chnl_net_init(struct net_device *dev) { struct chnl_net *priv; ASSERT_RTNL(); priv = netdev_priv(dev); INIT_LIST_HEAD(&priv->list_field); return 0; } static void chnl_net_uninit(struct net_device *dev) { struct chnl_net *priv; ASSERT_RTNL(); priv = netdev_priv(dev); list_del_init(&priv->list_field); } static const struct net_device_ops netdev_ops = { .ndo_open = chnl_net_open, .ndo_stop = chnl_net_stop, .ndo_init = chnl_net_init, .ndo_uninit = chnl_net_uninit, .ndo_start_xmit = chnl_net_start_xmit, }; static void chnl_net_destructor(struct net_device *dev) { struct chnl_net *priv = netdev_priv(dev); caif_free_client(&priv->chnl); } static void ipcaif_net_setup(struct net_device *dev) { struct chnl_net *priv; dev->netdev_ops = &netdev_ops; dev->needs_free_netdev = true; dev->priv_destructor = chnl_net_destructor; dev->flags |= IFF_NOARP; 
dev->flags |= IFF_POINTOPOINT; dev->mtu = GPRS_PDP_MTU; dev->tx_queue_len = CAIF_NET_DEFAULT_QUEUE_LEN; priv = netdev_priv(dev); priv->chnl.receive = chnl_recv_cb; priv->chnl.ctrlcmd = chnl_flowctrl_cb; priv->netdev = dev; priv->conn_req.protocol = CAIFPROTO_DATAGRAM; priv->conn_req.link_selector = CAIF_LINK_HIGH_BANDW; priv->conn_req.priority = CAIF_PRIO_LOW; /* Insert illegal value */ priv->conn_req.sockaddr.u.dgm.connection_id = UNDEF_CONNID; priv->flowenabled = false; init_waitqueue_head(&priv->netmgmt_wq); } static int ipcaif_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct chnl_net *priv; u8 loop; priv = netdev_priv(dev); if (nla_put_u32(skb, IFLA_CAIF_IPV4_CONNID, priv->conn_req.sockaddr.u.dgm.connection_id) || nla_put_u32(skb, IFLA_CAIF_IPV6_CONNID, priv->conn_req.sockaddr.u.dgm.connection_id)) goto nla_put_failure; loop = priv->conn_req.protocol == CAIFPROTO_DATAGRAM_LOOP; if (nla_put_u8(skb, IFLA_CAIF_LOOPBACK, loop)) goto nla_put_failure; return 0; nla_put_failure: return -EMSGSIZE; } static void caif_netlink_parms(struct nlattr *data[], struct caif_connect_request *conn_req) { if (!data) { pr_warn("no params data found\n"); return; } if (data[IFLA_CAIF_IPV4_CONNID]) conn_req->sockaddr.u.dgm.connection_id = nla_get_u32(data[IFLA_CAIF_IPV4_CONNID]); if (data[IFLA_CAIF_IPV6_CONNID]) conn_req->sockaddr.u.dgm.connection_id = nla_get_u32(data[IFLA_CAIF_IPV6_CONNID]); if (data[IFLA_CAIF_LOOPBACK]) { if (nla_get_u8(data[IFLA_CAIF_LOOPBACK])) conn_req->protocol = CAIFPROTO_DATAGRAM_LOOP; else conn_req->protocol = CAIFPROTO_DATAGRAM; } } static int ipcaif_newlink(struct net_device *dev, struct rtnl_newlink_params *params, struct netlink_ext_ack *extack) { struct nlattr **data = params->data; int ret; struct chnl_net *caifdev; ASSERT_RTNL(); caifdev = netdev_priv(dev); caif_netlink_parms(data, &caifdev->conn_req); ret = register_netdevice(dev); if (ret) pr_warn("device rtml registration failed\n"); else list_add(&caifdev->list_field, 
&chnl_net_list); /* Use ifindex as connection id, and use loopback channel default. */ if (caifdev->conn_req.sockaddr.u.dgm.connection_id == UNDEF_CONNID) { caifdev->conn_req.sockaddr.u.dgm.connection_id = dev->ifindex; caifdev->conn_req.protocol = CAIFPROTO_DATAGRAM_LOOP; } return ret; } static int ipcaif_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { struct chnl_net *caifdev; ASSERT_RTNL(); caifdev = netdev_priv(dev); caif_netlink_parms(data, &caifdev->conn_req); netdev_state_change(dev); return 0; } static size_t ipcaif_get_size(const struct net_device *dev) { return /* IFLA_CAIF_IPV4_CONNID */ nla_total_size(4) + /* IFLA_CAIF_IPV6_CONNID */ nla_total_size(4) + /* IFLA_CAIF_LOOPBACK */ nla_total_size(2) + 0; } static const struct nla_policy ipcaif_policy[IFLA_CAIF_MAX + 1] = { [IFLA_CAIF_IPV4_CONNID] = { .type = NLA_U32 }, [IFLA_CAIF_IPV6_CONNID] = { .type = NLA_U32 }, [IFLA_CAIF_LOOPBACK] = { .type = NLA_U8 } }; static struct rtnl_link_ops ipcaif_link_ops __read_mostly = { .kind = "caif", .priv_size = sizeof(struct chnl_net), .setup = ipcaif_net_setup, .maxtype = IFLA_CAIF_MAX, .policy = ipcaif_policy, .newlink = ipcaif_newlink, .changelink = ipcaif_changelink, .get_size = ipcaif_get_size, .fill_info = ipcaif_fill_info, }; static int __init chnl_init_module(void) { return rtnl_link_register(&ipcaif_link_ops); } static void __exit chnl_exit_module(void) { struct chnl_net *dev = NULL; struct list_head *list_node; struct list_head *_tmp; rtnl_link_unregister(&ipcaif_link_ops); rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); list_del_init(list_node); delete_device(dev); } rtnl_unlock(); } module_init(chnl_init_module); module_exit(chnl_exit_module);
15 6 15 15 15 15 15 1 1 1 15 15 14 15 1 15 6 6 6 6 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 // SPDX-License-Identifier: GPL-2.0 /* * Performance events callchain code, extracted from core.c: * * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> * Copyright (C) 2008-2011 Red Hat, Inc., Ingo Molnar * Copyright (C) 2008-2011 Red Hat, Inc., Peter Zijlstra * Copyright © 2009 Paul Mackerras, IBM Corp. 
<paulus@au1.ibm.com> */

#include <linux/perf_event.h>
#include <linux/slab.h>
#include <linux/sched/task_stack.h>
#include <linux/uprobes.h>

#include "internal.h"

/*
 * Container for the callchain buffers: one buffer pointer per CPU slot plus
 * the rcu_head used to defer freeing the whole set.
 */
struct callchain_cpus_entries {
	struct rcu_head			rcu_head;
	struct perf_callchain_entry	*cpu_entries[];
};

int sysctl_perf_event_max_stack __read_mostly = PERF_MAX_STACK_DEPTH;
int sysctl_perf_event_max_contexts_per_stack __read_mostly = PERF_MAX_CONTEXTS_PER_STACK;
/* Upper bound (extra2) for the perf_event_max_stack sysctl. */
static const int six_hundred_forty_kb = 640 * 1024;

/*
 * Size in bytes of one callchain entry: the entry header plus one __u64 slot
 * for each of the currently configured max stack and max contexts values.
 */
static inline size_t perf_callchain_entry__sizeof(void)
{
	return (sizeof(struct perf_callchain_entry) +
		sizeof(__u64) * (sysctl_perf_event_max_stack +
				 sysctl_perf_event_max_contexts_per_stack));
}

/* Per-CPU recursion counters, one per perf context. */
static DEFINE_PER_CPU(u8, callchain_recursion[PERF_NR_CONTEXTS]);
/* Number of users of the callchain buffers (see get/put_callchain_buffers). */
static atomic_t nr_callchain_events;
/* Serialises buffer allocation/release and the sysctl handler. */
static DEFINE_MUTEX(callchain_mutex);
/* Current buffer set; published with RCU. */
static struct callchain_cpus_entries *callchain_cpus_entries;


/* Weak default: records nothing unless a strong definition replaces it. */
__weak void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry,
				  struct pt_regs *regs)
{
}

/* Weak default: records nothing unless a strong definition replaces it. */
__weak void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
				struct pt_regs *regs)
{
}

/* RCU callback: free every per-CPU buffer and then the container itself. */
static void release_callchain_buffers_rcu(struct rcu_head *head)
{
	struct callchain_cpus_entries *entries;
	int cpu;

	entries = container_of(head, struct callchain_cpus_entries, rcu_head);

	for_each_possible_cpu(cpu)
		kfree(entries->cpu_entries[cpu]);

	kfree(entries);
}

/*
 * Unpublish the buffer set and queue it for freeing through call_rcu().
 */
static void release_callchain_buffers(void)
{
	struct callchain_cpus_entries *entries;

	entries = callchain_cpus_entries;
	RCU_INIT_POINTER(callchain_cpus_entries, NULL);
	call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
}

/*
 * Allocate the container and one buffer per possible CPU (each large enough
 * for PERF_NR_CONTEXTS entries), then publish the set.
 *
 * Returns 0 on success or -ENOMEM, in which case everything allocated so far
 * is freed.
 */
static int alloc_callchain_buffers(void)
{
	int cpu;
	int size;
	struct callchain_cpus_entries *entries;

	/*
	 * We can't use the percpu allocation API for data that can be
	 * accessed from NMI. Use a temporary manual per cpu allocation
	 * until that gets sorted out.
	 */
	size = offsetof(struct callchain_cpus_entries, cpu_entries[nr_cpu_ids]);

	/* Zeroed so the failure path can kfree() slots never allocated. */
	entries = kzalloc(size, GFP_KERNEL);
	if (!entries)
		return -ENOMEM;

	size = perf_callchain_entry__sizeof() * PERF_NR_CONTEXTS;

	for_each_possible_cpu(cpu) {
		entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
							 cpu_to_node(cpu));
		if (!entries->cpu_entries[cpu])
			goto fail;
	}

	rcu_assign_pointer(callchain_cpus_entries, entries);

	return 0;

fail:
	for_each_possible_cpu(cpu)
		kfree(entries->cpu_entries[cpu]);
	kfree(entries);

	return -ENOMEM;
}

/*
 * Take a reference on the callchain buffers, allocating them for the first
 * user.
 *
 * Returns 0 on success, -EINVAL if the user count is inconsistent,
 * -EOVERFLOW if @event_max_stack exceeds sysctl_perf_event_max_stack, or the
 * allocation error. On any error the reference is dropped again.
 */
int get_callchain_buffers(int event_max_stack)
{
	int err = 0;
	int count;

	mutex_lock(&callchain_mutex);

	count = atomic_inc_return(&nr_callchain_events);
	if (WARN_ON_ONCE(count < 1)) {
		err = -EINVAL;
		goto exit;
	}

	/*
	 * If requesting per event more than the global cap,
	 * return a different error to help userspace figure
	 * this out.
	 *
	 * And also do it here so that we have &callchain_mutex held.
	 */
	if (event_max_stack > sysctl_perf_event_max_stack) {
		err = -EOVERFLOW;
		goto exit;
	}

	if (count == 1)
		err = alloc_callchain_buffers();
exit:
	if (err)
		atomic_dec(&nr_callchain_events);

	mutex_unlock(&callchain_mutex);

	return err;
}

/*
 * Drop a reference on the callchain buffers; when the count reaches zero the
 * buffers are released under callchain_mutex.
 */
void put_callchain_buffers(void)
{
	if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
		release_callchain_buffers();
		mutex_unlock(&callchain_mutex);
	}
}

/*
 * Claim a recursion context and return this CPU's entry for it.
 *
 * *@rctx receives the recursion context index. Returns NULL if no context
 * could be obtained (*@rctx == -1) or if no buffers are published; in the
 * latter case the context is released again before returning.
 */
struct perf_callchain_entry *get_callchain_entry(int *rctx)
{
	int cpu;
	struct callchain_cpus_entries *entries;

	*rctx = get_recursion_context(this_cpu_ptr(callchain_recursion));
	if (*rctx == -1)
		return NULL;

	entries = rcu_dereference(callchain_cpus_entries);
	if (!entries) {
		put_recursion_context(this_cpu_ptr(callchain_recursion), *rctx);
		return NULL;
	}

	cpu = smp_processor_id();

	/* Each CPU buffer holds one entry per context, indexed by *rctx. */
	return (((void *)entries->cpu_entries[cpu]) +
		(*rctx * perf_callchain_entry__sizeof()));
}

/* Release the recursion context obtained from get_callchain_entry(). */
void
put_callchain_entry(int rctx)
{
	put_recursion_context(this_cpu_ptr(callchain_recursion), rctx);
}

/*
 * Replace uretprobe trampoline addresses in @entry, starting at index
 * @start_entry_idx, with the original return addresses recorded in the
 * current task's pending return instances. Compiles to an empty function
 * without CONFIG_UPROBES.
 */
static void fixup_uretprobe_trampoline_entries(struct perf_callchain_entry *entry,
					       int start_entry_idx)
{
#ifdef CONFIG_UPROBES
	struct uprobe_task *utask = current->utask;
	struct return_instance *ri;
	__u64 *cur_ip, *last_ip, tramp_addr;

	if (likely(!utask || !utask->return_instances))
		return;

	cur_ip = &entry->ip[start_entry_idx];
	last_ip = &entry->ip[entry->nr - 1];
	ri = utask->return_instances;
	tramp_addr = uprobe_get_trampoline_vaddr();

	/*
	 * If there are pending uretprobes for the current thread, they are
	 * recorded in a list inside utask->return_instances; each such
	 * pending uretprobe replaces traced user function's return address on
	 * the stack, so when stack trace is captured, instead of seeing
	 * actual function's return address, we'll have one or many uretprobe
	 * trampoline addresses in the stack trace, which are not helpful and
	 * misleading to users.
	 * So here we go over the pending list of uretprobes, and each
	 * encountered trampoline address is replaced with actual return
	 * address.
	 */
	while (ri && cur_ip <= last_ip) {
		if (*cur_ip == tramp_addr) {
			*cur_ip = ri->orig_ret_vaddr;
			ri = ri->next;
		}
		cur_ip++;
	}
#endif
}

/*
 * Capture a callchain into this CPU's entry for the current recursion
 * context.
 *
 * @regs:      register state to start unwinding from
 * @kernel:    record the kernel part (only done when @regs is not user mode)
 * @user:      record the user part (skipped when @crosstask is set)
 * @max_stack: stored as the context's max_stack
 * @crosstask: see @user; a user-only crosstask request returns NULL
 * @add_mark:  store PERF_CONTEXT_KERNEL / PERF_CONTEXT_USER markers
 *
 * Returns the filled entry, or NULL if the request is unsupported or no
 * entry could be obtained. The recursion context is released before
 * returning.
 */
struct perf_callchain_entry *
get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
		   u32 max_stack, bool crosstask, bool add_mark)
{
	struct perf_callchain_entry *entry;
	struct perf_callchain_entry_ctx ctx;
	int rctx, start_entry_idx;

	/* crosstask is not supported for user stacks */
	if (crosstask && user && !kernel)
		return NULL;

	entry = get_callchain_entry(&rctx);
	if (!entry)
		return NULL;

	ctx.entry		= entry;
	ctx.max_stack		= max_stack;
	ctx.nr			= entry->nr = 0;
	ctx.contexts		= 0;
	ctx.contexts_maxed	= false;

	if (kernel && !user_mode(regs)) {
		if (add_mark)
			perf_callchain_store_context(&ctx, PERF_CONTEXT_KERNEL);
		perf_callchain_kernel(&ctx, regs);
	}

	if (user && !crosstask) {
		if (!user_mode(regs)) {
			/* Tasks flagged as kernel threads or user workers are skipped. */
			if (current->flags & (PF_KTHREAD | PF_USER_WORKER))
				goto exit_put;
			regs = task_pt_regs(current);
		}

		if (add_mark)
			perf_callchain_store_context(&ctx, PERF_CONTEXT_USER);

		/* Remember where the user part starts for the trampoline fixup. */
		start_entry_idx = entry->nr;
		perf_callchain_user(&ctx, regs);
		fixup_uretprobe_trampoline_entries(entry, start_entry_idx);
	}

exit_put:
	put_callchain_entry(rctx);

	return entry;
}

/*
 * sysctl handler shared by both callchain limits.
 *
 * The new value is parsed into a local copy first; it is committed only if
 * no callchain events exist, otherwise -EBUSY is returned. Reads, and
 * parse errors, return the proc_dointvec_minmax() result directly.
 */
static int perf_event_max_stack_handler(const struct ctl_table *table, int write,
					void *buffer, size_t *lenp, loff_t *ppos)
{
	int *value = table->data;
	int new_value = *value, ret;
	struct ctl_table new_table = *table;

	/* Parse into new_value so the live setting is untouched until accepted. */
	new_table.data = &new_value;
	ret = proc_dointvec_minmax(&new_table, write, buffer, lenp, ppos);
	if (ret || !write)
		return ret;

	mutex_lock(&callchain_mutex);
	if (atomic_read(&nr_callchain_events))
		ret = -EBUSY;
	else
		*value = new_value;

	mutex_unlock(&callchain_mutex);

	return ret;
}

/* sysctl entries registered under "kernel" by init_callchain_sysctls(). */
static const struct ctl_table callchain_sysctl_table[] = {
	{
		.procname	= "perf_event_max_stack",
		.data		= &sysctl_perf_event_max_stack,
		.maxlen		= sizeof(sysctl_perf_event_max_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= (void *)&six_hundred_forty_kb,
	},
	{
		.procname	= "perf_event_max_contexts_per_stack",
		.data		= &sysctl_perf_event_max_contexts_per_stack,
		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
		.mode		= 0644,
		.proc_handler	= perf_event_max_stack_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE_THOUSAND,
	},
};

/* Register the callchain sysctl table at core initcall time. */
static int __init init_callchain_sysctls(void)
{
	register_sysctl_init("kernel", callchain_sysctl_table);
	return 0;
}
core_initcall(init_callchain_sysctls);
49 915 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RATELIMIT_H #define _LINUX_RATELIMIT_H #include <linux/ratelimit_types.h> #include <linux/sched.h> #include <linux/spinlock.h> static inline void ratelimit_state_init(struct ratelimit_state *rs, int interval, int burst) { memset(rs, 0, sizeof(*rs)); raw_spin_lock_init(&rs->lock); rs->interval = interval; rs->burst = burst; } static inline void ratelimit_default_init(struct ratelimit_state *rs) { return ratelimit_state_init(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); } static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs) { atomic_inc(&rs->missed); } static inline int ratelimit_state_get_miss(struct ratelimit_state *rs) { return atomic_read(&rs->missed); } static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs) { return atomic_xchg_relaxed(&rs->missed, 0); } static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init) { unsigned long flags; raw_spin_lock_irqsave(&rs->lock, flags); rs->interval = interval_init; rs->flags &= ~RATELIMIT_INITIALIZED; atomic_set(&rs->rs_n_left, rs->burst); ratelimit_state_reset_miss(rs); raw_spin_unlock_irqrestore(&rs->lock, flags); } static inline void ratelimit_state_exit(struct ratelimit_state *rs) { int m; if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) return; m = ratelimit_state_reset_miss(rs); if (m) pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, m); } static inline void ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags) { rs->flags = flags; } extern struct ratelimit_state printk_ratelimit_state; #ifdef CONFIG_PRINTK #define 
WARN_ON_RATELIMIT(condition, state) ({ \ bool __rtn_cond = !!(condition); \ WARN_ON(__rtn_cond && __ratelimit(state)); \ __rtn_cond; \ }) #define WARN_RATELIMIT(condition, format, ...) \ ({ \ static DEFINE_RATELIMIT_STATE(_rs, \ DEFAULT_RATELIMIT_INTERVAL, \ DEFAULT_RATELIMIT_BURST); \ int rtn = !!(condition); \ \ if (unlikely(rtn && __ratelimit(&_rs))) \ WARN(rtn, format, ##__VA_ARGS__); \ \ rtn; \ }) #else #define WARN_ON_RATELIMIT(condition, state) \ WARN_ON(condition) #define WARN_RATELIMIT(condition, format, ...) \ ({ \ int rtn = WARN(condition, format, ##__VA_ARGS__); \ rtn; \ }) #endif #endif /* _LINUX_RATELIMIT_H */
2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Copyright (c) 2015 Tom Herbert <tom@herbertland.com>
 */

#ifndef __ILA_H
#define __ILA_H

#include <linux/errno.h>
#include <linux/ip.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/types.h>
#include <net/checksum.h>
#include <net/genetlink.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <uapi/linux/ila.h>

/* 8-byte locator, accessible as bytes, 16/32-bit words or one 64-bit value. */
struct ila_locator {
	union {
		__u8            v8[8];
		__be16          v16[4];
		__be32          v32[2];
		__be64		v64;
	};
};

/*
 * 8-byte identifier. The first byte carries a 3-bit type and a 1-bit
 * csum_neutral flag (bitfield order depends on endianness); the same storage
 * is also accessible as bytes, 16/32-bit words or one 64-bit value.
 */
struct ila_identifier {
	union {
		struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
			u8 __space:4;
			u8 csum_neutral:1;
			u8 type:3;
#elif defined(__BIG_ENDIAN_BITFIELD)
			u8 type:3;
			u8 csum_neutral:1;
			u8 __space:4;
#else
#error  "Adjust your <asm/byteorder.h> defines"
#endif
			u8 __space2[7];
		};
		__u8            v8[8];
		__be16          v16[4];
		__be32          v32[2];
		__be64		v64;
	};
};

#define CSUM_NEUTRAL_FLAG	htonl(0x10000000)

/* An in6_addr overlaid with its locator (first) and identifier (second) halves. */
struct ila_addr {
	union {
		struct in6_addr addr;
		struct {
			struct ila_locator loc;
			struct ila_identifier ident;
		};
	};
};

/* Reinterpret an in6_addr pointer as an ila_addr pointer (plain cast). */
static inline struct ila_addr *ila_a2i(struct in6_addr *addr)
{
	return (struct ila_addr *)addr;
}

/* Translation parameters: locators, saved checksum difference and modes. */
struct ila_params {
	struct ila_locator locator;
	struct ila_locator locator_match;
	__wsum csum_diff;
	u8 csum_mode;
	u8 ident_type;
};

/*
 * Checksum over the complemented two words of @from followed by the two
 * words of @to, computed with csum_partial() and an initial sum of 0.
 */
static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to)
{
	__be32 diff[] = {
		~from[0], ~from[1], to[0], to[1],
	};

	return csum_partial(diff, sizeof(diff), 0);
}

/* True if the csum_neutral bit of @ident is set. */
static inline bool ila_csum_neutral_set(struct ila_identifier ident)
{
	return !!(ident.csum_neutral);
}

void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p,
			     bool set_csum_neutral);

void ila_init_saved_csum(struct ila_params *p);

/* Per-network-namespace ILA state: the translation table and its locks. */
struct ila_net {
	struct {
		struct rhashtable rhash_table;
		spinlock_t *locks; /* Bucket locks for entry manipulation */
		unsigned int locks_mask;
		bool hooks_registered;
	} xlat;
};

int ila_lwt_init(void);
void ila_lwt_fini(void);

int ila_xlat_init_net(struct net *net);
void ila_xlat_pre_exit_net(struct net *net);
void ila_xlat_exit_net(struct net *net);

int ila_xlat_nl_cmd_add_mapping(struct sk_buff *skb, struct genl_info *info);
int ila_xlat_nl_cmd_del_mapping(struct sk_buff *skb, struct genl_info *info);
int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info);
int ila_xlat_nl_cmd_flush(struct sk_buff *skb, struct genl_info *info);
int ila_xlat_nl_dump_start(struct netlink_callback *cb);
int ila_xlat_nl_dump_done(struct netlink_callback *cb);
int ila_xlat_nl_dump(struct sk_buff *skb, struct netlink_callback *cb);

extern unsigned int ila_net_id;

extern struct genl_family ila_nl_family;

#endif /* __ILA_H */
4 4 4 4 4 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 // SPDX-License-Identifier: GPL-2.0-or-later /* Request key authorisation token key definition. * * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved. 
* Written by David Howells (dhowells@redhat.com) * * See Documentation/security/keys/request-key.rst */ #include <linux/sched.h> #include <linux/err.h> #include <linux/seq_file.h> #include <linux/slab.h> #include <linux/uaccess.h> #include "internal.h" #include <keys/request_key_auth-type.h> static int request_key_auth_preparse(struct key_preparsed_payload *); static void request_key_auth_free_preparse(struct key_preparsed_payload *); static int request_key_auth_instantiate(struct key *, struct key_preparsed_payload *); static void request_key_auth_describe(const struct key *, struct seq_file *); static void request_key_auth_revoke(struct key *); static void request_key_auth_destroy(struct key *); static long request_key_auth_read(const struct key *, char *, size_t); /* * The request-key authorisation key type definition. */ struct key_type key_type_request_key_auth = { .name = ".request_key_auth", .def_datalen = sizeof(struct request_key_auth), .preparse = request_key_auth_preparse, .free_preparse = request_key_auth_free_preparse, .instantiate = request_key_auth_instantiate, .describe = request_key_auth_describe, .revoke = request_key_auth_revoke, .destroy = request_key_auth_destroy, .read = request_key_auth_read, }; static int request_key_auth_preparse(struct key_preparsed_payload *prep) { return 0; } static void request_key_auth_free_preparse(struct key_preparsed_payload *prep) { } /* * Instantiate a request-key authorisation key. */ static int request_key_auth_instantiate(struct key *key, struct key_preparsed_payload *prep) { rcu_assign_keypointer(key, (struct request_key_auth *)prep->data); return 0; } /* * Describe an authorisation token. 
*/ static void request_key_auth_describe(const struct key *key, struct seq_file *m) { struct request_key_auth *rka = dereference_key_rcu(key); if (!rka) return; seq_puts(m, "key:"); seq_puts(m, key->description); if (key_is_positive(key)) seq_printf(m, " pid:%d ci:%zu", rka->pid, rka->callout_len); } /* * Read the callout_info data (retrieves the callout information). * - the key's semaphore is read-locked */ static long request_key_auth_read(const struct key *key, char *buffer, size_t buflen) { struct request_key_auth *rka = dereference_key_locked(key); size_t datalen; long ret; if (!rka) return -EKEYREVOKED; datalen = rka->callout_len; ret = datalen; /* we can return the data as is */ if (buffer && buflen > 0) { if (buflen > datalen) buflen = datalen; memcpy(buffer, rka->callout_info, buflen); } return ret; } static void free_request_key_auth(struct request_key_auth *rka) { if (!rka) return; key_put(rka->target_key); key_put(rka->dest_keyring); if (rka->cred) put_cred(rka->cred); kfree(rka->callout_info); kfree(rka); } /* * Dispose of the request_key_auth record under RCU conditions */ static void request_key_auth_rcu_disposal(struct rcu_head *rcu) { struct request_key_auth *rka = container_of(rcu, struct request_key_auth, rcu); free_request_key_auth(rka); } /* * Handle revocation of an authorisation token key. * * Called with the key sem write-locked. */ static void request_key_auth_revoke(struct key *key) { struct request_key_auth *rka = dereference_key_locked(key); kenter("{%d}", key->serial); rcu_assign_keypointer(key, NULL); call_rcu(&rka->rcu, request_key_auth_rcu_disposal); } /* * Destroy an instantiation authorisation token key. 
*/ static void request_key_auth_destroy(struct key *key) { struct request_key_auth *rka = rcu_access_pointer(key->payload.rcu_data0); kenter("{%d}", key->serial); if (rka) { rcu_assign_keypointer(key, NULL); call_rcu(&rka->rcu, request_key_auth_rcu_disposal); } } /* * Create an authorisation token for /sbin/request-key or whoever to gain * access to the caller's security data. */ struct key *request_key_auth_new(struct key *target, const char *op, const void *callout_info, size_t callout_len, struct key *dest_keyring) { struct request_key_auth *rka, *irka; const struct cred *cred = current_cred(); struct key *authkey = NULL; char desc[20]; int ret = -ENOMEM; kenter("%d,", target->serial); /* allocate a auth record */ rka = kzalloc(sizeof(*rka), GFP_KERNEL); if (!rka) goto error; rka->callout_info = kmemdup(callout_info, callout_len, GFP_KERNEL); if (!rka->callout_info) goto error_free_rka; rka->callout_len = callout_len; strscpy(rka->op, op, sizeof(rka->op)); /* see if the calling process is already servicing the key request of * another process */ if (cred->request_key_auth) { /* it is - use that instantiation context here too */ down_read(&cred->request_key_auth->sem); /* if the auth key has been revoked, then the key we're * servicing is already instantiated */ if (test_bit(KEY_FLAG_REVOKED, &cred->request_key_auth->flags)) { up_read(&cred->request_key_auth->sem); ret = -EKEYREVOKED; goto error_free_rka; } irka = cred->request_key_auth->payload.data[0]; rka->cred = get_cred(irka->cred); rka->pid = irka->pid; up_read(&cred->request_key_auth->sem); } else { /* it isn't - use this process as the context */ rka->cred = get_cred(cred); rka->pid = current->pid; } rka->target_key = key_get(target); rka->dest_keyring = key_get(dest_keyring); /* allocate the auth key */ sprintf(desc, "%x", target->serial); authkey = key_alloc(&key_type_request_key_auth, desc, cred->fsuid, cred->fsgid, cred, KEY_POS_VIEW | KEY_POS_READ | KEY_POS_SEARCH | KEY_POS_LINK | KEY_USR_VIEW, 
KEY_ALLOC_NOT_IN_QUOTA, NULL); if (IS_ERR(authkey)) { ret = PTR_ERR(authkey); goto error_free_rka; } /* construct the auth key */ ret = key_instantiate_and_link(authkey, rka, 0, NULL, NULL); if (ret < 0) goto error_put_authkey; kleave(" = {%d,%d}", authkey->serial, refcount_read(&authkey->usage)); return authkey; error_put_authkey: key_put(authkey); error_free_rka: free_request_key_auth(rka); error: kleave("= %d", ret); return ERR_PTR(ret); } /* * Search the current process's keyrings for the authorisation key for * instantiation of a key. */ struct key *key_get_instantiation_authkey(key_serial_t target_id) { char description[16]; struct keyring_search_context ctx = { .index_key.type = &key_type_request_key_auth, .index_key.description = description, .cred = current_cred(), .match_data.cmp = key_default_cmp, .match_data.raw_data = description, .match_data.lookup_type = KEYRING_SEARCH_LOOKUP_DIRECT, .flags = (KEYRING_SEARCH_DO_STATE_CHECK | KEYRING_SEARCH_RECURSE), }; struct key *authkey; key_ref_t authkey_ref; ctx.index_key.desc_len = sprintf(description, "%x", target_id); rcu_read_lock(); authkey_ref = search_process_keyrings_rcu(&ctx); rcu_read_unlock(); if (IS_ERR(authkey_ref)) { authkey = ERR_CAST(authkey_ref); if (authkey == ERR_PTR(-EAGAIN)) authkey = ERR_PTR(-ENOKEY); goto error; } authkey = key_ref_to_ptr(authkey_ref); if (test_bit(KEY_FLAG_REVOKED, &authkey->flags)) { key_put(authkey); authkey = ERR_PTR(-EKEYREVOKED); } error: return authkey; }
66 3 4 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _KBD_KERN_H
#define _KBD_KERN_H

#include <linux/tty.h>
#include <linux/interrupt.h>
#include <linux/keyboard.h>

extern char *func_table[MAX_NR_FUNC];

/*
 * kbd->xxx contains the VC-local things (flag settings etc..)
 *
 * Note: externally visible are LED_SCR, LED_NUM, LED_CAP defined in kd.h
 *       The code in KDGETLED / KDSETLED depends on the internal and
 *       external order being the same.
 *
 * Note: lockstate is used as index in the array key_map.
 */
struct kbd_struct {

	unsigned char lockstate;
/* 8 modifiers - the names do not have any meaning at all;
   they can be associated to arbitrarily chosen keys */
#define VC_SHIFTLOCK	KG_SHIFT	/* shift lock mode */
#define VC_ALTGRLOCK	KG_ALTGR	/* altgr lock mode */
#define VC_CTRLLOCK	KG_CTRL 	/* control lock mode */
#define VC_ALTLOCK	KG_ALT  	/* alt lock mode */
#define VC_SHIFTLLOCK	KG_SHIFTL	/* shiftl lock mode */
#define VC_SHIFTRLOCK	KG_SHIFTR	/* shiftr lock mode */
#define VC_CTRLLLOCK	KG_CTRLL 	/* ctrll lock mode */
#define VC_CTRLRLOCK	KG_CTRLR 	/* ctrlr lock mode */
	unsigned char slockstate; 	/* for `sticky' Shift, Ctrl, etc. */

	unsigned char ledmode:1;
#define LED_SHOW_FLAGS 0        /* traditional state */
#define LED_SHOW_IOCTL 1        /* only change leds upon ioctl */

	unsigned char ledflagstate:4;	/* flags, not lights */
	unsigned char default_ledflagstate:4;
#define VC_SCROLLOCK	0	/* scroll-lock mode */
#define VC_NUMLOCK	1	/* numeric lock mode */
#define VC_CAPSLOCK	2	/* capslock mode */
#define VC_KANALOCK	3	/* kanalock mode */

	unsigned char kbdmode:3;	/* one 3-bit value */
#define VC_XLATE	0	/* translate keycodes using keymap */
#define VC_MEDIUMRAW	1	/* medium raw (keycode) mode */
#define VC_RAW		2	/* raw (scancode) mode */
#define VC_UNICODE	3	/* Unicode mode */
#define VC_OFF		4	/* disabled mode */

	unsigned char modeflags:5;
#define VC_APPLIC	0	/* application key mode */
#define VC_CKMODE	1	/* cursor key mode */
#define VC_REPEAT	2	/* keyboard repeat */
#define VC_CRLF		3	/* 0 - enter sends CR, 1 - enter sends CRLF */
#define VC_META		4	/* 0 - meta, 1 - meta=prefix with ESC */
};

extern int kbd_init(void);

extern void setledstate(struct kbd_struct *kbd, unsigned int led);

extern int do_poke_blanked_console;

extern void (*kbd_ledfunc)(unsigned int led);

extern int set_console(int nr);
extern void schedule_console_callback(void);

/*
 * Bit helpers: in all of the functions below @flag is a bit number, not a
 * mask. The vc_kbd_* testers return 0 or 1; set_/clr_/chg_ set, clear or
 * toggle the bit in the named field.
 */

/* Test bit @flag of kbd->modeflags. */
static inline int vc_kbd_mode(struct kbd_struct * kbd, int flag)
{
	return ((kbd->modeflags >> flag) & 1);
}

/* Test bit @flag of kbd->ledflagstate. */
static inline int vc_kbd_led(struct kbd_struct * kbd, int flag)
{
	return ((kbd->ledflagstate >> flag) & 1);
}

/* Set bit @flag of kbd->modeflags. */
static inline void set_vc_kbd_mode(struct kbd_struct * kbd, int flag)
{
	kbd->modeflags |= 1 << flag;
}

/* Set bit @flag of kbd->ledflagstate. */
static inline void set_vc_kbd_led(struct kbd_struct * kbd, int flag)
{
	kbd->ledflagstate |= 1 << flag;
}

/* Clear bit @flag of kbd->modeflags. */
static inline void clr_vc_kbd_mode(struct kbd_struct * kbd, int flag)
{
	kbd->modeflags &= ~(1 << flag);
}

/* Clear bit @flag of kbd->ledflagstate. */
static inline void clr_vc_kbd_led(struct kbd_struct * kbd, int flag)
{
	kbd->ledflagstate &= ~(1 << flag);
}

/* Toggle bit @flag of kbd->lockstate. */
static inline void chg_vc_kbd_lock(struct kbd_struct * kbd, int flag)
{
	kbd->lockstate ^= 1 << flag;
}

/* Toggle bit @flag of kbd->slockstate. */
static inline void chg_vc_kbd_slock(struct kbd_struct * kbd, int flag)
{
	kbd->slockstate ^= 1 << flag;
}

/* Toggle bit @flag of kbd->modeflags. */
static inline void chg_vc_kbd_mode(struct kbd_struct * kbd, int flag)
{
	kbd->modeflags ^= 1 << flag;
}

/* Toggle bit @flag of kbd->ledflagstate. */
static inline void chg_vc_kbd_led(struct kbd_struct * kbd, int flag)
{
	kbd->ledflagstate ^= 1 << flag;
}

/* XORs @x with 0xf000. */
#define U(x) ((x) ^ 0xf000)

#define BRL_UC_ROW 0x2800

/* keyboard.c */

struct console;

void vt_set_leds_compute_shiftstate(void);

/* defkeymap.c */

extern unsigned int keymap_count;

#endif
2 2 2 2 1 1 2 2 1 1 2 5 5 5 2 3 5 5 1 1 1 1 1 1 1 1 1 1 1 1 4 4 4 4 1 4 4 4 4 4 2 2 2 2 2 2 2 1 4 4 4 4 4 4 4 2 5 5 5 5 1 5 5 1 1 1 1 1 4 4 3 2 2 2 2 16 1 5 4 1 1 1 1 1 1 4 4 4 6 5 5 6 5 5 1 1 1 2 2 2 2 2 16 1 1 1 2 1 4 4 2 2 1 1 1 3 3 2 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 
467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 
967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 
1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 
1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 
2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 
2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 
2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 
3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494 3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506 3507 3508 3509 3510 3511 3512 3513 3514 3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549 3550 3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601 3602 3603 3604 3605 3606 3607 3608 3609 3610 3611 3612 3613 3614 3615 3616 3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722 3723 3724 3725 3726 3727 3728 3729 3730 3731 3732 3733 3734 3735 3736 3737 3738 3739 3740 3741 3742 3743 3744 3745 3746 3747 3748 3749 3750 3751 3752 3753 3754 3755 3756 3757 3758 3759 3760 3761 3762 3763 3764 3765 3766 3767 3768 3769 3770 3771 3772 
3773 3774 3775 3776 3777 3778 3779 3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825 3826 3827 3828 3829 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867 3868 3869 3870 3871 3872 3873 3874 3875 3876 3877 3878 3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889 3890 3891 3892 3893 3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905 3906 3907 3908 3909 3910 3911 3912 3913 3914 3915 3916 3917 3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930 3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996 3997 3998 3999 4000 4001 4002 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117 4118 4119 4120 4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144 4145 4146 4147 4148 4149 4150 4151 4152 4153 4154 4155 4156 4157 4158 4159 4160 4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 
4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201 4202 4203 4204 4205 4206 4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338 4339 4340 4341 4342 4343 4344 4345 4346 4347 4348 4349 4350 4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362 4363 4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374 4375 4376 4377 4378 4379 4380 4381 4382 4383 4384 4385 4386 4387 4388 4389 4390 4391 4392 4393 4394 4395 4396 4397 4398 4399 4400 4401 4402 4403 4404 4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4526 4527 4528 4529 4530 4531 4532 4533 4534 4535 4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 
4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4599 4600 4601 4602 4603 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623 4624 4625 4626 4627 4628 4629 4630 4631 4632 4633 4634 4635 4636 4637 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666 4667 4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 // SPDX-License-Identifier: GPL-2.0 /* * n_gsm.c GSM 0710 tty multiplexor * Copyright (c) 2009/10 Intel Corporation * Copyright (c) 2022/23 Siemens Mobility GmbH * * * THIS IS A DEVELOPMENT SNAPSHOT IT IS NOT A FINAL RELEASE * * * Outgoing path: * tty -> DLCI fifo -> scheduler -> GSM MUX data queue ---o-> ldisc * control message -> GSM MUX control queue --´ * * Incoming path: * ldisc -> gsm_queue() -o--> tty * `-> gsm_control_response() * * TO DO: * Mostly done: ioctls for setting modes/timing * Partly done: hooks so you can pull off frames to non tty devs * Restart DLCI 0 when it closes ? * Improve the tx engine * Resolve tx side locking by adding a queue_head and routing * all control traffic via it * General tidy/document * Review the locking/move to refcounts more (mux now moved to an * alloc/free model ready) * Use newest tty open/close port helpers and install hooks * What to do about power functions ? 
* Termios setting and negotiation * Do we need a 'which mux are you' ioctl to correlate mux and tty sets * */ #include <linux/types.h> #include <linux/major.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/fcntl.h> #include <linux/sched/signal.h> #include <linux/interrupt.h> #include <linux/tty.h> #include <linux/bitfield.h> #include <linux/ctype.h> #include <linux/mm.h> #include <linux/math.h> #include <linux/nospec.h> #include <linux/string.h> #include <linux/slab.h> #include <linux/poll.h> #include <linux/bitops.h> #include <linux/file.h> #include <linux/uaccess.h> #include <linux/module.h> #include <linux/timer.h> #include <linux/tty_flip.h> #include <linux/tty_driver.h> #include <linux/serial.h> #include <linux/kfifo.h> #include <linux/skbuff.h> #include <net/arp.h> #include <linux/ip.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/gsmmux.h> #include "tty.h" static int debug; module_param(debug, int, 0600); /* Module debug bits */ #define DBG_DUMP BIT(0) /* Data transmission dump. */ #define DBG_CD_ON BIT(1) /* Always assume CD line on. */ #define DBG_DATA BIT(2) /* Data transmission details. */ #define DBG_ERRORS BIT(3) /* Details for fail conditions. */ #define DBG_TTY BIT(4) /* Transmission statistics for DLCI TTYs. */ #define DBG_PAYLOAD BIT(5) /* Limits DBG_DUMP to payload frames. */ /* Defaults: these are from the specification */ #define T1 10 /* 100mS */ #define T2 34 /* 333mS */ #define T3 10 /* 10s */ #define N2 3 /* Retry 3 times */ #define K 2 /* outstanding I frames */ #define MAX_T3 255 /* In seconds. */ #define MAX_WINDOW_SIZE 7 /* Limit of K in error recovery mode. */ /* Use long timers for testing at low speed with debug on */ #ifdef DEBUG_TIMING #define T1 100 #define T2 200 #endif /* * Semi-arbitrary buffer size limits. 
0710 is normally run with 32-64 byte
 * limits so this is plenty
 */
#define MAX_MRU 1500
#define MAX_MTU 1500
/* Framing overhead plus one further byte */
#define MIN_MTU (PROT_OVERHEAD + 1)
/* SOF, ADDR, CTRL, LEN1, LEN2, ..., FCS, EOF */
#define PROT_OVERHEAD 7
/* NOTE(review): HZ*10 is presumably a 10 second timeout in jiffies - confirm at the point of use */
#define GSM_NET_TX_TIMEOUT (HZ*10)

/*
 * struct gsm_mux_net - network interface
 *
 * Created when net interface is initialized.
 */
struct gsm_mux_net {
	struct kref ref;	/* Reference count */
	struct gsm_dlci *dlci;	/* presumably the DLCI carrying this interface - TODO confirm */
};

/*
 * Each block of data we have queued to go out is in the form of
 * a gsm_msg which holds everything we need in a link layer independent
 * format
 */
struct gsm_msg {
	struct list_head list;
	u8 addr;		/* DLCI address + flags */
	u8 ctrl;		/* Control byte + flags */
	unsigned int len;	/* Length of data block (can be zero) */
	u8 *data;		/* Points into buffer but not at the start */
	u8 buffer[];		/* Flexible array member: storage that data points into */
};

/* States a DLCI can be in; see the per-state notes below */
enum gsm_dlci_state {
	DLCI_CLOSED,
	DLCI_WAITING_CONFIG,	/* Waiting for DLCI configuration from user */
	DLCI_CONFIGURE,		/* Sending PN (for adaption > 1) */
	DLCI_OPENING,		/* Sending SABM not seen UA */
	DLCI_OPEN,		/* SABM/UA complete */
	DLCI_CLOSING,		/* Sending DISC not seen UA/DM */
};

enum gsm_dlci_mode {
	DLCI_MODE_ABM,		/* Normal Asynchronous Balanced Mode */
	DLCI_MODE_ADM,		/* Asynchronous Disconnected Mode */
};

/*
 * Each active data link has a gsm_dlci structure associated which ties
 * the link layer to an optional tty (if the tty side is open). To avoid
 * complexity right now these are only ever freed up when the mux is
 * shut down.
 *
 * At the moment we don't free DLCI objects until the mux is torn down;
 * this avoids object lifetime issues but might be worth review later.
 */
struct gsm_dlci {
	struct gsm_mux *gsm;
	int addr;
	enum gsm_dlci_state state;
	struct mutex mutex;

	/* Link layer */
	enum gsm_dlci_mode mode;
	spinlock_t lock;	/* Protects the internal state */
	struct timer_list t1;	/* Retransmit timer for SABM and UA */
	int retries;
	/* Uplink tty if active */
	struct tty_port port;	/* The tty bound to this DLCI if there is one */
#define TX_SIZE		4096	/* Must be power of 2. */
	struct kfifo fifo;	/* Queue fifo for the DLCI */
	int adaption;		/* Adaption layer in use */
	int prev_adaption;
	u32 modem_rx;		/* Our incoming virtual modem lines */
	u32 modem_tx;		/* Our outgoing modem lines */
	unsigned int mtu;
	bool dead;		/* Refuse re-open */
	/* Configuration */
	u8 prio;		/* Priority */
	u8 ftype;		/* Frame type */
	u8 k;			/* Window size */
	/* Flow control */
	bool throttled;		/* Private copy of throttle state */
	bool constipated;	/* Throttle status for outgoing */
	/* Packetised I/O */
	struct sk_buff *skb;	/* Frame being sent */
	struct sk_buff_head skb_list;	/* Queued frames */
	/* Data handling callback */
	void (*data)(struct gsm_dlci *dlci, const u8 *data, int len);
	void (*prev_data)(struct gsm_dlci *dlci, const u8 *data, int len);
	struct net_device *net;	/* network interface, if created */
};

/*
 * Parameter bits used for parameter negotiation according to 3GPP 27.010
 * chapter 5.4.6.3.1.
 */
struct gsm_dlci_param_bits {
	u8 d_bits;
	u8 i_cl_bits;
	u8 p_bits;
	u8 t_bits;
	__le16 n_bits;		/* The only multi-byte member; stored little-endian */
	u8 na_bits;
	u8 k_bits;
};
/* The structure must occupy exactly 8 bytes */
static_assert(sizeof(struct gsm_dlci_param_bits) == 8);

/*
 * Bit masks for the members of struct gsm_dlci_param_bits.
 * NOTE(review): the PN_<X>_FIELD_* to <x>_bits pairing is inferred from the
 * names - confirm against the encode/decode helpers.
 */
#define PN_D_FIELD_DLCI		GENMASK(5, 0)
#define PN_I_CL_FIELD_FTYPE	GENMASK(3, 0)
#define PN_I_CL_FIELD_ADAPTION	GENMASK(7, 4)
#define PN_P_FIELD_PRIO		GENMASK(5, 0)
#define PN_T_FIELD_T1		GENMASK(7, 0)
#define PN_N_FIELD_N1		GENMASK(15, 0)
#define PN_NA_FIELD_N2		GENMASK(7, 0)
#define PN_K_FIELD_K		GENMASK(2, 0)

/* Total number of supported devices */
#define GSM_TTY_MINORS		256

/* DLCI 0, 62/63 are special or reserved see gsmtty_open */
#define NUM_DLCI		64

/*
 * DLCI 0 is used to pass control blocks out of band of the data
 * flow (and with a higher link priority). One command can be outstanding
 * at a time and we use this structure to manage them.
They are created
 * and destroyed by the user context, and updated by the receive paths
 * and timers
 */
struct gsm_control {
	u8 cmd;		/* Command we are issuing */
	u8 *data;	/* Data for the command in case we retransmit */
	int len;	/* Length of block for retransmission */
	int done;	/* Done flag */
	int error;	/* Error if any */
};

/*
 * Frame encoding selector.
 * NOTE(review): presumably "basic option" vs "advanced option" framing -
 * confirm where gsm_mux.encoding is evaluated.
 */
enum gsm_encoding {
	GSM_BASIC_OPT,
	GSM_ADV_OPT,
};

/*
 * Receiver state, stored in gsm_mux.state.
 * NOTE(review): the GSM0_ and GSM1_ prefixes presumably separate the states of
 * two different frame parsers - confirm against the receive handlers.
 */
enum gsm_mux_state {
	GSM_SEARCH,
	GSM0_ADDRESS,
	GSM0_CONTROL,
	GSM0_LEN0,
	GSM0_LEN1,
	GSM0_DATA,
	GSM0_FCS,
	GSM0_SSOF,
	GSM1_START,
	GSM1_ADDRESS,
	GSM1_CONTROL,
	GSM1_DATA,
	GSM1_OVERRUN,
};

/*
 * Each GSM mux we have is represented by this structure. If we are
 * operating as an ldisc then we use this structure as our ldisc
 * state. We need to sort out lifetimes and locking with respect
 * to the gsm mux array. For now we don't free DLCI objects that
 * have been instantiated until the mux itself is terminated.
 *
 * To consider further: tty open versus mux shutdown.
 */
struct gsm_mux {
	struct tty_struct *tty;		/* The tty our ldisc is bound to */
	spinlock_t lock;
	struct mutex mutex;
	unsigned int num;
	struct kref ref;		/* Reference count */

	/* Events on the GSM channel */
	wait_queue_head_t event;

	/* ldisc send work */
	struct work_struct tx_work;

	/* Bits for GSM mode decoding */

	/* Framing Layer */
	u8 *buf;
	enum gsm_mux_state state;
	unsigned int len;
	unsigned int address;
	unsigned int count;
	bool escape;
	enum gsm_encoding encoding;
	u8 control;
	u8 fcs;
	u8 *txframe;			/* TX framing buffer */

	/* Method for the receiver side */
	void (*receive)(struct gsm_mux *gsm, u8 ch);

	/* Link Layer */
	unsigned int mru;
	unsigned int mtu;
	int initiator;			/* Did we initiate connection */
	bool dead;			/* Has the mux been shut down */
	struct gsm_dlci *dlci[NUM_DLCI];	/* One slot per DLCI address */
	int old_c_iflag;		/* termios c_iflag value before attach */
	bool constipated;		/* Asked by remote to shut up */
	bool has_devices;		/* Devices were registered */

	spinlock_t tx_lock;
	unsigned int tx_bytes;		/* TX data outstanding */
	/* NOTE(review): presumably high/low watermarks for tx_bytes - confirm in the TX path */
#define TX_THRESH_HI		8192
#define TX_THRESH_LO		2048
	struct list_head tx_ctrl_list;	/* Pending control packets */
	struct list_head tx_data_list;	/* Pending data packets */

	/* Control messages */
	struct timer_list kick_timer;	/* Kick TX queuing on timeout */
	struct timer_list t2_timer;	/* Retransmit timer for commands */
	int cretries;			/* Command retry counter */
	struct gsm_control *pending_cmd;/* Our current pending command */
	spinlock_t control_lock;	/* Protects the pending command */

	/* Keep-alive */
	struct timer_list ka_timer;	/* Keep-alive response timer */
	u8 ka_num;			/* Keep-alive match pattern */
	signed int ka_retries;		/* Keep-alive retry counter, -1 if not yet initialized */

	/* Configuration */
	int adaption;		/* 1 or 2 supported */
	u8 ftype;		/* UI or UIH */
	int t1, t2;		/* Timers in 1/100th of a sec */
	unsigned int t3;	/* Power wake-up timer in seconds. */
	int n2;			/* Retry count */
	u8 k;			/* Window size */
	bool wait_config;	/* Wait for configuration by ioctl before DLCI open */
	u32 keep_alive;		/* Control channel keep-alive in 10ms */

	/* Statistics (not currently exposed) */
	unsigned long bad_fcs;
	unsigned long malformed;
	unsigned long io_error;
	unsigned long open_error;
	unsigned long bad_size;
	unsigned long unsupported;
};


/*
 * Mux objects - needed so that we can translate a tty index into the
 * relevant mux and DLCI.
 */

#define MAX_MUX		4			/* 256 minors */
static struct gsm_mux *gsm_mux[MAX_MUX];	/* GSM muxes */
/* NOTE(review): presumably guards the gsm_mux[] table above - confirm at the users */
static DEFINE_SPINLOCK(gsm_mux_lock);

static struct tty_driver *gsm_tty_driver;

/*
 * This section of the driver logic implements the GSM encodings
 * both the basic and the 'advanced'. Reliable transport is not
 * supported.
*/ #define CR 0x02 #define EA 0x01 #define PF 0x10 /* I is special: the rest are ..*/ #define RR 0x01 #define UI 0x03 #define RNR 0x05 #define REJ 0x09 #define DM 0x0F #define SABM 0x2F #define DISC 0x43 #define UA 0x63 #define UIH 0xEF /* Channel commands */ #define CMD_NSC 0x09 #define CMD_TEST 0x11 #define CMD_PSC 0x21 #define CMD_RLS 0x29 #define CMD_FCOFF 0x31 #define CMD_PN 0x41 #define CMD_RPN 0x49 #define CMD_FCON 0x51 #define CMD_CLD 0x61 #define CMD_SNC 0x69 #define CMD_MSC 0x71 /* Virtual modem bits */ #define MDM_FC 0x01 #define MDM_RTC 0x02 #define MDM_RTR 0x04 #define MDM_IC 0x20 #define MDM_DV 0x40 #define GSM0_SOF 0xF9 #define GSM1_SOF 0x7E #define GSM1_ESCAPE 0x7D #define GSM1_ESCAPE_BITS 0x20 #define XON 0x11 #define XOFF 0x13 #define ISO_IEC_646_MASK 0x7F static const struct tty_port_operations gsm_port_ops; /* * CRC table for GSM 0710 */ static const u8 gsm_fcs8[256] = { 0x00, 0x91, 0xE3, 0x72, 0x07, 0x96, 0xE4, 0x75, 0x0E, 0x9F, 0xED, 0x7C, 0x09, 0x98, 0xEA, 0x7B, 0x1C, 0x8D, 0xFF, 0x6E, 0x1B, 0x8A, 0xF8, 0x69, 0x12, 0x83, 0xF1, 0x60, 0x15, 0x84, 0xF6, 0x67, 0x38, 0xA9, 0xDB, 0x4A, 0x3F, 0xAE, 0xDC, 0x4D, 0x36, 0xA7, 0xD5, 0x44, 0x31, 0xA0, 0xD2, 0x43, 0x24, 0xB5, 0xC7, 0x56, 0x23, 0xB2, 0xC0, 0x51, 0x2A, 0xBB, 0xC9, 0x58, 0x2D, 0xBC, 0xCE, 0x5F, 0x70, 0xE1, 0x93, 0x02, 0x77, 0xE6, 0x94, 0x05, 0x7E, 0xEF, 0x9D, 0x0C, 0x79, 0xE8, 0x9A, 0x0B, 0x6C, 0xFD, 0x8F, 0x1E, 0x6B, 0xFA, 0x88, 0x19, 0x62, 0xF3, 0x81, 0x10, 0x65, 0xF4, 0x86, 0x17, 0x48, 0xD9, 0xAB, 0x3A, 0x4F, 0xDE, 0xAC, 0x3D, 0x46, 0xD7, 0xA5, 0x34, 0x41, 0xD0, 0xA2, 0x33, 0x54, 0xC5, 0xB7, 0x26, 0x53, 0xC2, 0xB0, 0x21, 0x5A, 0xCB, 0xB9, 0x28, 0x5D, 0xCC, 0xBE, 0x2F, 0xE0, 0x71, 0x03, 0x92, 0xE7, 0x76, 0x04, 0x95, 0xEE, 0x7F, 0x0D, 0x9C, 0xE9, 0x78, 0x0A, 0x9B, 0xFC, 0x6D, 0x1F, 0x8E, 0xFB, 0x6A, 0x18, 0x89, 0xF2, 0x63, 0x11, 0x80, 0xF5, 0x64, 0x16, 0x87, 0xD8, 0x49, 0x3B, 0xAA, 0xDF, 0x4E, 0x3C, 0xAD, 0xD6, 0x47, 0x35, 0xA4, 0xD1, 0x40, 0x32, 0xA3, 0xC4, 0x55, 0x27, 0xB6, 0xC3, 0x52, 
0x20, 0xB1, 0xCA, 0x5B, 0x29, 0xB8, 0xCD, 0x5C, 0x2E, 0xBF, 0x90, 0x01, 0x73, 0xE2, 0x97, 0x06, 0x74, 0xE5, 0x9E, 0x0F, 0x7D, 0xEC, 0x99, 0x08, 0x7A, 0xEB, 0x8C, 0x1D, 0x6F, 0xFE, 0x8B, 0x1A, 0x68, 0xF9, 0x82, 0x13, 0x61, 0xF0, 0x85, 0x14, 0x66, 0xF7, 0xA8, 0x39, 0x4B, 0xDA, 0xAF, 0x3E, 0x4C, 0xDD, 0xA6, 0x37, 0x45, 0xD4, 0xA1, 0x30, 0x42, 0xD3, 0xB4, 0x25, 0x57, 0xC6, 0xB3, 0x22, 0x50, 0xC1, 0xBA, 0x2B, 0x59, 0xC8, 0xBD, 0x2C, 0x5E, 0xCF }; #define INIT_FCS 0xFF #define GOOD_FCS 0xCF static void gsm_dlci_close(struct gsm_dlci *dlci); static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len); static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk); static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len, u8 ctrl); static int gsm_send_packet(struct gsm_mux *gsm, struct gsm_msg *msg); static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr); static void gsmld_write_trigger(struct gsm_mux *gsm); static void gsmld_write_task(struct work_struct *work); static int gsm_modem_send_initial_msc(struct gsm_dlci *dlci); /** * gsm_fcs_add - update FCS * @fcs: Current FCS * @c: Next data * * Update the FCS to include c. Uses the algorithm in the specification * notes. */ static inline u8 gsm_fcs_add(u8 fcs, u8 c) { return gsm_fcs8[fcs ^ c]; } /** * gsm_fcs_add_block - update FCS for a block * @fcs: Current FCS * @c: buffer of data * @len: length of buffer * * Update the FCS to include c. Uses the algorithm in the specification * notes. */ static inline u8 gsm_fcs_add_block(u8 fcs, u8 *c, int len) { while (len--) fcs = gsm_fcs8[fcs ^ *c++]; return fcs; } /** * gsm_read_ea - read a byte into an EA * @val: variable holding value * @c: byte going into the EA * * Processes one byte of an EA. 
Updates the passed variable * and returns 1 if the EA is now completely read */ static int gsm_read_ea(unsigned int *val, u8 c) { /* Add the next 7 bits into the value */ *val <<= 7; *val |= c >> 1; /* Was this the last byte of the EA 1 = yes*/ return c & EA; } /** * gsm_read_ea_val - read a value until EA * @val: variable holding value * @data: buffer of data * @dlen: length of data * * Processes an EA value. Updates the passed variable and * returns the processed data length. */ static unsigned int gsm_read_ea_val(unsigned int *val, const u8 *data, int dlen) { unsigned int len = 0; for (; dlen > 0; dlen--) { len++; if (gsm_read_ea(val, *data++)) break; } return len; } /** * gsm_encode_modem - encode modem data bits * @dlci: DLCI to encode from * * Returns the correct GSM encoded modem status bits (6 bit field) for * the current status of the DLCI and attached tty object */ static u8 gsm_encode_modem(const struct gsm_dlci *dlci) { u8 modembits = 0; /* FC is true flow control not modem bits */ if (dlci->throttled) modembits |= MDM_FC; if (dlci->modem_tx & TIOCM_DTR) modembits |= MDM_RTC; if (dlci->modem_tx & TIOCM_RTS) modembits |= MDM_RTR; if (dlci->modem_tx & TIOCM_RI) modembits |= MDM_IC; if (dlci->modem_tx & TIOCM_CD || dlci->gsm->initiator) modembits |= MDM_DV; /* special mappings for passive side to operate as UE */ if (dlci->modem_tx & TIOCM_OUT1) modembits |= MDM_IC; if (dlci->modem_tx & TIOCM_OUT2) modembits |= MDM_DV; return modembits; } static void gsm_hex_dump_bytes(const char *fname, const u8 *data, unsigned long len) { char *prefix; if (!fname) { print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, data, len, true); return; } prefix = kasprintf(GFP_ATOMIC, "%s: ", fname); if (!prefix) return; print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, 16, 1, data, len, true); kfree(prefix); } /** * gsm_encode_params - encode DLCI parameters * @dlci: DLCI to encode from * @params: buffer to fill with the encoded parameters * * Encodes the parameters 
according to GSM 07.10 section 5.4.6.3.1 * table 3. */ static int gsm_encode_params(const struct gsm_dlci *dlci, struct gsm_dlci_param_bits *params) { const struct gsm_mux *gsm = dlci->gsm; unsigned int i, cl; switch (dlci->ftype) { case UIH: i = 0; /* UIH */ break; case UI: i = 1; /* UI */ break; default: pr_debug("unsupported frame type %d\n", dlci->ftype); return -EINVAL; } switch (dlci->adaption) { case 1: /* Unstructured */ cl = 0; /* convergence layer type 1 */ break; case 2: /* Unstructured with modem bits. */ cl = 1; /* convergence layer type 2 */ break; default: pr_debug("unsupported adaption %d\n", dlci->adaption); return -EINVAL; } params->d_bits = FIELD_PREP(PN_D_FIELD_DLCI, dlci->addr); /* UIH, convergence layer type 1 */ params->i_cl_bits = FIELD_PREP(PN_I_CL_FIELD_FTYPE, i) | FIELD_PREP(PN_I_CL_FIELD_ADAPTION, cl); params->p_bits = FIELD_PREP(PN_P_FIELD_PRIO, dlci->prio); params->t_bits = FIELD_PREP(PN_T_FIELD_T1, gsm->t1); params->n_bits = cpu_to_le16(FIELD_PREP(PN_N_FIELD_N1, dlci->mtu)); params->na_bits = FIELD_PREP(PN_NA_FIELD_N2, gsm->n2); params->k_bits = FIELD_PREP(PN_K_FIELD_K, dlci->k); return 0; } /** * gsm_register_devices - register all tty devices for a given mux index * * @driver: the tty driver that describes the tty devices * @index: the mux number is used to calculate the minor numbers of the * ttys for this mux and may differ from the position in the * mux array. 
*/ static int gsm_register_devices(struct tty_driver *driver, unsigned int index) { struct device *dev; int i; unsigned int base; if (!driver || index >= MAX_MUX) return -EINVAL; base = index * NUM_DLCI; /* first minor for this index */ for (i = 1; i < NUM_DLCI; i++) { /* Don't register device 0 - this is the control channel * and not a usable tty interface */ dev = tty_register_device(gsm_tty_driver, base + i, NULL); if (IS_ERR(dev)) { if (debug & DBG_ERRORS) pr_info("%s failed to register device minor %u", __func__, base + i); for (i--; i >= 1; i--) tty_unregister_device(gsm_tty_driver, base + i); return PTR_ERR(dev); } } return 0; } /** * gsm_unregister_devices - unregister all tty devices for a given mux index * * @driver: the tty driver that describes the tty devices * @index: the mux number is used to calculate the minor numbers of the * ttys for this mux and may differ from the position in the * mux array. */ static void gsm_unregister_devices(struct tty_driver *driver, unsigned int index) { int i; unsigned int base; if (!driver || index >= MAX_MUX) return; base = index * NUM_DLCI; /* first minor for this index */ for (i = 1; i < NUM_DLCI; i++) { /* Don't unregister device 0 - this is the control * channel and not a usable tty interface */ tty_unregister_device(gsm_tty_driver, base + i); } } /** * gsm_print_packet - display a frame for debug * @hdr: header to print before decode * @addr: address EA from the frame * @cr: C/R bit seen as initiator * @control: control including PF bit * @data: following data bytes * @dlen: length of data * * Displays a packet in human readable format for debugging purposes. The * style is based on amateur radio LAP-B dump display. 
*/ static void gsm_print_packet(const char *hdr, int addr, int cr, u8 control, const u8 *data, int dlen) { if (!(debug & DBG_DUMP)) return; /* Only show user payload frames if debug & DBG_PAYLOAD */ if (!(debug & DBG_PAYLOAD) && addr != 0) if ((control & ~PF) == UI || (control & ~PF) == UIH) return; pr_info("%s %d) %c: ", hdr, addr, "RC"[cr]); switch (control & ~PF) { case SABM: pr_cont("SABM"); break; case UA: pr_cont("UA"); break; case DISC: pr_cont("DISC"); break; case DM: pr_cont("DM"); break; case UI: pr_cont("UI"); break; case UIH: pr_cont("UIH"); break; default: if (!(control & 0x01)) { pr_cont("I N(S)%d N(R)%d", (control & 0x0E) >> 1, (control & 0xE0) >> 5); } else switch (control & 0x0F) { case RR: pr_cont("RR(%d)", (control & 0xE0) >> 5); break; case RNR: pr_cont("RNR(%d)", (control & 0xE0) >> 5); break; case REJ: pr_cont("REJ(%d)", (control & 0xE0) >> 5); break; default: pr_cont("[%02X]", control); } } if (control & PF) pr_cont("(P)"); else pr_cont("(F)"); gsm_hex_dump_bytes(NULL, data, dlen); } /* * Link level transmission side */ /** * gsm_stuff_frame - bytestuff a packet * @input: input buffer * @output: output buffer * @len: length of input * * Expand a buffer by bytestuffing it. The worst case size change * is doubling and the caller is responsible for handing out * suitable sized buffers. */ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) { int olen = 0; while (len--) { if (*input == GSM1_SOF || *input == GSM1_ESCAPE || (*input & ISO_IEC_646_MASK) == XON || (*input & ISO_IEC_646_MASK) == XOFF) { *output++ = GSM1_ESCAPE; *output++ = *input++ ^ GSM1_ESCAPE_BITS; olen++; } else *output++ = *input++; olen++; } return olen; } /** * gsm_send - send a control frame * @gsm: our GSM mux * @addr: address for control frame * @cr: command/response bit seen as initiator * @control: control byte including PF bit * * Format up and transmit a control frame. These should be transmitted * ahead of data when they are needed. 
*/ static int gsm_send(struct gsm_mux *gsm, int addr, int cr, int control) { struct gsm_msg *msg; u8 *dp; int ocr; unsigned long flags; msg = gsm_data_alloc(gsm, addr, 0, control); if (!msg) return -ENOMEM; /* toggle C/R coding if not initiator */ ocr = cr ^ (gsm->initiator ? 0 : 1); msg->data -= 3; dp = msg->data; *dp++ = (addr << 2) | (ocr << 1) | EA; *dp++ = control; if (gsm->encoding == GSM_BASIC_OPT) *dp++ = EA; /* Length of data = 0 */ *dp = 0xFF - gsm_fcs_add_block(INIT_FCS, msg->data, dp - msg->data); msg->len = (dp - msg->data) + 1; gsm_print_packet("Q->", addr, cr, control, NULL, 0); spin_lock_irqsave(&gsm->tx_lock, flags); list_add_tail(&msg->list, &gsm->tx_ctrl_list); gsm->tx_bytes += msg->len; spin_unlock_irqrestore(&gsm->tx_lock, flags); gsmld_write_trigger(gsm); return 0; } /** * gsm_dlci_clear_queues - remove outstanding data for a DLCI * @gsm: mux * @dlci: clear for this DLCI * * Clears the data queues for a given DLCI. */ static void gsm_dlci_clear_queues(struct gsm_mux *gsm, struct gsm_dlci *dlci) { struct gsm_msg *msg, *nmsg; int addr = dlci->addr; unsigned long flags; /* Clear DLCI write fifo first */ spin_lock_irqsave(&dlci->lock, flags); kfifo_reset(&dlci->fifo); spin_unlock_irqrestore(&dlci->lock, flags); /* Clear data packets in MUX write queue */ spin_lock_irqsave(&gsm->tx_lock, flags); list_for_each_entry_safe(msg, nmsg, &gsm->tx_data_list, list) { if (msg->addr != addr) continue; gsm->tx_bytes -= msg->len; list_del(&msg->list); kfree(msg); } spin_unlock_irqrestore(&gsm->tx_lock, flags); } /** * gsm_response - send a control response * @gsm: our GSM mux * @addr: address for control frame * @control: control byte including PF bit * * Format up and transmit a link level response frame. 
*/ static inline void gsm_response(struct gsm_mux *gsm, int addr, int control) { gsm_send(gsm, addr, 0, control); } /** * gsm_command - send a control command * @gsm: our GSM mux * @addr: address for control frame * @control: control byte including PF bit * * Format up and transmit a link level command frame. */ static inline void gsm_command(struct gsm_mux *gsm, int addr, int control) { gsm_send(gsm, addr, 1, control); } /* Data transmission */ #define HDR_LEN 6 /* ADDR CTRL [LEN.2] DATA FCS */ /** * gsm_data_alloc - allocate data frame * @gsm: GSM mux * @addr: DLCI address * @len: length excluding header and FCS * @ctrl: control byte * * Allocate a new data buffer for sending frames with data. Space is left * at the front for header bytes but that is treated as an implementation * detail and not for the high level code to use */ static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len, u8 ctrl) { struct gsm_msg *m = kmalloc(sizeof(struct gsm_msg) + len + HDR_LEN, GFP_ATOMIC); if (m == NULL) return NULL; m->data = m->buffer + HDR_LEN - 1; /* Allow for FCS */ m->len = len; m->addr = addr; m->ctrl = ctrl; INIT_LIST_HEAD(&m->list); return m; } /** * gsm_send_packet - sends a single packet * @gsm: GSM Mux * @msg: packet to send * * The given packet is encoded and sent out. No memory is freed. * The caller must hold the gsm tx lock. 
 *
 *	Return: 0 when gsmld_output() reported a positive result (tx_bytes
 *	is then reduced by the message length), otherwise the zero or
 *	negative value returned by gsmld_output().
 */
static int gsm_send_packet(struct gsm_mux *gsm, struct gsm_msg *msg)
{
	int len, ret;


	if (gsm->encoding == GSM_BASIC_OPT) {
		/* Basic option: raw copy between two GSM0_SOF flags */
		gsm->txframe[0] = GSM0_SOF;
		memcpy(gsm->txframe + 1, msg->data, msg->len);
		gsm->txframe[msg->len + 1] = GSM0_SOF;
		len = msg->len + 2;
	} else {
		/* Otherwise: byte-stuffed copy between two GSM1_SOF flags */
		gsm->txframe[0] = GSM1_SOF;
		len = gsm_stuff_frame(msg->data, gsm->txframe + 1, msg->len);
		gsm->txframe[len + 1] = GSM1_SOF;
		len += 2;
	}

	if (debug & DBG_DATA)
		gsm_hex_dump_bytes(__func__, gsm->txframe, len);
	gsm_print_packet("-->", msg->addr, gsm->initiator, msg->ctrl, msg->data,
			 msg->len);

	ret = gsmld_output(gsm, gsm->txframe, len);
	if (ret <= 0)
		return ret;
	/* FIXME: Can eliminate one SOF in many more cases */
	gsm->tx_bytes -= msg->len;

	return 0;
}

/**
 *	gsm_is_flow_ctrl_msg	-	checks if flow control message
 *	@msg: message to check
 *
 *	Returns true if the given message is a flow control command of the
 *	control channel. False is returned in any other case.
 *
 *	Only UI/UIH messages with address 0 are inspected; the command is
 *	read as an EA value starting two octets into the message data.
 */
static bool gsm_is_flow_ctrl_msg(struct gsm_msg *msg)
{
	unsigned int cmd;

	if (msg->addr > 0)
		return false;

	switch (msg->ctrl & ~PF) {
	case UI:
	case UIH:
		cmd = 0;
		/*
		 * NOTE(review): the "+ 2" presumably skips the address and
		 * control octets of an already queued frame. In the basic
		 * option a length field follows the control octet, so this
		 * may be reading the length rather than the command there -
		 * confirm against the queuing code.
		 */
		if (gsm_read_ea_val(&cmd, msg->data + 2, msg->len - 2) < 1)
			break;
		/*
		 * NOTE(review): CMD_FCOFF (0x31) and CMD_FCON (0x51) both
		 * have the PF bit (0x10) set, so a value masked with ~PF can
		 * never equal either of them. As written neither case below
		 * can be taken and the function always returns false -
		 * confirm which mask was intended.
		 */
		switch (cmd & ~PF) {
		case CMD_FCOFF:
		case CMD_FCON:
			return true;
		}
		break;
	}

	return false;
}

/**
 *	gsm_data_kick	-	poke the queue
 *	@gsm: GSM Mux
 *
 *	The tty device has called us to indicate that room has appeared in
 *	the transmit queue. Ram more data into the pipe if we have any.
 *	If we have been flow-stopped by a CMD_FCOFF, then we can only
 *	send messages on DLCI0 until CMD_FCON. The caller must hold
 *	the gsm tx lock.
*/ static int gsm_data_kick(struct gsm_mux *gsm) { struct gsm_msg *msg, *nmsg; struct gsm_dlci *dlci; int ret; clear_bit(TTY_DO_WRITE_WAKEUP, &gsm->tty->flags); /* Serialize control messages and control channel messages first */ list_for_each_entry_safe(msg, nmsg, &gsm->tx_ctrl_list, list) { if (gsm->constipated && !gsm_is_flow_ctrl_msg(msg)) continue; ret = gsm_send_packet(gsm, msg); switch (ret) { case -ENOSPC: return -ENOSPC; case -ENODEV: /* ldisc not open */ gsm->tx_bytes -= msg->len; list_del(&msg->list); kfree(msg); continue; default: if (ret >= 0) { list_del(&msg->list); kfree(msg); } break; } } if (gsm->constipated) return -EAGAIN; /* Serialize other channels */ if (list_empty(&gsm->tx_data_list)) return 0; list_for_each_entry_safe(msg, nmsg, &gsm->tx_data_list, list) { dlci = gsm->dlci[msg->addr]; /* Send only messages for DLCIs with valid state */ if (dlci->state != DLCI_OPEN) { gsm->tx_bytes -= msg->len; list_del(&msg->list); kfree(msg); continue; } ret = gsm_send_packet(gsm, msg); switch (ret) { case -ENOSPC: return -ENOSPC; case -ENODEV: /* ldisc not open */ gsm->tx_bytes -= msg->len; list_del(&msg->list); kfree(msg); continue; default: if (ret >= 0) { list_del(&msg->list); kfree(msg); } break; } } return 1; } /** * __gsm_data_queue - queue a UI or UIH frame * @dlci: DLCI sending the data * @msg: message queued * * Add data to the transmit queue and try and get stuff moving * out of the mux tty if not already doing so. The Caller must hold * the gsm tx lock. 
*/ static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) { struct gsm_mux *gsm = dlci->gsm; u8 *dp = msg->data; u8 *fcs = dp + msg->len; /* Fill in the header */ if (gsm->encoding == GSM_BASIC_OPT) { if (msg->len < 128) *--dp = (msg->len << 1) | EA; else { *--dp = (msg->len >> 7); /* bits 7 - 15 */ *--dp = (msg->len & 127) << 1; /* bits 0 - 6 */ } } *--dp = msg->ctrl; if (gsm->initiator) *--dp = (msg->addr << 2) | CR | EA; else *--dp = (msg->addr << 2) | EA; *fcs = gsm_fcs_add_block(INIT_FCS, dp , msg->data - dp); /* Ugly protocol layering violation */ if (msg->ctrl == UI || msg->ctrl == (UI|PF)) *fcs = gsm_fcs_add_block(*fcs, msg->data, msg->len); *fcs = 0xFF - *fcs; gsm_print_packet("Q> ", msg->addr, gsm->initiator, msg->ctrl, msg->data, msg->len); /* Move the header back and adjust the length, also allow for the FCS now tacked on the end */ msg->len += (msg->data - dp) + 1; msg->data = dp; /* Add to the actual output queue */ switch (msg->ctrl & ~PF) { case UI: case UIH: if (msg->addr > 0) { list_add_tail(&msg->list, &gsm->tx_data_list); break; } fallthrough; default: list_add_tail(&msg->list, &gsm->tx_ctrl_list); break; } gsm->tx_bytes += msg->len; gsmld_write_trigger(gsm); mod_timer(&gsm->kick_timer, jiffies + 10 * gsm->t1 * HZ / 100); } /** * gsm_data_queue - queue a UI or UIH frame * @dlci: DLCI sending the data * @msg: message queued * * Add data to the transmit queue and try and get stuff moving * out of the mux tty if not already doing so. Take the * the gsm tx lock and dlci lock. */ static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg) { unsigned long flags; spin_lock_irqsave(&dlci->gsm->tx_lock, flags); __gsm_data_queue(dlci, msg); spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); } /** * gsm_dlci_data_output - try and push data out of a DLCI * @gsm: mux * @dlci: the DLCI to pull data from * * Pull data from a DLCI and send it into the transmit queue if there * is data. Keep to the MRU of the mux. 
This path handles the usual tty
 *	interface which is a byte stream with optional modem data.
 *
 *	Caller must hold the tx_lock of the mux.
 *
 *	Return: 0 if the fifo is empty, -ENOMEM if no message could be
 *	allocated, otherwise the payload size of the queued frame.
 */

static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci)
{
	struct gsm_msg *msg;
	u8 *wr;
	int hdr_len, avail, total;

	/* One octet of modem bits (never break data) unless adaption is 1 */
	hdr_len = (dlci->adaption != 1);

	avail = kfifo_len(&dlci->fifo);
	if (avail == 0)
		return 0;

	/* MTU/MRU count only the data bits but watch adaption mode */
	if ((avail + hdr_len) > dlci->mtu)
		avail = dlci->mtu - hdr_len;

	total = avail + hdr_len;

	msg = gsm_data_alloc(gsm, dlci->addr, total, dlci->ftype);
	if (!msg)
		return -ENOMEM;

	wr = msg->data;
	if (dlci->adaption == 2) {
		/* Unstructured with modem bits.
		 * Always one byte as we never send inline break data
		 */
		*wr++ = (gsm_encode_modem(dlci) << 1) | EA;
	} else if (dlci->adaption != 1) {
		/* Adaption 1 is unstructured and needs no prefix octet */
		pr_err("%s: unsupported adaption %d\n", __func__,
		       dlci->adaption);
	}

	WARN_ON(avail != kfifo_out_locked(&dlci->fifo, wr, avail, &dlci->lock));

	/* Notify upper layer about available send space. */
	tty_port_tty_wakeup(&dlci->port);

	__gsm_data_queue(dlci, msg);
	/* Bytes of data we used up */
	return total;
}

/**
 *	gsm_dlci_data_output_framed  -	try and push data out of a DLCI
 *	@gsm: mux
 *	@dlci: the DLCI to pull data from
 *
 *	Pull data from a DLCI and send it into the transmit queue if there
 *	is data. Keep to the MRU of the mux. This path handles framed data
 *	queued as skbuffs to the DLCI.
 *
 *	Caller must hold the tx_lock of the mux.
*/ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm, struct gsm_dlci *dlci) { struct gsm_msg *msg; u8 *dp; int len, size; int last = 0, first = 0; int overhead = 0; /* One byte per frame is used for B/F flags */ if (dlci->adaption == 4) overhead = 1; /* dlci->skb is locked by tx_lock */ if (dlci->skb == NULL) { dlci->skb = skb_dequeue_tail(&dlci->skb_list); if (dlci->skb == NULL) return 0; first = 1; } len = dlci->skb->len + overhead; /* MTU/MRU count only the data bits */ if (len > dlci->mtu) { if (dlci->adaption == 3) { /* Over long frame, bin it */ dev_kfree_skb_any(dlci->skb); dlci->skb = NULL; return 0; } len = dlci->mtu; } else last = 1; size = len + overhead; msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype); if (msg == NULL) { skb_queue_tail(&dlci->skb_list, dlci->skb); dlci->skb = NULL; return -ENOMEM; } dp = msg->data; if (dlci->adaption == 4) { /* Interruptible framed (Packetised Data) */ /* Flag byte to carry the start/end info */ *dp++ = last << 7 | first << 6 | 1; /* EA */ len--; } memcpy(dp, dlci->skb->data, len); skb_pull(dlci->skb, len); __gsm_data_queue(dlci, msg); if (last) { dev_kfree_skb_any(dlci->skb); dlci->skb = NULL; } return size; } /** * gsm_dlci_modem_output - try and push modem status out of a DLCI * @gsm: mux * @dlci: the DLCI to pull modem status from * @brk: break signal * * Push an empty frame in to the transmit queue to update the modem status * bits and to transmit an optional break. * * Caller must hold the tx_lock of the mux. */ static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci, u8 brk) { u8 *dp = NULL; struct gsm_msg *msg; int size = 0; /* for modem bits without break data */ switch (dlci->adaption) { case 1: /* Unstructured */ break; case 2: /* Unstructured with modem bits. 
*/ size++; if (brk > 0) size++; break; default: pr_err("%s: unsupported adaption %d\n", __func__, dlci->adaption); return -EINVAL; } msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype); if (!msg) { pr_err("%s: gsm_data_alloc error", __func__); return -ENOMEM; } dp = msg->data; switch (dlci->adaption) { case 1: /* Unstructured */ break; case 2: /* Unstructured with modem bits. */ if (brk == 0) { *dp++ = (gsm_encode_modem(dlci) << 1) | EA; } else { *dp++ = gsm_encode_modem(dlci) << 1; *dp++ = (brk << 4) | 2 | EA; /* Length, Break, EA */ } break; default: /* Handled above */ break; } __gsm_data_queue(dlci, msg); return size; } /** * gsm_dlci_data_sweep - look for data to send * @gsm: the GSM mux * * Sweep the GSM mux channels in priority order looking for ones with * data to send. We could do with optimising this scan a bit. We aim * to fill the queue totally or up to TX_THRESH_HI bytes. Once we hit * TX_THRESH_LO we get called again * * FIXME: We should round robin between groups and in theory you can * renegotiate DLCI priorities with optional stuff. Needs optimising. */ static int gsm_dlci_data_sweep(struct gsm_mux *gsm) { /* Priority ordering: We should do priority with RR of the groups */ int i, len, ret = 0; bool sent; struct gsm_dlci *dlci; while (gsm->tx_bytes < TX_THRESH_HI) { for (sent = false, i = 1; i < NUM_DLCI; i++) { dlci = gsm->dlci[i]; /* skip unused or blocked channel */ if (!dlci || dlci->constipated) continue; /* skip channels with invalid state */ if (dlci->state != DLCI_OPEN) continue; /* count the sent data per adaption */ if (dlci->adaption < 3 && !dlci->net) len = gsm_dlci_data_output(gsm, dlci); else len = gsm_dlci_data_output_framed(gsm, dlci); /* on error exit */ if (len < 0) return ret; if (len > 0) { ret++; sent = true; /* The lower DLCs can starve the higher DLCs! 
*/ break; } /* try next */ } if (!sent) break; } return ret; } /** * gsm_dlci_data_kick - transmit if possible * @dlci: DLCI to kick * * Transmit data from this DLCI if the queue is empty. We can't rely on * a tty wakeup except when we filled the pipe so we need to fire off * new data ourselves in other cases. */ static void gsm_dlci_data_kick(struct gsm_dlci *dlci) { unsigned long flags; int sweep; if (dlci->constipated) return; spin_lock_irqsave(&dlci->gsm->tx_lock, flags); /* If we have nothing running then we need to fire up */ sweep = (dlci->gsm->tx_bytes < TX_THRESH_LO); if (dlci->gsm->tx_bytes == 0) { if (dlci->net) gsm_dlci_data_output_framed(dlci->gsm, dlci); else gsm_dlci_data_output(dlci->gsm, dlci); } if (sweep) gsm_dlci_data_sweep(dlci->gsm); spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags); } /* * Control message processing */ /** * gsm_control_command - send a command frame to a control * @gsm: gsm channel * @cmd: the command to use * @data: data to follow encoded info * @dlen: length of data * * Encode up and queue a UI/UIH frame containing our command. */ static int gsm_control_command(struct gsm_mux *gsm, int cmd, const u8 *data, int dlen) { struct gsm_msg *msg; struct gsm_dlci *dlci = gsm->dlci[0]; msg = gsm_data_alloc(gsm, 0, dlen + 2, dlci->ftype); if (msg == NULL) return -ENOMEM; msg->data[0] = (cmd << 1) | CR | EA; /* Set C/R */ msg->data[1] = (dlen << 1) | EA; memcpy(msg->data + 2, data, dlen); gsm_data_queue(dlci, msg); return 0; } /** * gsm_control_reply - send a response frame to a control * @gsm: gsm channel * @cmd: the command to use * @data: data to follow encoded info * @dlen: length of data * * Encode up and queue a UI/UIH frame containing our response. 
*/ static void gsm_control_reply(struct gsm_mux *gsm, int cmd, const u8 *data, int dlen) { struct gsm_msg *msg; struct gsm_dlci *dlci = gsm->dlci[0]; msg = gsm_data_alloc(gsm, 0, dlen + 2, dlci->ftype); if (msg == NULL) return; msg->data[0] = (cmd & 0xFE) << 1 | EA; /* Clear C/R */ msg->data[1] = (dlen << 1) | EA; memcpy(msg->data + 2, data, dlen); gsm_data_queue(dlci, msg); } /** * gsm_process_modem - process received modem status * @tty: virtual tty bound to the DLCI * @dlci: DLCI to affect * @modem: modem bits (full EA) * @slen: number of signal octets * * Used when a modem control message or line state inline in adaption * layer 2 is processed. Sort out the local modem state and throttles */ static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci, u32 modem, int slen) { int mlines = 0; u8 brk = 0; int fc; /* The modem status command can either contain one octet (V.24 signals) * or two octets (V.24 signals + break signals). This is specified in * section 5.4.6.3.7 of the 07.10 mux spec. 
*/ if (slen == 1) modem = modem & 0x7f; else { brk = modem & 0x7f; modem = (modem >> 7) & 0x7f; } /* Flow control/ready to communicate */ fc = (modem & MDM_FC) || !(modem & MDM_RTR); if (fc && !dlci->constipated) { /* Need to throttle our output on this device */ dlci->constipated = true; } else if (!fc && dlci->constipated) { dlci->constipated = false; gsm_dlci_data_kick(dlci); } /* Map modem bits */ if (modem & MDM_RTC) mlines |= TIOCM_DSR | TIOCM_DTR; if (modem & MDM_RTR) mlines |= TIOCM_RTS | TIOCM_CTS; if (modem & MDM_IC) mlines |= TIOCM_RI; if (modem & MDM_DV) mlines |= TIOCM_CD; /* Carrier drop -> hangup */ if (tty) { if ((mlines & TIOCM_CD) == 0 && (dlci->modem_rx & TIOCM_CD)) if (!C_CLOCAL(tty)) tty_hangup(tty); } if (brk & 0x01) tty_insert_flip_char(&dlci->port, 0, TTY_BREAK); dlci->modem_rx = mlines; wake_up_interruptible(&dlci->gsm->event); } /** * gsm_process_negotiation - process received parameters * @gsm: GSM channel * @addr: DLCI address * @cr: command/response * @params: encoded parameters from the parameter negotiation message * * Used when the response for our parameter negotiation command was * received. 
 *
 * Two cases are accepted: a response (@cr clear) while we are the
 * initiator, which is checked against the DLCI's current settings, and a
 * command (@cr set) while we are not the initiator, whose values are
 * taken over into the DLCI. Any other combination is rejected.
 *
 * Return: 0 on success, -EINVAL if a parameter or the @cr/initiator
 * combination is not acceptable.
 */
static int gsm_process_negotiation(struct gsm_mux *gsm, unsigned int addr,
				   unsigned int cr,
				   const struct gsm_dlci_param_bits *params)
{
	struct gsm_dlci *dlci = gsm->dlci[addr];
	unsigned int ftype, i, adaption, prio, n1, k;

	/* Unpack the fields; the adaption is encoded minus one */
	i = FIELD_GET(PN_I_CL_FIELD_FTYPE, params->i_cl_bits);
	adaption = FIELD_GET(PN_I_CL_FIELD_ADAPTION, params->i_cl_bits) + 1;
	prio = FIELD_GET(PN_P_FIELD_PRIO, params->p_bits);
	n1 = FIELD_GET(PN_N_FIELD_N1, get_unaligned_le16(&params->n_bits));
	k = FIELD_GET(PN_K_FIELD_K, params->k_bits);

	if (n1 < MIN_MTU) {
		if (debug & DBG_ERRORS)
			pr_info("%s N1 out of range in PN\n", __func__);
		return -EINVAL;
	}

	/* Translate the frame type field; only UIH and UI are accepted */
	switch (i) {
	case 0x00:
		ftype = UIH;
		break;
	case 0x01:
		ftype = UI;
		break;
	case 0x02: /* I frames are not supported */
		if (debug & DBG_ERRORS)
			pr_info("%s unsupported I frame request in PN\n",
				__func__);
		gsm->unsupported++;
		return -EINVAL;
	default:
		if (debug & DBG_ERRORS)
			pr_info("%s i out of range in PN\n", __func__);
		return -EINVAL;
	}

	if (!cr && gsm->initiator) {
		/* Response to our own request: must match what we asked for */
		if (adaption != dlci->adaption) {
			if (debug & DBG_ERRORS)
				pr_info("%s invalid adaption %d in PN\n",
					__func__, adaption);
			return -EINVAL;
		}
		if (prio != dlci->prio) {
			if (debug & DBG_ERRORS)
				pr_info("%s invalid priority %d in PN",
					__func__, prio);
			return -EINVAL;
		}
		if (n1 > gsm->mru || n1 > dlci->mtu) {
			/* We requested a frame size but the other party wants
			 * to send larger frames. The standard allows only a
			 * smaller response value than requested (5.4.6.3.1).
			 */
			if (debug & DBG_ERRORS)
				pr_info("%s invalid N1 %d in PN\n", __func__,
					n1);
			return -EINVAL;
		}
		/* The mtu is updated here, before the checks below; a later
		 * failure still leaves the new value in place.
		 */
		dlci->mtu = n1;
		if (ftype != dlci->ftype) {
			if (debug & DBG_ERRORS)
				pr_info("%s invalid i %d in PN\n", __func__, i);
			return -EINVAL;
		}
		/* ftype is UI or UIH after the switch above, so this
		 * condition cannot currently be true.
		 */
		if (ftype != UI && ftype != UIH && k > dlci->k) {
			if (debug & DBG_ERRORS)
				pr_info("%s invalid k %d in PN\n", __func__, k);
			return -EINVAL;
		}
		dlci->k = k;
	} else if (cr && !gsm->initiator) {
		/* Only convergence layer type 1 and 2 are supported. */
		if (adaption != 1 && adaption != 2) {
			if (debug & DBG_ERRORS)
				pr_info("%s invalid adaption %d in PN\n",
					__func__, adaption);
			return -EINVAL;
		}
		dlci->adaption = adaption;
		if (n1 > gsm->mru) {
			/* Propose a smaller value */
			dlci->mtu = gsm->mru;
		} else if (n1 > MAX_MTU) {
			/* Propose a smaller value */
			dlci->mtu = MAX_MTU;
		} else {
			dlci->mtu = n1;
		}
		dlci->prio = prio;
		dlci->ftype = ftype;
		dlci->k = k;
	} else {
		return -EINVAL;
	}

	return 0;
}

/**
 *	gsm_control_modem	-	modem status received
 *	@gsm: GSM channel
 *	@data: data following command
 *	@clen: command length
 *
 *	We have received a modem status control message. This is used by
 *	the GSM mux protocol to pass virtual modem line status and optionally
 *	to indicate break signals. Unpack it, convert to Linux representation
 *	and if need be stuff a break message down the tty.
 *
 *	The message is dropped without a reply if it is too short or names
 *	DLCI 0, an out of range DLCI or an unused DLCI slot.
 */

static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
{
	unsigned int addr = 0;
	unsigned int modem = 0;
	struct gsm_dlci *dlci;
	int len = clen;
	int cl = clen;
	const u8 *dp = data;
	struct tty_struct *tty;

	/* First EA field: the DLCI address */
	len = gsm_read_ea_val(&addr, data, cl);
	if (len < 1)
		return;

	addr >>= 1;
	/* Closed port, or invalid ? */
	if (addr == 0 || addr >= NUM_DLCI || gsm->dlci[addr] == NULL)
		return;
	dlci = gsm->dlci[addr];

	/* Must be at least one byte following the EA */
	if ((cl - len) < 1)
		return;

	dp += len;
	cl -= len;

	/* get the modem status */
	len = gsm_read_ea_val(&modem, dp, cl);
	if (len < 1)
		return;

	tty = tty_port_tty_get(&dlci->port);
	/* NOTE(review): cl (octets left after the address) is passed as the
	 * signal octet count, not len (octets the EA read consumed) -
	 * confirm this is intended.
	 */
	gsm_process_modem(tty, dlci, modem, cl);
	if (tty) {
		tty_wakeup(tty);
		tty_kref_put(tty);
	}
	/* Echo the received message back as the response */
	gsm_control_reply(gsm, CMD_MSC, data, clen);
}

/**
 * gsm_control_negotiation	-	parameter negotiation received
 * @gsm: GSM channel
 * @cr: command/response flag
 * @data: data following command
 * @dlen: data length
 *
 * We have received a parameter negotiation message. This is used by
 * the GSM mux protocol to configure protocol parameters for a new DLCI.
*/ static void gsm_control_negotiation(struct gsm_mux *gsm, unsigned int cr, const u8 *data, unsigned int dlen) { unsigned int addr; struct gsm_dlci_param_bits pn_reply; struct gsm_dlci *dlci; struct gsm_dlci_param_bits *params; if (dlen < sizeof(struct gsm_dlci_param_bits)) { gsm->open_error++; return; } /* Invalid DLCI? */ params = (struct gsm_dlci_param_bits *)data; addr = FIELD_GET(PN_D_FIELD_DLCI, params->d_bits); if (addr == 0 || addr >= NUM_DLCI || !gsm->dlci[addr]) { gsm->open_error++; return; } dlci = gsm->dlci[addr]; /* Too late for parameter negotiation? */ if ((!cr && dlci->state == DLCI_OPENING) || dlci->state == DLCI_OPEN) { gsm->open_error++; return; } /* Process the received parameters */ if (gsm_process_negotiation(gsm, addr, cr, params) != 0) { /* Negotiation failed. Close the link. */ if (debug & DBG_ERRORS) pr_info("%s PN failed\n", __func__); gsm->open_error++; gsm_dlci_close(dlci); return; } if (cr) { /* Reply command with accepted parameters. */ if (gsm_encode_params(dlci, &pn_reply) == 0) gsm_control_reply(gsm, CMD_PN, (const u8 *)&pn_reply, sizeof(pn_reply)); else if (debug & DBG_ERRORS) pr_info("%s PN invalid\n", __func__); } else if (dlci->state == DLCI_CONFIGURE) { /* Proceed with link setup by sending SABM before UA */ dlci->state = DLCI_OPENING; gsm_command(gsm, dlci->addr, SABM|PF); mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); } else { if (debug & DBG_ERRORS) pr_info("%s PN in invalid state\n", __func__); gsm->open_error++; } } /** * gsm_control_rls - remote line status * @gsm: GSM channel * @data: data bytes * @clen: data length * * The modem sends us a two byte message on the control channel whenever * it wishes to send us an error state from the virtual link. 
Stuff
 *	this into the uplink tty if present
 *
 *	The message is ignored if it is empty, has no octet after the
 *	address EA, names DLCI 0, an out of range or unused DLCI, or has
 *	the low bit of the status octet clear.
 */

static void gsm_control_rls(struct gsm_mux *gsm, const u8 *data, int clen)
{
	struct tty_port *port;
	unsigned int addr = 0;
	u8 bits;
	int len;

	/*
	 * Bounded read of the DLCI address EA. Unlike an open coded
	 * read-then-count loop this never touches @data when @clen is zero
	 * or negative and never reads beyond @clen octets.
	 */
	len = gsm_read_ea_val(&addr, data, clen);
	if (len < 1)
		return;
	/* Must be at least one byte following ea */
	if ((clen - len) < 1)
		return;
	addr >>= 1;
	/* Closed port, or invalid ? */
	if (addr == 0 || addr >= NUM_DLCI || gsm->dlci[addr] == NULL)
		return;
	/* No error ? */
	bits = data[len];
	if ((bits & 1) == 0)
		return;

	port = &gsm->dlci[addr]->port;

	if (bits & 2)
		tty_insert_flip_char(port, 0, TTY_OVERRUN);
	if (bits & 4)
		tty_insert_flip_char(port, 0, TTY_PARITY);
	if (bits & 8)
		tty_insert_flip_char(port, 0, TTY_FRAME);

	tty_flip_buffer_push(port);

	/* Echo the received message back as the response */
	gsm_control_reply(gsm, CMD_RLS, data, clen);
}

static void gsm_dlci_begin_close(struct gsm_dlci *dlci);

/**
 *	gsm_control_message	-	DLCI 0 control processing
 *	@gsm: our GSM mux
 *	@command:  the command EA
 *	@data: data beyond the command/length EAs
 *	@clen: length
 *
 *	Input processor for control messages from the other end of the link.
* Processes the incoming request and queues a response frame or an
 * NSC response if not supported
 */
static void gsm_control_message(struct gsm_mux *gsm, unsigned int command,
				const u8 *data, int clen)
{
	u8 nsc_arg[1];

	switch (command) {
	case CMD_CLD: {
		struct gsm_dlci *ctrl_dlci = gsm->dlci[0];

		/* Modem wishes to close down */
		if (!ctrl_dlci)
			break;
		ctrl_dlci->dead = true;
		gsm->dead = true;
		gsm_dlci_begin_close(ctrl_dlci);
		break;
	}
	case CMD_TEST:
		/* Modem wishes to test, reply with the data */
		gsm_control_reply(gsm, CMD_TEST, data, clen);
		break;
	case CMD_FCON:
		/* Modem can accept data again */
		gsm->constipated = false;
		gsm_control_reply(gsm, CMD_FCON, NULL, 0);
		/* Kick the link in case it is idling */
		gsmld_write_trigger(gsm);
		break;
	case CMD_FCOFF:
		/* Modem wants us to STFU */
		gsm->constipated = true;
		gsm_control_reply(gsm, CMD_FCOFF, NULL, 0);
		break;
	case CMD_MSC:
		/* Out of band modem line change indicator for a DLCI */
		gsm_control_modem(gsm, data, clen);
		break;
	case CMD_RLS:
		/* Out of band error reception for a DLCI */
		gsm_control_rls(gsm, data, clen);
		break;
	case CMD_PSC:
		/* Modem wishes to enter power saving state */
		gsm_control_reply(gsm, CMD_PSC, NULL, 0);
		break;
	/* Optional commands */
	case CMD_PN:
		/* Modem sends a parameter negotiation command */
		gsm_control_negotiation(gsm, 1, data, clen);
		break;
	/* Optional unsupported commands */
	case CMD_RPN:	/* Remote port negotiation */
	case CMD_SNC:	/* Service negotiation command */
		gsm->unsupported++;
		fallthrough;
	default:
		/* Reply to bad commands with an NSC */
		nsc_arg[0] = command;
		gsm_control_reply(gsm, CMD_NSC, nsc_arg, 1);
		break;
	}
}

/**
 * gsm_control_response - process a response to our control
 * @gsm: our GSM mux
 * @command: the command (response) EA
 * @data: data beyond the command/length EA
 * @clen: length
 *
 * Process a response to an outstanding command. We only allow a single
 * control message in flight so this is fairly easy.
All the clean up
 * is done by the caller, we just update the fields, flag it as done
 * and return
 */
static void gsm_control_response(struct gsm_mux *gsm, unsigned int command,
				 const u8 *data, int clen)
{
	struct gsm_control *pending;
	struct gsm_dlci *ctrl_dlci;
	unsigned long irqflags;

	spin_lock_irqsave(&gsm->control_lock, irqflags);

	pending = gsm->pending_cmd;
	ctrl_dlci = gsm->dlci[0];
	command |= 1;

	/* Does the reply match our command */
	if (pending != NULL &&
	    (command == CMD_NSC || command == pending->cmd)) {
		/* Our command was replied to, kill the retry timer */
		timer_delete(&gsm->t2_timer);
		gsm->pending_cmd = NULL;
		/* Rejected by the other end */
		if (command == CMD_NSC)
			pending->error = -EOPNOTSUPP;
		pending->done = 1;
		wake_up(&gsm->event);
		goto unlock;
	}

	/* Or did we receive the PN response to our PN command */
	if (command == CMD_PN) {
		gsm_control_negotiation(gsm, 0, data, clen);
		goto unlock;
	}

	/* Or did we receive the TEST response to our TEST command */
	if (command == CMD_TEST && clen == 1 && *data == gsm->ka_num) {
		gsm->ka_retries = -1; /* trigger new keep-alive message */
		if (ctrl_dlci && !ctrl_dlci->dead)
			mod_timer(&gsm->ka_timer,
				  jiffies + gsm->keep_alive * HZ / 100);
	}

unlock:
	spin_unlock_irqrestore(&gsm->control_lock, irqflags);
}

/**
 * gsm_control_keep_alive - check timeout or start keep-alive
 * @t: timer contained in our gsm object
 *
 * Called off the keep-alive timer expiry signaling that our link
 * partner is not responding anymore. Link will be closed.
 * This is also called to startup our timer.
*/ static void gsm_control_keep_alive(struct timer_list *t) { struct gsm_mux *gsm = timer_container_of(gsm, t, ka_timer); unsigned long flags; spin_lock_irqsave(&gsm->control_lock, flags); if (gsm->ka_num && gsm->ka_retries == 0) { /* Keep-alive expired -> close the link */ if (debug & DBG_ERRORS) pr_debug("%s keep-alive timed out\n", __func__); spin_unlock_irqrestore(&gsm->control_lock, flags); if (gsm->dlci[0]) gsm_dlci_begin_close(gsm->dlci[0]); return; } else if (gsm->keep_alive && gsm->dlci[0] && !gsm->dlci[0]->dead) { if (gsm->ka_retries > 0) { /* T2 expired for keep-alive -> resend */ gsm->ka_retries--; } else { /* Start keep-alive timer */ gsm->ka_num++; if (!gsm->ka_num) gsm->ka_num++; gsm->ka_retries = (signed int)gsm->n2; } gsm_control_command(gsm, CMD_TEST, &gsm->ka_num, sizeof(gsm->ka_num)); mod_timer(&gsm->ka_timer, jiffies + gsm->t2 * HZ / 100); } spin_unlock_irqrestore(&gsm->control_lock, flags); } /** * gsm_control_transmit - send control packet * @gsm: gsm mux * @ctrl: frame to send * * Send out a pending control command (called under control lock) */ static void gsm_control_transmit(struct gsm_mux *gsm, struct gsm_control *ctrl) { gsm_control_command(gsm, ctrl->cmd, ctrl->data, ctrl->len); } /** * gsm_control_retransmit - retransmit a control frame * @t: timer contained in our gsm object * * Called off the T2 timer expiry in order to retransmit control frames * that have been lost in the system somewhere. The control_lock protects * us from colliding with another sender or a receive completion event. * In that situation the timer may still occur in a small window but * gsm->pending_cmd will be NULL and we just let the timer expire. 
*/
static void gsm_control_retransmit(struct timer_list *t)
{
	struct gsm_mux *gsm = timer_container_of(gsm, t, t2_timer);
	struct gsm_control *pending;
	unsigned long flags;
	bool gave_up = false;

	spin_lock_irqsave(&gsm->control_lock, flags);

	pending = gsm->pending_cmd;
	if (!pending)
		goto unlock;

	if (gsm->cretries == 0 || !gsm->dlci[0] || gsm->dlci[0]->dead) {
		/* Out of retries or no usable DLCI 0: fail the command */
		gsm->pending_cmd = NULL;
		pending->error = -ETIMEDOUT;
		pending->done = 1;
		gave_up = true;
		goto unlock;
	}

	gsm->cretries--;
	gsm_control_transmit(gsm, pending);
	mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100);

unlock:
	spin_unlock_irqrestore(&gsm->control_lock, flags);
	/* Waiters are woken only after the lock has been released */
	if (gave_up)
		wake_up(&gsm->event);
}

/**
 * gsm_control_send - send a control frame on DLCI 0
 * @gsm: the GSM channel
 * @command: command to send including CR bit
 * @data: bytes of data (must be kmalloced)
 * @clen: length of the block to send
 *
 * Queue and dispatch a control command. Only one command can be
 * active at a time. In theory more can be outstanding but the matching
 * gets really complicated so for now stick to one outstanding.
*/ static struct gsm_control *gsm_control_send(struct gsm_mux *gsm, unsigned int command, u8 *data, int clen) { struct gsm_control *ctrl = kzalloc(sizeof(struct gsm_control), GFP_ATOMIC); unsigned long flags; if (ctrl == NULL) return NULL; retry: wait_event(gsm->event, gsm->pending_cmd == NULL); spin_lock_irqsave(&gsm->control_lock, flags); if (gsm->pending_cmd != NULL) { spin_unlock_irqrestore(&gsm->control_lock, flags); goto retry; } ctrl->cmd = command; ctrl->data = data; ctrl->len = clen; gsm->pending_cmd = ctrl; /* If DLCI0 is in ADM mode skip retries, it won't respond */ if (gsm->dlci[0]->mode == DLCI_MODE_ADM) gsm->cretries = 0; else gsm->cretries = gsm->n2; mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100); gsm_control_transmit(gsm, ctrl); spin_unlock_irqrestore(&gsm->control_lock, flags); return ctrl; } /** * gsm_control_wait - wait for a control to finish * @gsm: GSM mux * @control: control we are waiting on * * Waits for the control to complete or time out. Frees any used * resources and returns 0 for success, or an error if the remote * rejected or ignored the request. */ static int gsm_control_wait(struct gsm_mux *gsm, struct gsm_control *control) { int err; wait_event(gsm->event, control->done == 1); err = control->error; kfree(control); return err; } /* * DLCI level handling: Needs krefs */ /* * State transitions and timers */ /** * gsm_dlci_close - a DLCI has closed * @dlci: DLCI that closed * * Perform processing when moving a DLCI into closed state. If there * is an attached tty this is hung up */ static void gsm_dlci_close(struct gsm_dlci *dlci) { timer_delete(&dlci->t1); if (debug & DBG_ERRORS) pr_debug("DLCI %d goes closed.\n", dlci->addr); dlci->state = DLCI_CLOSED; /* Prevent us from sending data before the link is up again */ dlci->constipated = true; if (dlci->addr != 0) { tty_port_tty_hangup(&dlci->port, false); gsm_dlci_clear_queues(dlci->gsm, dlci); /* Ensure that gsmtty_open() can return. 
*/ tty_port_set_initialized(&dlci->port, false); wake_up_interruptible(&dlci->port.open_wait); } else { timer_delete(&dlci->gsm->ka_timer); dlci->gsm->dead = true; } /* A DLCI 0 close is a MUX termination so we need to kick that back to userspace somehow */ gsm_dlci_data_kick(dlci); wake_up_all(&dlci->gsm->event); } /** * gsm_dlci_open - a DLCI has opened * @dlci: DLCI that opened * * Perform processing when moving a DLCI into open state. */ static void gsm_dlci_open(struct gsm_dlci *dlci) { struct gsm_mux *gsm = dlci->gsm; /* Note that SABM UA .. SABM UA first UA lost can mean that we go open -> open */ timer_delete(&dlci->t1); /* This will let a tty open continue */ dlci->state = DLCI_OPEN; dlci->constipated = false; if (debug & DBG_ERRORS) pr_debug("DLCI %d goes open.\n", dlci->addr); /* Send current modem state */ if (dlci->addr) { gsm_modem_send_initial_msc(dlci); } else { /* Start keep-alive control */ gsm->ka_num = 0; gsm->ka_retries = -1; mod_timer(&gsm->ka_timer, jiffies + gsm->keep_alive * HZ / 100); } gsm_dlci_data_kick(dlci); wake_up(&dlci->gsm->event); } /** * gsm_dlci_negotiate - start parameter negotiation * @dlci: DLCI to open * * Starts the parameter negotiation for the new DLCI. This needs to be done * before the DLCI initialized the channel via SABM. */ static int gsm_dlci_negotiate(struct gsm_dlci *dlci) { struct gsm_mux *gsm = dlci->gsm; struct gsm_dlci_param_bits params; int ret; ret = gsm_encode_params(dlci, &params); if (ret != 0) return ret; /* We cannot asynchronous wait for the command response with * gsm_command() and gsm_control_wait() at this point. */ ret = gsm_control_command(gsm, CMD_PN, (const u8 *)&params, sizeof(params)); return ret; } /** * gsm_dlci_t1 - T1 timer expiry * @t: timer contained in the DLCI that opened * * The T1 timer handles retransmits of control frames (essentially of * SABM and DISC). 
We resend the command until the retry count runs out
 * in which case an opening port goes back to closed and a closing port
 * is simply put into closed state (any further frames from the other
 * end will get a DM response)
 *
 * Some control dlci can stay in ADM mode with other dlci working just
 * fine. In that case we can just keep the control dlci open after the
 * DLCI_OPENING receives DM.
 */
static void gsm_dlci_t1(struct timer_list *t)
{
	struct gsm_dlci *dlci = timer_container_of(dlci, t, t1);
	struct gsm_mux *gsm = dlci->gsm;

	switch (dlci->state) {
	case DLCI_CONFIGURE:
		/* Repeat the negotiation request while retries remain */
		if (!dlci->retries || gsm_dlci_negotiate(dlci) != 0)
			goto open_failed;
		dlci->retries--;
		goto rearm;

	case DLCI_OPENING:
		if (!dlci->addr && gsm->control == (DM | PF)) {
			if (debug & DBG_ERRORS)
				pr_info("DLCI 0 opening in ADM mode.\n");
			dlci->mode = DLCI_MODE_ADM;
			gsm_dlci_open(dlci);
			return;
		}
		if (!dlci->retries)
			goto open_failed;
		/*
		 * A non-zero DLCI holds back its SABM, without consuming a
		 * retry, for as long as DLCI 0 is itself still opening.
		 */
		if (!dlci->addr || !gsm->dlci[0] ||
		    gsm->dlci[0]->state != DLCI_OPENING) {
			dlci->retries--;
			gsm_command(dlci->gsm, dlci->addr, SABM|PF);
		}
		goto rearm;

	case DLCI_CLOSING:
		if (!dlci->retries) {
			gsm_dlci_close(dlci);
			return;
		}
		dlci->retries--;
		gsm_command(dlci->gsm, dlci->addr, DISC|PF);
		goto rearm;

	default:
		pr_debug("%s: unhandled state: %d\n", __func__, dlci->state);
		return;
	}

rearm:
	mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
	return;

open_failed:
	gsm->open_error++;
	gsm_dlci_begin_close(dlci); /* prevent half open link */
}

/**
 * gsm_dlci_begin_open - start channel open procedure
 * @dlci: DLCI to open
 *
 * Commence opening a DLCI from the Linux side. We issue SABM messages
 * to the modem which should then reply with a UA or ADM, at which point
 * we will move into open state. Opening is done asynchronously with retry
 * running off timers and the responses.
 * Parameter negotiation is performed before SABM if required.
*/ static void gsm_dlci_begin_open(struct gsm_dlci *dlci) { struct gsm_mux *gsm = dlci ? dlci->gsm : NULL; bool need_pn = false; if (!gsm) return; if (dlci->addr != 0) { if (gsm->adaption != 1 || gsm->adaption != dlci->adaption) need_pn = true; if (dlci->prio != (roundup(dlci->addr + 1, 8) - 1)) need_pn = true; if (gsm->ftype != dlci->ftype) need_pn = true; } switch (dlci->state) { case DLCI_CLOSED: case DLCI_WAITING_CONFIG: case DLCI_CLOSING: dlci->retries = gsm->n2; if (!need_pn) { dlci->state = DLCI_OPENING; if (!dlci->addr || !gsm->dlci[0] || gsm->dlci[0]->state != DLCI_OPENING) gsm_command(gsm, dlci->addr, SABM|PF); } else { /* Configure DLCI before setup */ dlci->state = DLCI_CONFIGURE; if (gsm_dlci_negotiate(dlci) != 0) { gsm_dlci_close(dlci); return; } } mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); break; default: break; } } /** * gsm_dlci_set_opening - change state to opening * @dlci: DLCI to open * * Change internal state to wait for DLCI open from initiator side. * We set off timers and responses upon reception of an SABM. */ static void gsm_dlci_set_opening(struct gsm_dlci *dlci) { switch (dlci->state) { case DLCI_CLOSED: case DLCI_WAITING_CONFIG: case DLCI_CLOSING: dlci->state = DLCI_OPENING; break; default: break; } } /** * gsm_dlci_set_wait_config - wait for channel configuration * @dlci: DLCI to configure * * Wait for a DLCI configuration from the application. */ static void gsm_dlci_set_wait_config(struct gsm_dlci *dlci) { switch (dlci->state) { case DLCI_CLOSED: case DLCI_CLOSING: dlci->state = DLCI_WAITING_CONFIG; break; default: break; } } /** * gsm_dlci_begin_close - start channel open procedure * @dlci: DLCI to open * * Commence closing a DLCI from the Linux side. We issue DISC messages * to the modem which should then reply with a UA, at which point we * will move into closed state. Closing is done asynchronously with retry * off timers. We may also receive a DM reply from the other end which * indicates the channel was already closed. 
*/ static void gsm_dlci_begin_close(struct gsm_dlci *dlci) { struct gsm_mux *gsm = dlci->gsm; if (dlci->state == DLCI_CLOSED || dlci->state == DLCI_CLOSING) return; dlci->retries = gsm->n2; dlci->state = DLCI_CLOSING; gsm_command(dlci->gsm, dlci->addr, DISC|PF); mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100); wake_up_interruptible(&gsm->event); } /** * gsm_dlci_data - data arrived * @dlci: channel * @data: block of bytes received * @clen: length of received block * * A UI or UIH frame has arrived which contains data for a channel * other than the control channel. If the relevant virtual tty is * open we shovel the bits down it, if not we drop them. */ static void gsm_dlci_data(struct gsm_dlci *dlci, const u8 *data, int clen) { /* krefs .. */ struct tty_port *port = &dlci->port; struct tty_struct *tty; unsigned int modem = 0; int len; if (debug & DBG_TTY) pr_debug("%d bytes for tty\n", clen); switch (dlci->adaption) { /* Unsupported types */ case 4: /* Packetised interruptible data */ break; case 3: /* Packetised uininterruptible voice/data */ break; case 2: /* Asynchronous serial with line state in each frame */ len = gsm_read_ea_val(&modem, data, clen); if (len < 1) return; tty = tty_port_tty_get(port); if (tty) { gsm_process_modem(tty, dlci, modem, len); tty_wakeup(tty); tty_kref_put(tty); } /* Skip processed modem data */ data += len; clen -= len; fallthrough; case 1: /* Line state will go via DLCI 0 controls only */ default: tty_insert_flip_string(port, data, clen); tty_flip_buffer_push(port); } } /** * gsm_dlci_command - data arrived on control channel * @dlci: channel * @data: block of bytes received * @len: length of received block * * A UI or UIH frame has arrived which contains data for DLCI 0 the * control channel. This should contain a command EA followed by * control data bytes. The command EA contains a command/response bit * and we divide up the work accordingly. 
*/ static void gsm_dlci_command(struct gsm_dlci *dlci, const u8 *data, int len) { /* See what command is involved */ unsigned int command = 0; unsigned int clen = 0; unsigned int dlen; /* read the command */ dlen = gsm_read_ea_val(&command, data, len); len -= dlen; data += dlen; /* read any control data */ dlen = gsm_read_ea_val(&clen, data, len); len -= dlen; data += dlen; /* Malformed command? */ if (clen > len) { dlci->gsm->malformed++; return; } if (command & 1) gsm_control_message(dlci->gsm, command, data, clen); else gsm_control_response(dlci->gsm, command, data, clen); } /** * gsm_kick_timer - transmit if possible * @t: timer contained in our gsm object * * Transmit data from DLCIs if the queue is empty. We can't rely on * a tty wakeup except when we filled the pipe so we need to fire off * new data ourselves in other cases. */ static void gsm_kick_timer(struct timer_list *t) { struct gsm_mux *gsm = timer_container_of(gsm, t, kick_timer); unsigned long flags; int sent = 0; spin_lock_irqsave(&gsm->tx_lock, flags); /* If we have nothing running then we need to fire up */ if (gsm->tx_bytes < TX_THRESH_LO) sent = gsm_dlci_data_sweep(gsm); spin_unlock_irqrestore(&gsm->tx_lock, flags); if (sent && debug & DBG_DATA) pr_info("%s TX queue stalled\n", __func__); } /** * gsm_dlci_copy_config_values - copy DLCI configuration * @dlci: source DLCI * @dc: configuration structure to fill */ static void gsm_dlci_copy_config_values(struct gsm_dlci *dlci, struct gsm_dlci_config *dc) { memset(dc, 0, sizeof(*dc)); dc->channel = (u32)dlci->addr; dc->adaption = (u32)dlci->adaption; dc->mtu = (u32)dlci->mtu; dc->priority = (u32)dlci->prio; if (dlci->ftype == UIH) dc->i = 1; else dc->i = 2; dc->k = (u32)dlci->k; } /** * gsm_dlci_config - configure DLCI from configuration * @dlci: DLCI to configure * @dc: DLCI configuration * @open: open DLCI after configuration? 
*/ static int gsm_dlci_config(struct gsm_dlci *dlci, struct gsm_dlci_config *dc, int open) { struct gsm_mux *gsm; bool need_restart = false; bool need_open = false; unsigned int i; /* * Check that userspace doesn't put stuff in here to prevent breakages * in the future. */ for (i = 0; i < ARRAY_SIZE(dc->reserved); i++) if (dc->reserved[i]) return -EINVAL; if (!dlci) return -EINVAL; gsm = dlci->gsm; /* Stuff we don't support yet - I frame transport */ if (dc->adaption != 1 && dc->adaption != 2) return -EOPNOTSUPP; if (dc->mtu > MAX_MTU || dc->mtu < MIN_MTU || dc->mtu > gsm->mru) return -EINVAL; if (dc->priority >= 64) return -EINVAL; if (dc->i == 0 || dc->i > 2) /* UIH and UI only */ return -EINVAL; if (dc->k > 7) return -EINVAL; if (dc->flags & ~GSM_FL_RESTART) /* allow future extensions */ return -EINVAL; /* * See what is needed for reconfiguration */ /* Framing fields */ if (dc->adaption != dlci->adaption) need_restart = true; if (dc->mtu != dlci->mtu) need_restart = true; if (dc->i != dlci->ftype) need_restart = true; /* Requires care */ if (dc->priority != dlci->prio) need_restart = true; if (dc->flags & GSM_FL_RESTART) need_restart = true; if ((open && gsm->wait_config) || need_restart) need_open = true; if (dlci->state == DLCI_WAITING_CONFIG) { need_restart = false; need_open = true; } /* * Close down what is needed, restart and initiate the new * configuration. 
*/ if (need_restart) { gsm_dlci_begin_close(dlci); wait_event_interruptible(gsm->event, dlci->state == DLCI_CLOSED); if (signal_pending(current)) return -EINTR; } /* * Setup the new configuration values */ dlci->adaption = (int)dc->adaption; if (dc->mtu) dlci->mtu = (unsigned int)dc->mtu; else dlci->mtu = gsm->mtu; if (dc->priority) dlci->prio = (u8)dc->priority; else dlci->prio = roundup(dlci->addr + 1, 8) - 1; if (dc->i == 1) dlci->ftype = UIH; else if (dc->i == 2) dlci->ftype = UI; if (dc->k) dlci->k = (u8)dc->k; else dlci->k = gsm->k; if (need_open) { if (gsm->initiator) gsm_dlci_begin_open(dlci); else gsm_dlci_set_opening(dlci); } return 0; } /* * Allocate/Free DLCI channels */ /** * gsm_dlci_alloc - allocate a DLCI * @gsm: GSM mux * @addr: address of the DLCI * * Allocate and install a new DLCI object into the GSM mux. * * FIXME: review locking races */ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr) { struct gsm_dlci *dlci = kzalloc(sizeof(struct gsm_dlci), GFP_ATOMIC); if (dlci == NULL) return NULL; spin_lock_init(&dlci->lock); mutex_init(&dlci->mutex); if (kfifo_alloc(&dlci->fifo, TX_SIZE, GFP_KERNEL) < 0) { kfree(dlci); return NULL; } skb_queue_head_init(&dlci->skb_list); timer_setup(&dlci->t1, gsm_dlci_t1, 0); tty_port_init(&dlci->port); dlci->port.ops = &gsm_port_ops; dlci->gsm = gsm; dlci->addr = addr; dlci->adaption = gsm->adaption; dlci->mtu = gsm->mtu; if (addr == 0) dlci->prio = 0; else dlci->prio = roundup(addr + 1, 8) - 1; dlci->ftype = gsm->ftype; dlci->k = gsm->k; dlci->state = DLCI_CLOSED; if (addr) { dlci->data = gsm_dlci_data; /* Prevent us from sending data before the link is up */ dlci->constipated = true; } else { dlci->data = gsm_dlci_command; } gsm->dlci[addr] = dlci; return dlci; } /** * gsm_dlci_free - free DLCI * @port: tty port for DLCI to free * * Free up a DLCI. * * Can sleep. 
*/ static void gsm_dlci_free(struct tty_port *port) { struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port); timer_shutdown_sync(&dlci->t1); dlci->gsm->dlci[dlci->addr] = NULL; kfifo_free(&dlci->fifo); while ((dlci->skb = skb_dequeue(&dlci->skb_list))) dev_kfree_skb(dlci->skb); kfree(dlci); } static inline void dlci_get(struct gsm_dlci *dlci) { tty_port_get(&dlci->port); } static inline void dlci_put(struct gsm_dlci *dlci) { tty_port_put(&dlci->port); } static void gsm_destroy_network(struct gsm_dlci *dlci); /** * gsm_dlci_release - release DLCI * @dlci: DLCI to destroy * * Release a DLCI. Actual free is deferred until either * mux is closed or tty is closed - whichever is last. * * Can sleep. */ static void gsm_dlci_release(struct gsm_dlci *dlci) { struct tty_struct *tty = tty_port_tty_get(&dlci->port); if (tty) { mutex_lock(&dlci->mutex); gsm_destroy_network(dlci); mutex_unlock(&dlci->mutex); /* We cannot use tty_hangup() because in tty_kref_put() the tty * driver assumes that the hangup queue is free and reuses it to * queue release_one_tty() -> NULL pointer panic in * process_one_work(). */ tty_vhangup(tty); tty_port_tty_set(&dlci->port, NULL); tty_kref_put(tty); } dlci->state = DLCI_CLOSED; dlci_put(dlci); } /* * LAPBish link layer logic */ /** * gsm_queue - a GSM frame is ready to process * @gsm: pointer to our gsm mux * * At this point in time a frame has arrived and been demangled from * the line encoding. All the differences between the encodings have * been handled below us and the frame is unpacked into the structures. * The fcs holds the header FCS but any data FCS must be added here. */ static void gsm_queue(struct gsm_mux *gsm) { struct gsm_dlci *dlci; u8 cr; int address; if (gsm->fcs != GOOD_FCS) { gsm->bad_fcs++; if (debug & DBG_DATA) pr_debug("BAD FCS %02x\n", gsm->fcs); return; } address = gsm->address >> 1; if (address >= NUM_DLCI) goto invalid; cr = gsm->address & 1; /* C/R bit */ cr ^= gsm->initiator ? 
0 : 1; /* Flip so 1 always means command */ gsm_print_packet("<--", address, cr, gsm->control, gsm->buf, gsm->len); dlci = gsm->dlci[address]; switch (gsm->control) { case SABM|PF: if (cr == 1) { gsm->open_error++; goto invalid; } if (dlci == NULL) dlci = gsm_dlci_alloc(gsm, address); if (dlci == NULL) { gsm->open_error++; return; } if (dlci->dead) gsm_response(gsm, address, DM|PF); else { gsm_response(gsm, address, UA|PF); gsm_dlci_open(dlci); } break; case DISC|PF: if (cr == 1) goto invalid; if (dlci == NULL || dlci->state == DLCI_CLOSED) { gsm_response(gsm, address, DM|PF); return; } /* Real close complete */ gsm_response(gsm, address, UA|PF); gsm_dlci_close(dlci); break; case UA|PF: if (cr == 0 || dlci == NULL) break; switch (dlci->state) { case DLCI_CLOSING: gsm_dlci_close(dlci); break; case DLCI_OPENING: gsm_dlci_open(dlci); break; default: pr_debug("%s: unhandled state: %d\n", __func__, dlci->state); break; } break; case DM: /* DM can be valid unsolicited */ case DM|PF: if (cr) goto invalid; if (dlci == NULL) return; gsm_dlci_close(dlci); break; case UI: case UI|PF: case UIH: case UIH|PF: if (dlci == NULL || dlci->state != DLCI_OPEN) { gsm_response(gsm, address, DM|PF); return; } dlci->data(dlci, gsm->buf, gsm->len); break; default: goto invalid; } return; invalid: gsm->malformed++; return; } /** * gsm0_receive_state_check_and_fix - check and correct receive state * @gsm: gsm data for this ldisc instance * * Ensures that the current receive state is valid for basic option mode. 
*/ static void gsm0_receive_state_check_and_fix(struct gsm_mux *gsm) { switch (gsm->state) { case GSM_SEARCH: case GSM0_ADDRESS: case GSM0_CONTROL: case GSM0_LEN0: case GSM0_LEN1: case GSM0_DATA: case GSM0_FCS: case GSM0_SSOF: break; default: gsm->state = GSM_SEARCH; break; } } /** * gsm0_receive - perform processing for non-transparency * @gsm: gsm data for this ldisc instance * @c: character * * Receive bytes in gsm mode 0 */ static void gsm0_receive(struct gsm_mux *gsm, u8 c) { unsigned int len; gsm0_receive_state_check_and_fix(gsm); switch (gsm->state) { case GSM_SEARCH: /* SOF marker */ if (c == GSM0_SOF) { gsm->state = GSM0_ADDRESS; gsm->address = 0; gsm->len = 0; gsm->fcs = INIT_FCS; } break; case GSM0_ADDRESS: /* Address EA */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); if (gsm_read_ea(&gsm->address, c)) gsm->state = GSM0_CONTROL; break; case GSM0_CONTROL: /* Control Byte */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->control = c; gsm->state = GSM0_LEN0; break; case GSM0_LEN0: /* Length EA */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); if (gsm_read_ea(&gsm->len, c)) { if (gsm->len > gsm->mru) { gsm->bad_size++; gsm->state = GSM_SEARCH; break; } gsm->count = 0; if (!gsm->len) gsm->state = GSM0_FCS; else gsm->state = GSM0_DATA; break; } gsm->state = GSM0_LEN1; break; case GSM0_LEN1: gsm->fcs = gsm_fcs_add(gsm->fcs, c); len = c; gsm->len |= len << 7; if (gsm->len > gsm->mru) { gsm->bad_size++; gsm->state = GSM_SEARCH; break; } gsm->count = 0; if (!gsm->len) gsm->state = GSM0_FCS; else gsm->state = GSM0_DATA; break; case GSM0_DATA: /* Data */ gsm->buf[gsm->count++] = c; if (gsm->count >= MAX_MRU) { gsm->bad_size++; gsm->state = GSM_SEARCH; } else if (gsm->count >= gsm->len) { /* Calculate final FCS for UI frames over all data */ if ((gsm->control & ~PF) != UIH) { gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->count); } gsm->state = GSM0_FCS; } break; case GSM0_FCS: /* FCS follows the packet */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->state = GSM0_SSOF; break; case 
GSM0_SSOF: gsm->state = GSM_SEARCH; if (c == GSM0_SOF) gsm_queue(gsm); else gsm->bad_size++; break; default: pr_debug("%s: unhandled state: %d\n", __func__, gsm->state); break; } } /** * gsm1_receive_state_check_and_fix - check and correct receive state * @gsm: gsm data for this ldisc instance * * Ensures that the current receive state is valid for advanced option mode. */ static void gsm1_receive_state_check_and_fix(struct gsm_mux *gsm) { switch (gsm->state) { case GSM_SEARCH: case GSM1_START: case GSM1_ADDRESS: case GSM1_CONTROL: case GSM1_DATA: case GSM1_OVERRUN: break; default: gsm->state = GSM_SEARCH; break; } } /** * gsm1_receive - perform processing for non-transparency * @gsm: gsm data for this ldisc instance * @c: character * * Receive bytes in mode 1 (Advanced option) */ static void gsm1_receive(struct gsm_mux *gsm, u8 c) { gsm1_receive_state_check_and_fix(gsm); /* handle XON/XOFF */ if ((c & ISO_IEC_646_MASK) == XON) { gsm->constipated = true; return; } else if ((c & ISO_IEC_646_MASK) == XOFF) { gsm->constipated = false; /* Kick the link in case it is idling */ gsmld_write_trigger(gsm); return; } if (c == GSM1_SOF) { /* EOF is only valid in frame if we have got to the data state */ if (gsm->state == GSM1_DATA) { if (gsm->count < 1) { /* Missing FSC */ gsm->malformed++; gsm->state = GSM1_START; return; } /* Remove the FCS from data */ gsm->count--; if ((gsm->control & ~PF) != UIH) { /* Calculate final FCS for UI frames over all * data but FCS */ gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->count); } /* Add the FCS itself to test against GOOD_FCS */ gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->buf[gsm->count]); gsm->len = gsm->count; gsm_queue(gsm); gsm->state = GSM1_START; return; } /* Any partial frame was a runt so go back to start */ if (gsm->state != GSM1_START) { if (gsm->state != GSM_SEARCH) gsm->malformed++; gsm->state = GSM1_START; } /* A SOF in GSM_START means we are still reading idling or framing bytes */ return; } if (c == GSM1_ESCAPE) { 
gsm->escape = true; return; } /* Only an unescaped SOF gets us out of GSM search */ if (gsm->state == GSM_SEARCH) return; if (gsm->escape) { c ^= GSM1_ESCAPE_BITS; gsm->escape = false; } switch (gsm->state) { case GSM1_START: /* First byte after SOF */ gsm->address = 0; gsm->state = GSM1_ADDRESS; gsm->fcs = INIT_FCS; fallthrough; case GSM1_ADDRESS: /* Address continuation */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); if (gsm_read_ea(&gsm->address, c)) gsm->state = GSM1_CONTROL; break; case GSM1_CONTROL: /* Control Byte */ gsm->fcs = gsm_fcs_add(gsm->fcs, c); gsm->control = c; gsm->count = 0; gsm->state = GSM1_DATA; break; case GSM1_DATA: /* Data */ if (gsm->count > gsm->mru || gsm->count > MAX_MRU) { /* Allow one for the FCS */ gsm->state = GSM1_OVERRUN; gsm->bad_size++; } else gsm->buf[gsm->count++] = c; break; case GSM1_OVERRUN: /* Over-long - eg a dropped SOF */ break; default: pr_debug("%s: unhandled state: %d\n", __func__, gsm->state); break; } } /** * gsm_error - handle tty error * @gsm: ldisc data * * Handle an error in the receipt of data for a frame. Currently we just * go back to hunting for a SOF. * * FIXME: better diagnostics ? */ static void gsm_error(struct gsm_mux *gsm) { gsm->state = GSM_SEARCH; gsm->io_error++; } /** * gsm_cleanup_mux - generic GSM protocol cleanup * @gsm: our mux * @disc: disconnect link? * * Clean up the bits of the mux which are the same for all framing * protocols. Remove the mux from the mux table, stop all the timers * and then shut down each device hanging up the channels as we go. 
*/
static void gsm_cleanup_mux(struct gsm_mux *gsm, bool disc)
{
	struct gsm_msg *msg, *next_msg;
	struct gsm_dlci *ctrl_dlci;
	int idx;

	gsm->dead = true;
	mutex_lock(&gsm->mutex);

	ctrl_dlci = gsm->dlci[0];
	if (ctrl_dlci) {
		/* On request, close DLCI 0 and wait until it is closed */
		if (disc && ctrl_dlci->state != DLCI_CLOSED) {
			gsm_dlci_begin_close(ctrl_dlci);
			wait_event(gsm->event,
				   ctrl_dlci->state == DLCI_CLOSED);
		}
		ctrl_dlci->dead = true;
	}

	/* Finish outstanding timers, making sure they are done */
	timer_delete_sync(&gsm->kick_timer);
	timer_delete_sync(&gsm->t2_timer);
	timer_delete_sync(&gsm->ka_timer);

	/* Finish writing to ldisc */
	flush_work(&gsm->tx_work);

	/* Free up any link layer users and finally the control channel */
	if (gsm->has_devices) {
		gsm_unregister_devices(gsm_tty_driver, gsm->num);
		gsm->has_devices = false;
	}
	/* Highest address first, so that DLCI 0 is released last */
	for (idx = NUM_DLCI; idx-- > 0; ) {
		if (gsm->dlci[idx])
			gsm_dlci_release(gsm->dlci[idx]);
	}
	mutex_unlock(&gsm->mutex);

	/* Now wipe the queues */
	tty_ldisc_flush(gsm->tty);

	/* The tx lock is held from here until the function returns */
	guard(spinlock_irqsave)(&gsm->tx_lock);
	list_for_each_entry_safe(msg, next_msg, &gsm->tx_ctrl_list, list)
		kfree(msg);
	INIT_LIST_HEAD(&gsm->tx_ctrl_list);
	list_for_each_entry_safe(msg, next_msg, &gsm->tx_data_list, list)
		kfree(msg);
	INIT_LIST_HEAD(&gsm->tx_data_list);
}

/**
 * gsm_activate_mux - generic GSM setup
 * @gsm: our mux
 *
 * Set up the bits of the mux which are the same for all framing
 * protocols. Add the mux to the mux table so it can be opened and
 * finally kick off connecting to DLCI 0 on the modem.
*/ static int gsm_activate_mux(struct gsm_mux *gsm) { struct gsm_dlci *dlci; int ret; dlci = gsm_dlci_alloc(gsm, 0); if (dlci == NULL) return -ENOMEM; if (gsm->encoding == GSM_BASIC_OPT) gsm->receive = gsm0_receive; else gsm->receive = gsm1_receive; ret = gsm_register_devices(gsm_tty_driver, gsm->num); if (ret) return ret; gsm->has_devices = true; gsm->dead = false; /* Tty opens are now permissible */ return 0; } /** * gsm_free_mux - free up a mux * @gsm: mux to free * * Dispose of allocated resources for a dead mux */ static void gsm_free_mux(struct gsm_mux *gsm) { int i; for (i = 0; i < MAX_MUX; i++) { if (gsm == gsm_mux[i]) { gsm_mux[i] = NULL; break; } } mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); kfree(gsm); } /** * gsm_free_muxr - free up a mux * @ref: kreference to the mux to free * * Dispose of allocated resources for a dead mux */ static void gsm_free_muxr(struct kref *ref) { struct gsm_mux *gsm = container_of(ref, struct gsm_mux, ref); gsm_free_mux(gsm); } static inline void mux_get(struct gsm_mux *gsm) { unsigned long flags; spin_lock_irqsave(&gsm_mux_lock, flags); kref_get(&gsm->ref); spin_unlock_irqrestore(&gsm_mux_lock, flags); } static inline void mux_put(struct gsm_mux *gsm) { unsigned long flags; spin_lock_irqsave(&gsm_mux_lock, flags); kref_put(&gsm->ref, gsm_free_muxr); spin_unlock_irqrestore(&gsm_mux_lock, flags); } static inline unsigned int mux_num_to_base(struct gsm_mux *gsm) { return gsm->num * NUM_DLCI; } static inline unsigned int mux_line_to_num(unsigned int line) { return line / NUM_DLCI; } /** * gsm_alloc_mux - allocate a mux * * Creates a new mux ready for activation. 
*/ static struct gsm_mux *gsm_alloc_mux(void) { int i; struct gsm_mux *gsm = kzalloc(sizeof(struct gsm_mux), GFP_KERNEL); if (gsm == NULL) return NULL; gsm->buf = kmalloc(MAX_MRU + 1, GFP_KERNEL); if (gsm->buf == NULL) { kfree(gsm); return NULL; } gsm->txframe = kmalloc(2 * (MAX_MTU + PROT_OVERHEAD - 1), GFP_KERNEL); if (gsm->txframe == NULL) { kfree(gsm->buf); kfree(gsm); return NULL; } spin_lock_init(&gsm->lock); mutex_init(&gsm->mutex); kref_init(&gsm->ref); INIT_LIST_HEAD(&gsm->tx_ctrl_list); INIT_LIST_HEAD(&gsm->tx_data_list); timer_setup(&gsm->kick_timer, gsm_kick_timer, 0); timer_setup(&gsm->t2_timer, gsm_control_retransmit, 0); timer_setup(&gsm->ka_timer, gsm_control_keep_alive, 0); INIT_WORK(&gsm->tx_work, gsmld_write_task); init_waitqueue_head(&gsm->event); spin_lock_init(&gsm->control_lock); spin_lock_init(&gsm->tx_lock); gsm->t1 = T1; gsm->t2 = T2; gsm->t3 = T3; gsm->n2 = N2; gsm->k = K; gsm->ftype = UIH; gsm->adaption = 1; gsm->encoding = GSM_ADV_OPT; gsm->mru = 64; /* Default to encoding 1 so these should be 64 */ gsm->mtu = 64; gsm->dead = true; /* Avoid early tty opens */ gsm->wait_config = false; /* Disabled */ gsm->keep_alive = 0; /* Disabled */ /* Store the instance to the mux array or abort if no space is * available. 
*/ spin_lock(&gsm_mux_lock); for (i = 0; i < MAX_MUX; i++) { if (!gsm_mux[i]) { gsm_mux[i] = gsm; gsm->num = i; break; } } spin_unlock(&gsm_mux_lock); if (i == MAX_MUX) { mutex_destroy(&gsm->mutex); kfree(gsm->txframe); kfree(gsm->buf); kfree(gsm); return NULL; } return gsm; } static void gsm_copy_config_values(struct gsm_mux *gsm, struct gsm_config *c) { memset(c, 0, sizeof(*c)); c->adaption = gsm->adaption; c->encapsulation = gsm->encoding; c->initiator = gsm->initiator; c->t1 = gsm->t1; c->t2 = gsm->t2; c->t3 = gsm->t3; c->n2 = gsm->n2; if (gsm->ftype == UIH) c->i = 1; else c->i = 2; pr_debug("Ftype %d i %d\n", gsm->ftype, c->i); c->mru = gsm->mru; c->mtu = gsm->mtu; c->k = gsm->k; } static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c) { int need_close = 0; int need_restart = 0; /* Stuff we don't support yet - UI or I frame transport */ if (c->adaption != 1 && c->adaption != 2) return -EOPNOTSUPP; /* Check the MRU/MTU range looks sane */ if (c->mru < MIN_MTU || c->mtu < MIN_MTU) return -EINVAL; if (c->mru > MAX_MRU || c->mtu > MAX_MTU) return -EINVAL; if (c->t3 > MAX_T3) return -EINVAL; if (c->n2 > 255) return -EINVAL; if (c->encapsulation > 1) /* Basic, advanced, no I */ return -EINVAL; if (c->initiator > 1) return -EINVAL; if (c->k > MAX_WINDOW_SIZE) return -EINVAL; if (c->i == 0 || c->i > 2) /* UIH and UI only */ return -EINVAL; /* * See what is needed for reconfiguration */ /* Timing fields */ if (c->t1 != 0 && c->t1 != gsm->t1) need_restart = 1; if (c->t2 != 0 && c->t2 != gsm->t2) need_restart = 1; if (c->encapsulation != gsm->encoding) need_restart = 1; if (c->adaption != gsm->adaption) need_restart = 1; /* Requires care */ if (c->initiator != gsm->initiator) need_close = 1; if (c->mru != gsm->mru) need_restart = 1; if (c->mtu != gsm->mtu) need_restart = 1; /* * Close down what is needed, restart and initiate the new * configuration. On the first time there is no DLCI[0] * and closing or cleaning up is not necessary. 
*/ if (need_close || need_restart) gsm_cleanup_mux(gsm, true); gsm->initiator = c->initiator; gsm->mru = c->mru; gsm->mtu = c->mtu; gsm->encoding = c->encapsulation ? GSM_ADV_OPT : GSM_BASIC_OPT; gsm->adaption = c->adaption; gsm->n2 = c->n2; if (c->i == 1) gsm->ftype = UIH; else if (c->i == 2) gsm->ftype = UI; if (c->t1) gsm->t1 = c->t1; if (c->t2) gsm->t2 = c->t2; if (c->t3) gsm->t3 = c->t3; if (c->k) gsm->k = c->k; /* * FIXME: We need to separate activation/deactivation from adding * and removing from the mux array */ if (gsm->dead) { int ret = gsm_activate_mux(gsm); if (ret) return ret; if (gsm->initiator) gsm_dlci_begin_open(gsm->dlci[0]); } return 0; } static void gsm_copy_config_ext_values(struct gsm_mux *gsm, struct gsm_config_ext *ce) { memset(ce, 0, sizeof(*ce)); ce->wait_config = gsm->wait_config ? 1 : 0; ce->keep_alive = gsm->keep_alive; } static int gsm_config_ext(struct gsm_mux *gsm, struct gsm_config_ext *ce) { bool need_restart = false; unsigned int i; /* * Check that userspace doesn't put stuff in here to prevent breakages * in the future. */ for (i = 0; i < ARRAY_SIZE(ce->reserved); i++) if (ce->reserved[i]) return -EINVAL; if (ce->flags & ~GSM_FL_RESTART) return -EINVAL; /* Requires care */ if (ce->flags & GSM_FL_RESTART) need_restart = true; /* * Close down what is needed, restart and initiate the new * configuration. On the first time there is no DLCI[0] * and closing or cleaning up is not necessary. */ if (need_restart) gsm_cleanup_mux(gsm, true); /* * Setup the new configuration values */ gsm->wait_config = ce->wait_config ? true : false; gsm->keep_alive = ce->keep_alive; if (gsm->dead) { int ret = gsm_activate_mux(gsm); if (ret) return ret; if (gsm->initiator) gsm_dlci_begin_open(gsm->dlci[0]); } return 0; } /** * gsmld_output - write to link * @gsm: our mux * @data: bytes to output * @len: size * * Write a block of data from the GSM mux to the data channel. This * will eventually be serialized from above but at the moment isn't. 
*/ static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len) { if (tty_write_room(gsm->tty) < len) { set_bit(TTY_DO_WRITE_WAKEUP, &gsm->tty->flags); return -ENOSPC; } if (debug & DBG_DATA) gsm_hex_dump_bytes(__func__, data, len); return gsm->tty->ops->write(gsm->tty, data, len); } /** * gsmld_write_trigger - schedule ldisc write task * @gsm: our mux */ static void gsmld_write_trigger(struct gsm_mux *gsm) { if (!gsm || !gsm->dlci[0] || gsm->dlci[0]->dead) return; schedule_work(&gsm->tx_work); } /** * gsmld_write_task - ldisc write task * @work: our tx write work * * Writes out data to the ldisc if possible. We are doing this here to * avoid dead-locking. This returns if no space or data is left for output. */ static void gsmld_write_task(struct work_struct *work) { struct gsm_mux *gsm = container_of(work, struct gsm_mux, tx_work); unsigned long flags; int i, ret; /* All outstanding control channel and control messages and one data * frame is sent. */ ret = -ENODEV; spin_lock_irqsave(&gsm->tx_lock, flags); if (gsm->tty) ret = gsm_data_kick(gsm); spin_unlock_irqrestore(&gsm->tx_lock, flags); if (ret >= 0) for (i = 0; i < NUM_DLCI; i++) if (gsm->dlci[i]) tty_port_tty_wakeup(&gsm->dlci[i]->port); } /** * gsmld_attach_gsm - mode set up * @tty: our tty structure * @gsm: our mux * * Set up the MUX for basic mode and commence connecting to the * modem. Currently called from the line discipline set up but * will need moving to an ioctl path. */ static void gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) { gsm->tty = tty_kref_get(tty); /* Turn off tty XON/XOFF handling to handle it explicitly. 
*/ gsm->old_c_iflag = tty->termios.c_iflag; tty->termios.c_iflag &= (IXON | IXOFF); } /** * gsmld_detach_gsm - stop doing 0710 mux * @tty: tty attached to the mux * @gsm: mux * * Shutdown and then clean up the resources used by the line discipline */ static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm) { WARN_ON(tty != gsm->tty); /* Restore tty XON/XOFF handling. */ gsm->tty->termios.c_iflag = gsm->old_c_iflag; tty_kref_put(gsm->tty); gsm->tty = NULL; } static void gsmld_receive_buf(struct tty_struct *tty, const u8 *cp, const u8 *fp, size_t count) { struct gsm_mux *gsm = tty->disc_data; u8 flags = TTY_NORMAL; if (debug & DBG_DATA) gsm_hex_dump_bytes(__func__, cp, count); for (; count; count--, cp++) { if (fp) flags = *fp++; switch (flags) { case TTY_NORMAL: if (gsm->receive) gsm->receive(gsm, *cp); break; case TTY_OVERRUN: case TTY_BREAK: case TTY_PARITY: case TTY_FRAME: gsm_error(gsm); break; default: WARN_ONCE(1, "%s: unknown flag %d\n", tty_name(tty), flags); break; } } /* FASYNC if needed ? */ /* If clogged call tty_throttle(tty); */ } /** * gsmld_flush_buffer - clean input queue * @tty: terminal device * * Flush the input buffer. Called when the line discipline is * being closed, when the tty layer wants the buffer flushed (eg * at hangup). */ static void gsmld_flush_buffer(struct tty_struct *tty) { } /** * gsmld_close - close the ldisc for this tty * @tty: device * * Called from the terminal layer when this line discipline is * being shut down, either because of a close or becsuse of a * discipline change. The function will not be called while other * ldisc methods are in progress. */ static void gsmld_close(struct tty_struct *tty) { struct gsm_mux *gsm = tty->disc_data; /* The ldisc locks and closes the port before calling our close. This * means we have no way to do a proper disconnect. We will not bother * to do one. 
*/ gsm_cleanup_mux(gsm, false); gsmld_detach_gsm(tty, gsm); gsmld_flush_buffer(tty); /* Do other clean up here */ mux_put(gsm); } /** * gsmld_open - open an ldisc * @tty: terminal to open * * Called when this line discipline is being attached to the * terminal device. Can sleep. Called serialized so that no * other events will occur in parallel. No further open will occur * until a close. */ static int gsmld_open(struct tty_struct *tty) { struct gsm_mux *gsm; if (!capable(CAP_NET_ADMIN)) return -EPERM; if (tty->ops->write == NULL) return -EINVAL; /* Attach our ldisc data */ gsm = gsm_alloc_mux(); if (gsm == NULL) return -ENOMEM; tty->disc_data = gsm; tty->receive_room = 65536; /* Attach the initial passive connection */ gsmld_attach_gsm(tty, gsm); /* The mux will not be activated yet, we wait for correct * configuration first. */ if (gsm->encoding == GSM_BASIC_OPT) gsm->receive = gsm0_receive; else gsm->receive = gsm1_receive; return 0; } /** * gsmld_write_wakeup - asynchronous I/O notifier * @tty: tty device * * Required for the ptys, serial driver etc. since processes * that attach themselves to the master and rely on ASYNC * IO must be woken up */ static void gsmld_write_wakeup(struct tty_struct *tty) { struct gsm_mux *gsm = tty->disc_data; /* Queue poll */ gsmld_write_trigger(gsm); } /** * gsmld_read - read function for tty * @tty: tty device * @file: file object * @buf: userspace buffer pointer * @nr: size of I/O * @cookie: unused * @offset: unused * * Perform reads for the line discipline. We are guaranteed that the * line discipline will not be closed under us but we may get multiple * parallel readers and must handle this ourselves. We may also get * a hangup. Always called in user context, may sleep. * * This code must be sure never to sleep through a hangup. 
*/ static ssize_t gsmld_read(struct tty_struct *tty, struct file *file, u8 *buf, size_t nr, void **cookie, unsigned long offset) { return -EOPNOTSUPP; } /** * gsmld_write - write function for tty * @tty: tty device * @file: file object * @buf: userspace buffer pointer * @nr: size of I/O * * Called when the owner of the device wants to send a frame * itself (or some other control data). The data is transferred * as-is and must be properly framed and checksummed as appropriate * by userspace. Frames are either sent whole or not at all as this * avoids pain user side. */ static ssize_t gsmld_write(struct tty_struct *tty, struct file *file, const u8 *buf, size_t nr) { struct gsm_mux *gsm = tty->disc_data; unsigned long flags; size_t space; int ret; if (!gsm) return -ENODEV; ret = -ENOBUFS; spin_lock_irqsave(&gsm->tx_lock, flags); space = tty_write_room(tty); if (space >= nr) ret = tty->ops->write(tty, buf, nr); else set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags); spin_unlock_irqrestore(&gsm->tx_lock, flags); return ret; } /** * gsmld_poll - poll method for N_GSM0710 * @tty: terminal device * @file: file accessing it * @wait: poll table * * Called when the line discipline is asked to poll() for data or * for special events. This code is not serialized with respect to * other events save open/close. * * This code must be sure never to sleep through a hangup. 
* Called without the kernel lock held - fine */ static __poll_t gsmld_poll(struct tty_struct *tty, struct file *file, poll_table *wait) { __poll_t mask = 0; struct gsm_mux *gsm = tty->disc_data; poll_wait(file, &tty->read_wait, wait); poll_wait(file, &tty->write_wait, wait); if (gsm->dead) mask |= EPOLLHUP; if (tty_hung_up_p(file)) mask |= EPOLLHUP; if (test_bit(TTY_OTHER_CLOSED, &tty->flags)) mask |= EPOLLHUP; if (!tty_is_writelocked(tty) && tty_write_room(tty) > 0) mask |= EPOLLOUT | EPOLLWRNORM; return mask; } static int gsmld_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct gsm_config c; struct gsm_config_ext ce; struct gsm_dlci_config dc; struct gsm_mux *gsm = tty->disc_data; unsigned int base, addr; struct gsm_dlci *dlci; switch (cmd) { case GSMIOC_GETCONF: gsm_copy_config_values(gsm, &c); if (copy_to_user((void __user *)arg, &c, sizeof(c))) return -EFAULT; return 0; case GSMIOC_SETCONF: if (copy_from_user(&c, (void __user *)arg, sizeof(c))) return -EFAULT; return gsm_config(gsm, &c); case GSMIOC_GETFIRST: base = mux_num_to_base(gsm); return put_user(base + 1, (__u32 __user *)arg); case GSMIOC_GETCONF_EXT: gsm_copy_config_ext_values(gsm, &ce); if (copy_to_user((void __user *)arg, &ce, sizeof(ce))) return -EFAULT; return 0; case GSMIOC_SETCONF_EXT: if (copy_from_user(&ce, (void __user *)arg, sizeof(ce))) return -EFAULT; return gsm_config_ext(gsm, &ce); case GSMIOC_GETCONF_DLCI: if (copy_from_user(&dc, (void __user *)arg, sizeof(dc))) return -EFAULT; if (dc.channel == 0 || dc.channel >= NUM_DLCI) return -EINVAL; addr = array_index_nospec(dc.channel, NUM_DLCI); dlci = gsm->dlci[addr]; if (!dlci) { dlci = gsm_dlci_alloc(gsm, addr); if (!dlci) return -ENOMEM; } gsm_dlci_copy_config_values(dlci, &dc); if (copy_to_user((void __user *)arg, &dc, sizeof(dc))) return -EFAULT; return 0; case GSMIOC_SETCONF_DLCI: if (copy_from_user(&dc, (void __user *)arg, sizeof(dc))) return -EFAULT; if (dc.channel == 0 || dc.channel >= NUM_DLCI) return 
-EINVAL; addr = array_index_nospec(dc.channel, NUM_DLCI); dlci = gsm->dlci[addr]; if (!dlci) { dlci = gsm_dlci_alloc(gsm, addr); if (!dlci) return -ENOMEM; } return gsm_dlci_config(dlci, &dc, 0); default: return n_tty_ioctl_helper(tty, cmd, arg); } } /* * Network interface * */ static int gsm_mux_net_open(struct net_device *net) { pr_debug("%s called\n", __func__); netif_start_queue(net); return 0; } static int gsm_mux_net_close(struct net_device *net) { netif_stop_queue(net); return 0; } static void dlci_net_free(struct gsm_dlci *dlci) { if (!dlci->net) { WARN_ON(1); return; } dlci->adaption = dlci->prev_adaption; dlci->data = dlci->prev_data; free_netdev(dlci->net); dlci->net = NULL; } static void net_free(struct kref *ref) { struct gsm_mux_net *mux_net; struct gsm_dlci *dlci; mux_net = container_of(ref, struct gsm_mux_net, ref); dlci = mux_net->dlci; if (dlci->net) { unregister_netdev(dlci->net); dlci_net_free(dlci); } } static inline void muxnet_get(struct gsm_mux_net *mux_net) { kref_get(&mux_net->ref); } static inline void muxnet_put(struct gsm_mux_net *mux_net) { kref_put(&mux_net->ref, net_free); } static netdev_tx_t gsm_mux_net_start_xmit(struct sk_buff *skb, struct net_device *net) { struct gsm_mux_net *mux_net = netdev_priv(net); struct gsm_dlci *dlci = mux_net->dlci; muxnet_get(mux_net); skb_queue_head(&dlci->skb_list, skb); net->stats.tx_packets++; net->stats.tx_bytes += skb->len; gsm_dlci_data_kick(dlci); /* And tell the kernel when the last transmit started. */ netif_trans_update(net); muxnet_put(mux_net); return NETDEV_TX_OK; } /* called when a packet did not ack after watchdogtimeout */ static void gsm_mux_net_tx_timeout(struct net_device *net, unsigned int txqueue) { /* Tell syslog we are hosed. 
*/ dev_dbg(&net->dev, "Tx timed out.\n"); /* Update statistics */ net->stats.tx_errors++; } static void gsm_mux_rx_netchar(struct gsm_dlci *dlci, const u8 *in_buf, int size) { struct net_device *net = dlci->net; struct sk_buff *skb; struct gsm_mux_net *mux_net = netdev_priv(net); muxnet_get(mux_net); /* Allocate an sk_buff */ skb = dev_alloc_skb(size + NET_IP_ALIGN); if (!skb) { /* We got no receive buffer. */ net->stats.rx_dropped++; muxnet_put(mux_net); return; } skb_reserve(skb, NET_IP_ALIGN); skb_put_data(skb, in_buf, size); skb->dev = net; skb->protocol = htons(ETH_P_IP); /* Ship it off to the kernel */ netif_rx(skb); /* update out statistics */ net->stats.rx_packets++; net->stats.rx_bytes += size; muxnet_put(mux_net); return; } static void gsm_mux_net_init(struct net_device *net) { static const struct net_device_ops gsm_netdev_ops = { .ndo_open = gsm_mux_net_open, .ndo_stop = gsm_mux_net_close, .ndo_start_xmit = gsm_mux_net_start_xmit, .ndo_tx_timeout = gsm_mux_net_tx_timeout, }; net->netdev_ops = &gsm_netdev_ops; /* fill in the other fields */ net->watchdog_timeo = GSM_NET_TX_TIMEOUT; net->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; net->type = ARPHRD_NONE; net->tx_queue_len = 10; } /* caller holds the dlci mutex */ static void gsm_destroy_network(struct gsm_dlci *dlci) { struct gsm_mux_net *mux_net; pr_debug("destroy network interface\n"); if (!dlci->net) return; mux_net = netdev_priv(dlci->net); muxnet_put(mux_net); } /* caller holds the dlci mutex */ static int gsm_create_network(struct gsm_dlci *dlci, struct gsm_netconfig *nc) { char *netname; int retval = 0; struct net_device *net; struct gsm_mux_net *mux_net; if (!capable(CAP_NET_ADMIN)) return -EPERM; /* Already in a non tty mode */ if (dlci->adaption > 2) return -EBUSY; if (nc->protocol != htons(ETH_P_IP)) return -EPROTONOSUPPORT; if (nc->adaption != 3 && nc->adaption != 4) return -EPROTONOSUPPORT; pr_debug("create network interface\n"); netname = "gsm%d"; if (nc->if_name[0] != '\0') netname 
= nc->if_name; net = alloc_netdev(sizeof(struct gsm_mux_net), netname, NET_NAME_UNKNOWN, gsm_mux_net_init); if (!net) { pr_err("alloc_netdev failed\n"); return -ENOMEM; } net->mtu = dlci->mtu; net->min_mtu = MIN_MTU; net->max_mtu = dlci->mtu; mux_net = netdev_priv(net); mux_net->dlci = dlci; kref_init(&mux_net->ref); strscpy(nc->if_name, net->name); /* return net name */ /* reconfigure dlci for network */ dlci->prev_adaption = dlci->adaption; dlci->prev_data = dlci->data; dlci->adaption = nc->adaption; dlci->data = gsm_mux_rx_netchar; dlci->net = net; pr_debug("register netdev\n"); retval = register_netdev(net); if (retval) { pr_err("network register fail %d\n", retval); dlci_net_free(dlci); return retval; } return net->ifindex; /* return network index */ } /* Line discipline for real tty */ static struct tty_ldisc_ops tty_ldisc_packet = { .owner = THIS_MODULE, .num = N_GSM0710, .name = "n_gsm", .open = gsmld_open, .close = gsmld_close, .flush_buffer = gsmld_flush_buffer, .read = gsmld_read, .write = gsmld_write, .ioctl = gsmld_ioctl, .poll = gsmld_poll, .receive_buf = gsmld_receive_buf, .write_wakeup = gsmld_write_wakeup }; /* * Virtual tty side */ /** * gsm_modem_upd_via_data - send modem bits via convergence layer * @dlci: channel * @brk: break signal * * Send an empty frame to signal mobile state changes and to transmit the * break signal for adaption 2. 
*/ static void gsm_modem_upd_via_data(struct gsm_dlci *dlci, u8 brk) { struct gsm_mux *gsm = dlci->gsm; unsigned long flags; if (dlci->state != DLCI_OPEN || dlci->adaption != 2) return; spin_lock_irqsave(&gsm->tx_lock, flags); gsm_dlci_modem_output(gsm, dlci, brk); spin_unlock_irqrestore(&gsm->tx_lock, flags); } /** * gsm_modem_upd_via_msc - send modem bits via control frame * @dlci: channel * @brk: break signal */ static int gsm_modem_upd_via_msc(struct gsm_dlci *dlci, u8 brk) { u8 modembits[3]; struct gsm_control *ctrl; int len = 2; if (dlci->gsm->encoding != GSM_BASIC_OPT) return 0; modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */ if (!brk) { modembits[1] = (gsm_encode_modem(dlci) << 1) | EA; } else { modembits[1] = gsm_encode_modem(dlci) << 1; modembits[2] = (brk << 4) | 2 | EA; /* Length, Break, EA */ len++; } ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len); if (ctrl == NULL) return -ENOMEM; return gsm_control_wait(dlci->gsm, ctrl); } /** * gsm_modem_send_initial_msc - Send initial modem status message * * @dlci channel * * Send an initial MSC message after DLCI open to set the initial * modem status lines. This is only done for basic mode. * Does not wait for a response as we cannot block the input queue * processing. */ static int gsm_modem_send_initial_msc(struct gsm_dlci *dlci) { u8 modembits[2]; if (dlci->adaption != 1 || dlci->gsm->encoding != GSM_BASIC_OPT) return 0; modembits[0] = (dlci->addr << 2) | 2 | EA; /* DLCI, Valid, EA */ modembits[1] = (gsm_encode_modem(dlci) << 1) | EA; return gsm_control_command(dlci->gsm, CMD_MSC, (const u8 *)&modembits, 2); } /** * gsm_modem_update - send modem status line state * @dlci: channel * @brk: break signal */ static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk) { if (dlci->gsm->dead) return -EL2HLT; if (dlci->adaption == 2) { /* Send convergence layer type 2 empty data frame. 
*/ gsm_modem_upd_via_data(dlci, brk); return 0; } else if (dlci->gsm->encoding == GSM_BASIC_OPT) { /* Send as MSC control message. */ return gsm_modem_upd_via_msc(dlci, brk); } /* Modem status lines are not supported. */ return -EPROTONOSUPPORT; } /** * gsm_wait_modem_change - wait for modem status line change * @dlci: channel * @mask: modem status line bits * * The function returns if: * - any given modem status line bit changed * - the wait event function got interrupted (e.g. by a signal) * - the underlying DLCI was closed * - the underlying ldisc device was removed */ static int gsm_wait_modem_change(struct gsm_dlci *dlci, u32 mask) { struct gsm_mux *gsm = dlci->gsm; u32 old = dlci->modem_rx; int ret; ret = wait_event_interruptible(gsm->event, gsm->dead || dlci->state != DLCI_OPEN || (old ^ dlci->modem_rx) & mask); if (gsm->dead) return -ENODEV; if (dlci->state != DLCI_OPEN) return -EL2NSYNC; return ret; } static bool gsm_carrier_raised(struct tty_port *port) { struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port); struct gsm_mux *gsm = dlci->gsm; /* Not yet open so no carrier info */ if (dlci->state != DLCI_OPEN) return false; if (debug & DBG_CD_ON) return true; /* * Basic mode with control channel in ADM mode may not respond * to CMD_MSC at all and modem_rx is empty. 
*/ if (gsm->encoding == GSM_BASIC_OPT && gsm->dlci[0]->mode == DLCI_MODE_ADM && !dlci->modem_rx) return true; return dlci->modem_rx & TIOCM_CD; } static void gsm_dtr_rts(struct tty_port *port, bool active) { struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port); unsigned int modem_tx = dlci->modem_tx; if (active) modem_tx |= TIOCM_DTR | TIOCM_RTS; else modem_tx &= ~(TIOCM_DTR | TIOCM_RTS); if (modem_tx != dlci->modem_tx) { dlci->modem_tx = modem_tx; gsm_modem_update(dlci, 0); } } static const struct tty_port_operations gsm_port_ops = { .carrier_raised = gsm_carrier_raised, .dtr_rts = gsm_dtr_rts, .destruct = gsm_dlci_free, }; static int gsmtty_install(struct tty_driver *driver, struct tty_struct *tty) { struct gsm_mux *gsm; struct gsm_dlci *dlci, *dlci0; unsigned int line = tty->index; unsigned int mux = mux_line_to_num(line); bool alloc = false; int ret; line = line & 0x3F; if (mux >= MAX_MUX) return -ENXIO; /* FIXME: we need to lock gsm_mux for lifetimes of ttys eventually */ if (gsm_mux[mux] == NULL) return -EUNATCH; if (line == 0 || line > 61) /* 62/63 reserved */ return -ECHRNG; gsm = gsm_mux[mux]; if (gsm->dead) return -EL2HLT; /* If DLCI 0 is not yet fully open return an error. 
This is ok from a locking perspective as we don't have to worry about this if DLCI0 is lost */ mutex_lock(&gsm->mutex); dlci0 = gsm->dlci[0]; if (dlci0 && dlci0->state != DLCI_OPEN) { mutex_unlock(&gsm->mutex); if (dlci0->state == DLCI_OPENING) wait_event(gsm->event, dlci0->state != DLCI_OPENING); if (dlci0->state != DLCI_OPEN) return -EL2NSYNC; mutex_lock(&gsm->mutex); } dlci = gsm->dlci[line]; if (dlci == NULL) { alloc = true; dlci = gsm_dlci_alloc(gsm, line); } if (dlci == NULL) { mutex_unlock(&gsm->mutex); return -ENOMEM; } ret = tty_port_install(&dlci->port, driver, tty); if (ret) { if (alloc) dlci_put(dlci); mutex_unlock(&gsm->mutex); return ret; } dlci_get(dlci); dlci_get(gsm->dlci[0]); mux_get(gsm); tty->driver_data = dlci; mutex_unlock(&gsm->mutex); return 0; } static int gsmtty_open(struct tty_struct *tty, struct file *filp) { struct gsm_dlci *dlci = tty->driver_data; struct tty_port *port = &dlci->port; port->count++; tty_port_tty_set(port, tty); dlci->modem_rx = 0; /* We could in theory open and close before we wait - eg if we get a DM straight back. 
This is ok as that will have caused a hangup */ tty_port_set_initialized(port, true); /* Start sending off SABM messages */ if (!dlci->gsm->wait_config) { /* Start sending off SABM messages */ if (dlci->gsm->initiator) gsm_dlci_begin_open(dlci); else gsm_dlci_set_opening(dlci); } else { gsm_dlci_set_wait_config(dlci); } /* And wait for virtual carrier */ return tty_port_block_til_ready(port, tty, filp); } static void gsmtty_close(struct tty_struct *tty, struct file *filp) { struct gsm_dlci *dlci = tty->driver_data; if (dlci == NULL) return; if (dlci->state == DLCI_CLOSED) return; mutex_lock(&dlci->mutex); gsm_destroy_network(dlci); mutex_unlock(&dlci->mutex); if (tty_port_close_start(&dlci->port, tty, filp) == 0) return; gsm_dlci_begin_close(dlci); if (tty_port_initialized(&dlci->port) && C_HUPCL(tty)) tty_port_lower_dtr_rts(&dlci->port); tty_port_close_end(&dlci->port, tty); tty_port_tty_set(&dlci->port, NULL); return; } static void gsmtty_hangup(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return; tty_port_hangup(&dlci->port); gsm_dlci_begin_close(dlci); } static ssize_t gsmtty_write(struct tty_struct *tty, const u8 *buf, size_t len) { int sent; struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return -EINVAL; /* Stuff the bytes into the fifo queue */ sent = kfifo_in_locked(&dlci->fifo, buf, len, &dlci->lock); /* Need to kick the channel */ gsm_dlci_data_kick(dlci); return sent; } static unsigned int gsmtty_write_room(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return 0; return kfifo_avail(&dlci->fifo); } static unsigned int gsmtty_chars_in_buffer(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return 0; return kfifo_len(&dlci->fifo); } static void gsmtty_flush_buffer(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; unsigned long flags; if (dlci->state == 
DLCI_CLOSED) return; /* Caution needed: If we implement reliable transport classes then the data being transmitted can't simply be junked once it has first hit the stack. Until then we can just blow it away */ spin_lock_irqsave(&dlci->lock, flags); kfifo_reset(&dlci->fifo); spin_unlock_irqrestore(&dlci->lock, flags); /* Need to unhook this DLCI from the transmit queue logic */ } static void gsmtty_wait_until_sent(struct tty_struct *tty, int timeout) { /* The FIFO handles the queue so the kernel will do the right thing waiting on chars_in_buffer before calling us. No work to do here */ } static int gsmtty_tiocmget(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return -EINVAL; return dlci->modem_rx; } static int gsmtty_tiocmset(struct tty_struct *tty, unsigned int set, unsigned int clear) { struct gsm_dlci *dlci = tty->driver_data; unsigned int modem_tx = dlci->modem_tx; if (dlci->state == DLCI_CLOSED) return -EINVAL; modem_tx &= ~clear; modem_tx |= set; if (modem_tx != dlci->modem_tx) { dlci->modem_tx = modem_tx; return gsm_modem_update(dlci, 0); } return 0; } static int gsmtty_ioctl(struct tty_struct *tty, unsigned int cmd, unsigned long arg) { struct gsm_dlci *dlci = tty->driver_data; struct gsm_netconfig nc; struct gsm_dlci_config dc; int index; if (dlci->state == DLCI_CLOSED) return -EINVAL; switch (cmd) { case GSMIOC_ENABLE_NET: if (copy_from_user(&nc, (void __user *)arg, sizeof(nc))) return -EFAULT; nc.if_name[IFNAMSIZ-1] = '\0'; /* return net interface index or error code */ mutex_lock(&dlci->mutex); index = gsm_create_network(dlci, &nc); mutex_unlock(&dlci->mutex); if (copy_to_user((void __user *)arg, &nc, sizeof(nc))) return -EFAULT; return index; case GSMIOC_DISABLE_NET: if (!capable(CAP_NET_ADMIN)) return -EPERM; mutex_lock(&dlci->mutex); gsm_destroy_network(dlci); mutex_unlock(&dlci->mutex); return 0; case GSMIOC_GETCONF_DLCI: if (copy_from_user(&dc, (void __user *)arg, sizeof(dc))) return -EFAULT; if 
(dc.channel != dlci->addr) return -EPERM; gsm_dlci_copy_config_values(dlci, &dc); if (copy_to_user((void __user *)arg, &dc, sizeof(dc))) return -EFAULT; return 0; case GSMIOC_SETCONF_DLCI: if (copy_from_user(&dc, (void __user *)arg, sizeof(dc))) return -EFAULT; if (dc.channel >= NUM_DLCI) return -EINVAL; if (dc.channel != 0 && dc.channel != dlci->addr) return -EPERM; return gsm_dlci_config(dlci, &dc, 1); case TIOCMIWAIT: return gsm_wait_modem_change(dlci, (u32)arg); default: return -ENOIOCTLCMD; } } static void gsmtty_set_termios(struct tty_struct *tty, const struct ktermios *old) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return; /* For the moment its fixed. In actual fact the speed information for the virtual channel can be propogated in both directions by the RPN control message. This however rapidly gets nasty as we then have to remap modem signals each way according to whether our virtual cable is null modem etc .. */ tty_termios_copy_hw(&tty->termios, old); } static void gsmtty_throttle(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return; if (C_CRTSCTS(tty)) dlci->modem_tx &= ~TIOCM_RTS; dlci->throttled = true; /* Send an MSC with RTS cleared */ gsm_modem_update(dlci, 0); } static void gsmtty_unthrottle(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; if (dlci->state == DLCI_CLOSED) return; if (C_CRTSCTS(tty)) dlci->modem_tx |= TIOCM_RTS; dlci->throttled = false; /* Send an MSC with RTS set */ gsm_modem_update(dlci, 0); } static int gsmtty_break_ctl(struct tty_struct *tty, int state) { struct gsm_dlci *dlci = tty->driver_data; int encode = 0; /* Off */ if (dlci->state == DLCI_CLOSED) return -EINVAL; if (state == -1) /* "On indefinitely" - we can't encode this properly */ encode = 0x0F; else if (state > 0) { encode = state / 200; /* mS to encoding */ if (encode > 0x0F) encode = 0x0F; /* Best effort */ } return gsm_modem_update(dlci, encode); } static void 
gsmtty_cleanup(struct tty_struct *tty) { struct gsm_dlci *dlci = tty->driver_data; struct gsm_mux *gsm = dlci->gsm; dlci_put(dlci); dlci_put(gsm->dlci[0]); mux_put(gsm); } /* Virtual ttys for the demux */ static const struct tty_operations gsmtty_ops = { .install = gsmtty_install, .open = gsmtty_open, .close = gsmtty_close, .write = gsmtty_write, .write_room = gsmtty_write_room, .chars_in_buffer = gsmtty_chars_in_buffer, .flush_buffer = gsmtty_flush_buffer, .ioctl = gsmtty_ioctl, .throttle = gsmtty_throttle, .unthrottle = gsmtty_unthrottle, .set_termios = gsmtty_set_termios, .hangup = gsmtty_hangup, .wait_until_sent = gsmtty_wait_until_sent, .tiocmget = gsmtty_tiocmget, .tiocmset = gsmtty_tiocmset, .break_ctl = gsmtty_break_ctl, .cleanup = gsmtty_cleanup, }; static int __init gsm_init(void) { /* Fill in our line protocol discipline, and register it */ int status = tty_register_ldisc(&tty_ldisc_packet); if (status != 0) { pr_err("n_gsm: can't register line discipline (err = %d)\n", status); return status; } gsm_tty_driver = tty_alloc_driver(GSM_TTY_MINORS, TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV | TTY_DRIVER_HARDWARE_BREAK); if (IS_ERR(gsm_tty_driver)) { pr_err("gsm_init: tty allocation failed.\n"); status = PTR_ERR(gsm_tty_driver); goto err_unreg_ldisc; } gsm_tty_driver->driver_name = "gsmtty"; gsm_tty_driver->name = "gsmtty"; gsm_tty_driver->major = 0; /* Dynamic */ gsm_tty_driver->minor_start = 0; gsm_tty_driver->type = TTY_DRIVER_TYPE_SERIAL; gsm_tty_driver->subtype = SERIAL_TYPE_NORMAL; gsm_tty_driver->init_termios = tty_std_termios; /* Fixme */ gsm_tty_driver->init_termios.c_lflag &= ~ECHO; tty_set_operations(gsm_tty_driver, &gsmtty_ops); if (tty_register_driver(gsm_tty_driver)) { pr_err("gsm_init: tty registration failed.\n"); status = -EBUSY; goto err_put_driver; } pr_debug("gsm_init: loaded as %d,%d.\n", gsm_tty_driver->major, gsm_tty_driver->minor_start); return 0; err_put_driver: tty_driver_kref_put(gsm_tty_driver); err_unreg_ldisc: 
tty_unregister_ldisc(&tty_ldisc_packet); return status; } static void __exit gsm_exit(void) { tty_unregister_ldisc(&tty_ldisc_packet); tty_unregister_driver(gsm_tty_driver); tty_driver_kref_put(gsm_tty_driver); } module_init(gsm_init); module_exit(gsm_exit); MODULE_DESCRIPTION("GSM 0710 tty multiplexor"); MODULE_LICENSE("GPL"); MODULE_ALIAS_LDISC(N_GSM0710);
12 12 12 12 12 9 12 12 12 9 9 9 9 9 9 9 3 3 3 3 3 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326
// SPDX-License-Identifier: GPL-2.0
#include <linux/cgroup.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/sched/signal.h>

#include "cgroup-internal.h"

#include <trace/events/cgroup.h>

/*
 * Set or clear CGRP_FROZEN in cgrp->flags so that it matches @frozen.
 *
 * The caller must hold css_set_lock. When the bit actually changes, the
 * cgroup's events file is notified and the notify_frozen trace event is
 * emitted.
 *
 * Return true if the flag was changed; false if it already had the
 * requested value.
 */
static bool cgroup_update_frozen_flag(struct cgroup *cgrp, bool frozen)
{
	lockdep_assert_held(&css_set_lock);

	/* Already there? */
	if (test_bit(CGRP_FROZEN, &cgrp->flags) == frozen)
		return false;

	if (frozen)
		set_bit(CGRP_FROZEN, &cgrp->flags);
	else
		clear_bit(CGRP_FROZEN, &cgrp->flags);

	cgroup_file_notify(&cgrp->events_file);
	TRACE_CGROUP_PATH(notify_frozen, cgrp, frozen);
	return true;
}

/*
 * Propagate the cgroup frozen state upwards by the cgroup tree.
 */
static void cgroup_propagate_frozen(struct cgroup *cgrp, bool frozen)
{
	/*
	 * Number of descendants whose frozen state changed, as seen from the
	 * ancestor currently being visited. It starts at 1 for @cgrp itself
	 * and grows by one for every ancestor whose own flag is flipped on
	 * the way up.
	 */
	int desc = 1;

	/*
	 * If the new state is frozen, some freezing ancestor cgroups may change
	 * their state too, depending on if all their descendants are frozen.
	 *
	 * Otherwise, all ancestor cgroups are forced into the non-frozen state.
	 */
	while ((cgrp = cgroup_parent(cgrp))) {
		if (frozen) {
			cgrp->freezer.nr_frozen_descendants += desc;
			/*
			 * An ancestor only becomes frozen if it is freezing
			 * itself and every one of its descendants is frozen.
			 */
			if (!test_bit(CGRP_FREEZE, &cgrp->flags) ||
			    (cgrp->freezer.nr_frozen_descendants !=
			    cgrp->nr_descendants))
				continue;
		} else {
			cgrp->freezer.nr_frozen_descendants -= desc;
		}

		if (cgroup_update_frozen_flag(cgrp, frozen))
			desc++;
	}
}

/*
 * Revisit the cgroup frozen state.
 * Checks if the cgroup is really frozen and performs all state transitions.
 *
 * cgroup_update_frozen_flag() asserts css_set_lock, so the caller must
 * hold it.
 */
void cgroup_update_frozen(struct cgroup *cgrp)
{
	bool frozen;

	/*
	 * If the cgroup has to be frozen (CGRP_FREEZE bit set),
	 * and all tasks are frozen and/or stopped, let's consider
	 * the cgroup frozen. Otherwise it's not frozen.
	 */
	frozen = test_bit(CGRP_FREEZE, &cgrp->flags) &&
		cgrp->freezer.nr_frozen_tasks == __cgroup_task_count(cgrp);

	/* If flags is updated, update the state of ancestor cgroups. */
	if (cgroup_update_frozen_flag(cgrp, frozen))
		cgroup_propagate_frozen(cgrp, frozen);
}

/*
 * Increment cgroup's nr_frozen_tasks.
 */
static void cgroup_inc_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks++;
}

/*
 * Decrement cgroup's nr_frozen_tasks and warn once if it drops below zero.
 */
static void cgroup_dec_frozen_cnt(struct cgroup *cgrp)
{
	cgrp->freezer.nr_frozen_tasks--;
	WARN_ON_ONCE(cgrp->freezer.nr_frozen_tasks < 0);
}

/*
 * Enter frozen/stopped state, if not yet there. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 */
void cgroup_enter_frozen(void)
{
	struct cgroup *cgrp;

	if (current->frozen)
		return;

	spin_lock_irq(&css_set_lock);
	current->frozen = true;
	cgrp = task_dfl_cgroup(current);
	cgroup_inc_frozen_cnt(cgrp);
	cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Conditionally leave frozen/stopped state. Update cgroup's counters,
 * and revisit the state of the cgroup, if necessary.
 *
 * If always_leave is not set, and the cgroup is freezing,
 * we're racing with the cgroup freezing. In this case, we don't
 * drop the frozen counter to avoid a transient switch to
 * the unfrozen state.
 */
void cgroup_leave_frozen(bool always_leave)
{
	struct cgroup *cgrp;

	spin_lock_irq(&css_set_lock);
	cgrp = task_dfl_cgroup(current);
	if (always_leave || !test_bit(CGRP_FREEZE, &cgrp->flags)) {
		cgroup_dec_frozen_cnt(cgrp);
		cgroup_update_frozen(cgrp);
		WARN_ON_ONCE(!current->frozen);
		current->frozen = false;
	} else if (!(current->jobctl & JOBCTL_TRAP_FREEZE)) {
		/*
		 * Still freezing and the trap bit is not set: set it again
		 * and flag a pending signal, all under siglock.
		 */
		spin_lock(&current->sighand->siglock);
		current->jobctl |= JOBCTL_TRAP_FREEZE;
		set_thread_flag(TIF_SIGPENDING);
		spin_unlock(&current->sighand->siglock);
	}
	spin_unlock_irq(&css_set_lock);
}

/*
 * Freeze or unfreeze the task by setting or clearing the JOBCTL_TRAP_FREEZE
 * jobctl bit.
 *
 * On freeze the task is woken with signal_wake_up(); on unfreeze with
 * wake_up_process(). Both happen with the task's sighand lock held.
 */
static void cgroup_freeze_task(struct task_struct *task, bool freeze)
{
	unsigned long flags;

	/* If the task is about to die, don't bother with freezing it. */
	if (!lock_task_sighand(task, &flags))
		return;

	if (freeze) {
		task->jobctl |= JOBCTL_TRAP_FREEZE;
		signal_wake_up(task, false);
	} else {
		task->jobctl &= ~JOBCTL_TRAP_FREEZE;
		wake_up_process(task);
	}

	unlock_task_sighand(task, &flags);
}

/*
 * Freeze or unfreeze all tasks in the given cgroup.
 *
 * @ts_nsec is the timestamp of this transition: it is stored as
 * freeze_start_nsec when freezing, and the time elapsed since
 * freeze_start_nsec is added to frozen_nsec when unfreezing.
 *
 * The caller must hold cgroup_mutex.
 */
static void cgroup_do_freeze(struct cgroup *cgrp, bool freeze, u64 ts_nsec)
{
	struct css_task_iter it;
	struct task_struct *task;

	lockdep_assert_held(&cgroup_mutex);

	spin_lock_irq(&css_set_lock);
	/* Flag and timing fields are updated inside a freeze_seq write section. */
	write_seqcount_begin(&cgrp->freezer.freeze_seq);
	if (freeze) {
		set_bit(CGRP_FREEZE, &cgrp->flags);
		cgrp->freezer.freeze_start_nsec = ts_nsec;
	} else {
		clear_bit(CGRP_FREEZE, &cgrp->flags);
		cgrp->freezer.frozen_nsec += (ts_nsec -
			cgrp->freezer.freeze_start_nsec);
	}
	write_seqcount_end(&cgrp->freezer.freeze_seq);
	spin_unlock_irq(&css_set_lock);

	if (freeze)
		TRACE_CGROUP_PATH(freeze, cgrp);
	else
		TRACE_CGROUP_PATH(unfreeze, cgrp);

	css_task_iter_start(&cgrp->self, 0, &it);
	while ((task = css_task_iter_next(&it))) {
		/*
		 * Ignore kernel threads here. Freezing cgroups containing
		 * kthreads isn't supported.
		 */
		if (task->flags & PF_KTHREAD)
			continue;
		cgroup_freeze_task(task, freeze);
	}
	css_task_iter_end(&it);

	/*
	 * Cgroup state should be revisited here to cover empty leaf cgroups
	 * and cgroups whose descendants are already in the desired state.
	 */
	spin_lock_irq(&css_set_lock);
	if (cgrp->nr_descendants == cgrp->freezer.nr_frozen_descendants)
		cgroup_update_frozen(cgrp);
	spin_unlock_irq(&css_set_lock);
}

/*
 * Adjust the task state (freeze or unfreeze) and revisit the state of
 * source and destination cgroups.
 *
 * The caller must hold css_set_lock.
 */
void cgroup_freezer_migrate_task(struct task_struct *task,
				 struct cgroup *src, struct cgroup *dst)
{
	lockdep_assert_held(&css_set_lock);

	/*
	 * Kernel threads are not supposed to be frozen at all.
	 */
	if (task->flags & PF_KTHREAD)
		return;

	/*
	 * It's not necessary to do changes if both of the src and dst cgroups
	 * are not freezing and task is not frozen.
	 */
	if (!test_bit(CGRP_FREEZE, &src->flags) &&
	    !test_bit(CGRP_FREEZE, &dst->flags) &&
	    !task->frozen)
		return;

	/*
	 * Adjust counters of freezing and frozen tasks.
	 * Note, that if the task is frozen, but the destination cgroup is not
	 * frozen, we bump both counters to keep them balanced.
	 */
	if (task->frozen) {
		cgroup_inc_frozen_cnt(dst);
		cgroup_dec_frozen_cnt(src);
	}
	cgroup_update_frozen(dst);
	cgroup_update_frozen(src);

	/*
	 * Force the task to the desired state.
	 */
	cgroup_freeze_task(task, test_bit(CGRP_FREEZE, &dst->flags));
}

/*
 * Record the requested freeze state of @cgrp and apply the resulting
 * effective state (e_freeze) to @cgrp and its live descendants.
 *
 * The caller must hold cgroup_mutex.
 */
void cgroup_freeze(struct cgroup *cgrp, bool freeze)
{
	struct cgroup_subsys_state *css;
	struct cgroup *parent;
	struct cgroup *dsct;
	bool applied = false;
	u64 ts_nsec;
	bool old_e;

	lockdep_assert_held(&cgroup_mutex);

	/*
	 * Nothing changed? Just exit.
	 */
	if (cgrp->freezer.freeze == freeze)
		return;

	cgrp->freezer.freeze = freeze;
	/* One timestamp is shared by every cgroup changed in this call. */
	ts_nsec = ktime_get_ns();

	/*
	 * Propagate changes downwards the cgroup tree.
	 */
	css_for_each_descendant_pre(css, &cgrp->self) {
		dsct = css->cgroup;

		if (cgroup_is_dead(dsct))
			continue;

		/*
		 * e_freeze is affected by parent's e_freeze and dsct's freeze.
		 * If old e_freeze eq new e_freeze, no change, its children
		 * will not be affected. So do nothing and skip the subtree
		 */
		old_e = dsct->freezer.e_freeze;
		parent = cgroup_parent(dsct);
		dsct->freezer.e_freeze = (dsct->freezer.freeze ||
					  parent->freezer.e_freeze);
		if (dsct->freezer.e_freeze == old_e) {
			css = css_rightmost_descendant(css);
			continue;
		}

		/*
		 * Do change actual state: freeze or unfreeze.
		 */
		cgroup_do_freeze(dsct, freeze, ts_nsec);
		applied = true;
	}

	/*
	 * Even if the actual state hasn't changed, let's notify a user.
	 * The state can be enforced by an ancestor cgroup: the cgroup
	 * can already be in the desired state or it can be locked in the
	 * opposite state, so that the transition will never happen.
	 * In both cases it's better to notify a user, that there is
	 * nothing to wait for.
	 */
	if (!applied) {
		TRACE_CGROUP_PATH(notify_frozen, cgrp,
			test_bit(CGRP_FROZEN, &cgrp->flags));
		cgroup_file_notify(&cgrp->events_file);
	}
}
1107 29 29 10 10 10 19 19 19 19 19 3 3 3 3 2 2 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 
1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 // SPDX-License-Identifier: GPL-2.0 /* * property.c - Unified device property interface. * * Copyright (C) 2014, Intel Corporation * Authors: Rafael J. Wysocki <rafael.j.wysocki@intel.com> * Mika Westerberg <mika.westerberg@linux.intel.com> */ #include <linux/device.h> #include <linux/err.h> #include <linux/export.h> #include <linux/kconfig.h> #include <linux/of.h> #include <linux/property.h> #include <linux/phy.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/types.h> struct fwnode_handle *__dev_fwnode(struct device *dev) { return IS_ENABLED(CONFIG_OF) && dev->of_node ? of_fwnode_handle(dev->of_node) : dev->fwnode; } EXPORT_SYMBOL_GPL(__dev_fwnode); const struct fwnode_handle *__dev_fwnode_const(const struct device *dev) { return IS_ENABLED(CONFIG_OF) && dev->of_node ? of_fwnode_handle(dev->of_node) : dev->fwnode; } EXPORT_SYMBOL_GPL(__dev_fwnode_const); /** * device_property_present - check if a property of a device is present * @dev: Device whose property is being checked * @propname: Name of the property * * Check if property @propname is present in the device firmware description. * * Return: true if property @propname is present. Otherwise, returns false. */ bool device_property_present(const struct device *dev, const char *propname) { return fwnode_property_present(dev_fwnode(dev), propname); } EXPORT_SYMBOL_GPL(device_property_present); /** * fwnode_property_present - check if a property of a firmware node is present * @fwnode: Firmware node whose property to check * @propname: Name of the property * * Return: true if property @propname is present. Otherwise, returns false. 
*/ bool fwnode_property_present(const struct fwnode_handle *fwnode, const char *propname) { bool ret; if (IS_ERR_OR_NULL(fwnode)) return false; ret = fwnode_call_bool_op(fwnode, property_present, propname); if (ret) return ret; return fwnode_call_bool_op(fwnode->secondary, property_present, propname); } EXPORT_SYMBOL_GPL(fwnode_property_present); /** * device_property_read_bool - Return the value for a boolean property of a device * @dev: Device whose property is being checked * @propname: Name of the property * * Return if property @propname is true or false in the device firmware description. * * Return: true if property @propname is present. Otherwise, returns false. */ bool device_property_read_bool(const struct device *dev, const char *propname) { return fwnode_property_read_bool(dev_fwnode(dev), propname); } EXPORT_SYMBOL_GPL(device_property_read_bool); /** * fwnode_property_read_bool - Return the value for a boolean property of a firmware node * @fwnode: Firmware node whose property to check * @propname: Name of the property * * Return if property @propname is true or false in the firmware description. */ bool fwnode_property_read_bool(const struct fwnode_handle *fwnode, const char *propname) { bool ret; if (IS_ERR_OR_NULL(fwnode)) return false; ret = fwnode_call_bool_op(fwnode, property_read_bool, propname); if (ret) return ret; return fwnode_call_bool_op(fwnode->secondary, property_read_bool, propname); } EXPORT_SYMBOL_GPL(fwnode_property_read_bool); /** * device_property_read_u8_array - return a u8 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u8 properties with @propname from the device * firmware description and stores them to @val if found. 
* * It's recommended to call device_property_count_u8() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_u8_array(const struct device *dev, const char *propname, u8 *val, size_t nval) { return fwnode_property_read_u8_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u8_array); /** * device_property_read_u16_array - return a u16 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u16 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u16() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. 
*/ int device_property_read_u16_array(const struct device *dev, const char *propname, u16 *val, size_t nval) { return fwnode_property_read_u16_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u16_array); /** * device_property_read_u32_array - return a u32 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u32 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u32() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_u32_array(const struct device *dev, const char *propname, u32 *val, size_t nval) { return fwnode_property_read_u32_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u32_array); /** * device_property_read_u64_array - return a u64 array property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of u64 properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_count_u64() instead of calling * this function with @val equals %NULL and @nval equals 0. 
* * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_u64_array(const struct device *dev, const char *propname, u64 *val, size_t nval) { return fwnode_property_read_u64_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_u64_array); /** * device_property_read_string_array - return a string array property of device * @dev: Device to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Function reads an array of string properties with @propname from the device * firmware description and stores them to @val if found. * * It's recommended to call device_property_string_array_count() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values read on success if @val is non-NULL, * number of values available on success if @val is NULL, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not an array of strings, * %-EOVERFLOW if the size of the property is not as expected. * %-ENXIO if no suitable firmware interface is present. 
*/ int device_property_read_string_array(const struct device *dev, const char *propname, const char **val, size_t nval) { return fwnode_property_read_string_array(dev_fwnode(dev), propname, val, nval); } EXPORT_SYMBOL_GPL(device_property_read_string_array); /** * device_property_read_string - return a string property of a device * @dev: Device to get the property of * @propname: Name of the property * @val: The value is stored here * * Function reads property @propname from the device firmware description and * stores the value into @val if found. The value is checked to be a string. * * Return: %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property type is not a string. * %-ENXIO if no suitable firmware interface is present. */ int device_property_read_string(const struct device *dev, const char *propname, const char **val) { return fwnode_property_read_string(dev_fwnode(dev), propname, val); } EXPORT_SYMBOL_GPL(device_property_read_string); /** * device_property_match_string - find a string in an array and return index * @dev: Device to get the property of * @propname: Name of the property holding the array * @string: String to look for * * Find a given string in a string array and if it is found return the * index back. * * Return: index, starting from %0, if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of strings, * %-ENXIO if no suitable firmware interface is present. 
*/ int device_property_match_string(const struct device *dev, const char *propname, const char *string) { return fwnode_property_match_string(dev_fwnode(dev), propname, string); } EXPORT_SYMBOL_GPL(device_property_match_string); static int fwnode_property_read_int_array(const struct fwnode_handle *fwnode, const char *propname, unsigned int elem_size, void *val, size_t nval) { int ret; if (IS_ERR_OR_NULL(fwnode)) return -EINVAL; ret = fwnode_call_int_op(fwnode, property_read_int_array, propname, elem_size, val, nval); if (ret != -EINVAL) return ret; return fwnode_call_int_op(fwnode->secondary, property_read_int_array, propname, elem_size, val, nval); } /** * fwnode_property_read_u8_array - return a u8 array property of firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u8 properties with @propname from @fwnode and stores them to * @val if found. * * It's recommended to call fwnode_property_count_u8() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. 
*/ int fwnode_property_read_u8_array(const struct fwnode_handle *fwnode, const char *propname, u8 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u8), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u8_array); /** * fwnode_property_read_u16_array - return a u16 array property of firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u16 properties with @propname from @fwnode and store them to * @val if found. * * It's recommended to call fwnode_property_count_u16() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_u16_array(const struct fwnode_handle *fwnode, const char *propname, u16 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u16), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u16_array); /** * fwnode_property_read_u32_array - return a u32 array property of firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u32 properties with @propname from @fwnode store them to * @val if found. * * It's recommended to call fwnode_property_count_u32() instead of calling * this function with @val equals %NULL and @nval equals 0. 
* * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_u32_array(const struct fwnode_handle *fwnode, const char *propname, u32 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u32), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u32_array); /** * fwnode_property_read_u64_array - return a u64 array property firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an array of u64 properties with @propname from @fwnode and store them to * @val if found. * * It's recommended to call fwnode_property_count_u64() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values if @val was %NULL, * %0 if the property was found (success), * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO if the property is not an array of numbers, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. 
*/ int fwnode_property_read_u64_array(const struct fwnode_handle *fwnode, const char *propname, u64 *val, size_t nval) { return fwnode_property_read_int_array(fwnode, propname, sizeof(u64), val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_u64_array); /** * fwnode_property_read_string_array - return string array property of a node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The values are stored here or %NULL to return the number of values * @nval: Size of the @val array * * Read an string list property @propname from the given firmware node and store * them to @val if found. * * It's recommended to call fwnode_property_string_array_count() instead of calling * this function with @val equals %NULL and @nval equals 0. * * Return: number of values read on success if @val is non-NULL, * number of values available on success if @val is NULL, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not an array of strings, * %-EOVERFLOW if the size of the property is not as expected, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_read_string_array(const struct fwnode_handle *fwnode, const char *propname, const char **val, size_t nval) { int ret; if (IS_ERR_OR_NULL(fwnode)) return -EINVAL; ret = fwnode_call_int_op(fwnode, property_read_string_array, propname, val, nval); if (ret != -EINVAL) return ret; return fwnode_call_int_op(fwnode->secondary, property_read_string_array, propname, val, nval); } EXPORT_SYMBOL_GPL(fwnode_property_read_string_array); /** * fwnode_property_read_string - return a string property of a firmware node * @fwnode: Firmware node to get the property of * @propname: Name of the property * @val: The value is stored here * * Read property @propname from the given firmware node and store the value into * @val if found. The value is checked to be a string. 
*
 * Return: %0 if the property was found (success),
 *	   %-EINVAL if given arguments are not valid,
 *	   %-ENODATA if the property does not have a value,
 *	   %-EPROTO or %-EILSEQ if the property is not a string,
 *	   %-ENXIO if no suitable firmware interface is present.
 */
int fwnode_property_read_string(const struct fwnode_handle *fwnode,
				const char *propname, const char **val)
{
	int ret;

	/* A single string is read as a one-element string array. */
	ret = fwnode_property_read_string_array(fwnode, propname, val, 1);
	if (ret < 0)
		return ret;

	/* The array reader reports a count; callers here only want success. */
	return 0;
}
EXPORT_SYMBOL_GPL(fwnode_property_read_string);

/**
 * fwnode_property_match_string - find a string in an array and return index
 * @fwnode: Firmware node to get the property of
 * @propname: Name of the property holding the array
 * @string: String to look for
 *
 * Find a given string in a string array and if it is found return the
 * index back.
 *
 * Return: index, starting from %0, if the property was found (success),
 *	   %-EINVAL if given arguments are not valid,
 *	   %-ENODATA if the property does not have a value or @string is not
 *	   one of its values,
 *	   %-ENOMEM if the temporary string array cannot be allocated,
 *	   %-EPROTO if the property is not an array of strings,
 *	   %-ENXIO if no suitable firmware interface is present.
*/ int fwnode_property_match_string(const struct fwnode_handle *fwnode, const char *propname, const char *string) { const char **values; int nval, ret; nval = fwnode_property_string_array_count(fwnode, propname); if (nval < 0) return nval; if (nval == 0) return -ENODATA; values = kcalloc(nval, sizeof(*values), GFP_KERNEL); if (!values) return -ENOMEM; ret = fwnode_property_read_string_array(fwnode, propname, values, nval); if (ret < 0) goto out_free; ret = match_string(values, nval, string); if (ret < 0) ret = -ENODATA; out_free: kfree(values); return ret; } EXPORT_SYMBOL_GPL(fwnode_property_match_string); /** * fwnode_property_match_property_string - find a property string value in an array and return index * @fwnode: Firmware node to get the property of * @propname: Name of the property holding the string value * @array: String array to search in * @n: Size of the @array * * Find a property string value in a given @array and if it is found return * the index back. * * Return: index, starting from %0, if the string value was found in the @array (success), * %-ENOENT when the string value was not found in the @array, * %-EINVAL if given arguments are not valid, * %-ENODATA if the property does not have a value, * %-EPROTO or %-EILSEQ if the property is not a string, * %-ENXIO if no suitable firmware interface is present. */ int fwnode_property_match_property_string(const struct fwnode_handle *fwnode, const char *propname, const char * const *array, size_t n) { const char *string; int ret; ret = fwnode_property_read_string(fwnode, propname, &string); if (ret) return ret; ret = match_string(array, n, string); if (ret < 0) ret = -ENOENT; return ret; } EXPORT_SYMBOL_GPL(fwnode_property_match_property_string); /** * fwnode_property_get_reference_args() - Find a reference with arguments * @fwnode: Firmware node where to look for the reference * @prop: The name of the property * @nargs_prop: The name of the property telling the number of * arguments in the referred node. 
NULL if @nargs is known, * otherwise @nargs is ignored. * @nargs: Number of arguments. Ignored if @nargs_prop is non-NULL. * @index: Index of the reference, from zero onwards. * @args: Result structure with reference and integer arguments. * May be NULL. * * Obtain a reference based on a named property in an fwnode, with * integer arguments. * * The caller is responsible for calling fwnode_handle_put() on the returned * @args->fwnode pointer. * * Return: %0 on success * %-ENOENT when the index is out of bounds, the index has an empty * reference or the property was not found * %-EINVAL on parse error */ int fwnode_property_get_reference_args(const struct fwnode_handle *fwnode, const char *prop, const char *nargs_prop, unsigned int nargs, unsigned int index, struct fwnode_reference_args *args) { int ret; if (IS_ERR_OR_NULL(fwnode)) return -ENOENT; ret = fwnode_call_int_op(fwnode, get_reference_args, prop, nargs_prop, nargs, index, args); if (ret == 0) return ret; if (IS_ERR_OR_NULL(fwnode->secondary)) return ret; return fwnode_call_int_op(fwnode->secondary, get_reference_args, prop, nargs_prop, nargs, index, args); } EXPORT_SYMBOL_GPL(fwnode_property_get_reference_args); /** * fwnode_find_reference - Find named reference to a fwnode_handle * @fwnode: Firmware node where to look for the reference * @name: The name of the reference * @index: Index of the reference * * @index can be used when the named reference holds a table of references. * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: a pointer to the reference fwnode, when found. Otherwise, * returns an error pointer. */ struct fwnode_handle *fwnode_find_reference(const struct fwnode_handle *fwnode, const char *name, unsigned int index) { struct fwnode_reference_args args; int ret; ret = fwnode_property_get_reference_args(fwnode, name, NULL, 0, index, &args); return ret ? 
ERR_PTR(ret) : args.fwnode; } EXPORT_SYMBOL_GPL(fwnode_find_reference); /** * fwnode_get_name - Return the name of a node * @fwnode: The firmware node * * Return: a pointer to the node name, or %NULL. */ const char *fwnode_get_name(const struct fwnode_handle *fwnode) { return fwnode_call_ptr_op(fwnode, get_name); } EXPORT_SYMBOL_GPL(fwnode_get_name); /** * fwnode_get_name_prefix - Return the prefix of node for printing purposes * @fwnode: The firmware node * * Return: the prefix of a node, intended to be printed right before the node. * The prefix works also as a separator between the nodes. */ const char *fwnode_get_name_prefix(const struct fwnode_handle *fwnode) { return fwnode_call_ptr_op(fwnode, get_name_prefix); } /** * fwnode_name_eq - Return true if node name is equal * @fwnode: The firmware node * @name: The name to which to compare the node name * * Compare the name provided as an argument to the name of the node, stopping * the comparison at either NUL or '@' character, whichever comes first. This * function is generally used for comparing node names while ignoring the * possible unit address of the node. * * Return: true if the node name matches with the name provided in the @name * argument, false otherwise. */ bool fwnode_name_eq(const struct fwnode_handle *fwnode, const char *name) { const char *node_name; ptrdiff_t len; node_name = fwnode_get_name(fwnode); if (!node_name) return false; len = strchrnul(node_name, '@') - node_name; return str_has_prefix(node_name, name) == len; } EXPORT_SYMBOL_GPL(fwnode_name_eq); /** * fwnode_get_parent - Return parent firwmare node * @fwnode: Firmware whose parent is retrieved * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. * * Return: parent firmware node of the given node if possible or %NULL if no * parent was available. 
*/
struct fwnode_handle *fwnode_get_parent(const struct fwnode_handle *fwnode)
{
	/* Dispatch to the node's get_parent firmware operation. */
	return fwnode_call_ptr_op(fwnode, get_parent);
}
EXPORT_SYMBOL_GPL(fwnode_get_parent);

/**
 * fwnode_get_next_parent - Iterate to the node's parent
 * @fwnode: Firmware whose parent is retrieved
 *
 * This is like fwnode_get_parent() except that it drops the refcount
 * on the passed node, making it suitable for iterating through a
 * node's parents.
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer. Note that this function also puts a reference to @fwnode
 * unconditionally.
 *
 * Return: parent firmware node of the given node if possible or %NULL if no
 * parent was available.
 */
struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode)
{
	/* Look the parent up while the reference on @fwnode is still held. */
	struct fwnode_handle *parent = fwnode_get_parent(fwnode);

	/* Dropped whether or not a parent was found. */
	fwnode_handle_put(fwnode);

	return parent;
}
EXPORT_SYMBOL_GPL(fwnode_get_next_parent);

/**
 * fwnode_count_parents - Return the number of parents a node has
 * @fwnode: The node the parents of which are to be counted
 *
 * Return: the number of parents a node has.
 */
unsigned int fwnode_count_parents(const struct fwnode_handle *fwnode)
{
	struct fwnode_handle *parent;
	unsigned int count = 0;

	/* One increment per node visited by fwnode_for_each_parent_node(). */
	fwnode_for_each_parent_node(fwnode, parent)
		count++;

	return count;
}
EXPORT_SYMBOL_GPL(fwnode_count_parents);

/**
 * fwnode_get_nth_parent - Return an nth parent of a node
 * @fwnode: The node the parent of which is requested
 * @depth: Distance of the parent from the node
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer.
 *
 * Return: the nth parent of a node. If there is no parent at the requested
 * @depth, %NULL is returned. If @depth is 0, the functionality is equivalent to
 * fwnode_handle_get(). For @depth == 1, it is fwnode_get_parent() and so on.
*/
struct fwnode_handle *fwnode_get_nth_parent(struct fwnode_handle *fwnode,
					    unsigned int depth)
{
	struct fwnode_handle *parent;

	/* Depth 0 is the node itself, handed out via fwnode_handle_get(). */
	if (depth == 0)
		return fwnode_handle_get(fwnode);

	fwnode_for_each_parent_node(fwnode, parent) {
		/* Count down one level per ancestor visited. */
		if (--depth == 0)
			return parent;
	}
	/* Ran out of ancestors before @depth reached zero. */
	return NULL;
}
EXPORT_SYMBOL_GPL(fwnode_get_nth_parent);

/**
 * fwnode_get_next_child_node - Return the next child node handle for a node
 * @fwnode: Firmware node to find the next child node for.
 * @child: Handle to one of the node's child nodes or a %NULL handle.
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer. Note that this function also puts a reference to @child
 * unconditionally.
 */
struct fwnode_handle *
fwnode_get_next_child_node(const struct fwnode_handle *fwnode,
			   struct fwnode_handle *child)
{
	/* Dispatch to the node's get_next_child_node firmware operation. */
	return fwnode_call_ptr_op(fwnode, get_next_child_node, child);
}
EXPORT_SYMBOL_GPL(fwnode_get_next_child_node);

/**
 * fwnode_get_next_available_child_node - Return the next available child node handle for a node
 * @fwnode: Firmware node to find the next child node for.
 * @child: Handle to one of the node's child nodes or a %NULL handle.
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer. Note that this function also puts a reference to @child
 * unconditionally.
 *
 * Return: the next child for which fwnode_device_is_available() is true, or
 * %NULL if @fwnode is %NULL or an error pointer, or no such child remains.
 */
struct fwnode_handle *
fwnode_get_next_available_child_node(const struct fwnode_handle *fwnode,
				     struct fwnode_handle *child)
{
	struct fwnode_handle *next_child = child;

	if (IS_ERR_OR_NULL(fwnode))
		return NULL;

	/* Keep stepping through the children until an available one shows up. */
	do {
		next_child = fwnode_get_next_child_node(fwnode, next_child);
		if (!next_child)
			return NULL;
	} while (!fwnode_device_is_available(next_child));

	return next_child;
}
EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node);

/**
 * device_get_next_child_node - Return the next child node handle for a device
 * @dev: Device to find the next child node for.
 * @child: Handle to one of the device's child nodes or a %NULL handle.
*
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer. Note that this function also puts a reference to @child
 * unconditionally.
 */
struct fwnode_handle *device_get_next_child_node(const struct device *dev,
						 struct fwnode_handle *child)
{
	const struct fwnode_handle *fwnode = dev_fwnode(dev);
	struct fwnode_handle *next;

	/* A device without a usable firmware node has no children to offer. */
	if (IS_ERR_OR_NULL(fwnode))
		return NULL;

	/* Try to find a child in the primary fwnode first */
	next = fwnode_get_next_child_node(fwnode, child);
	if (next)
		return next;

	/* When no more children in primary, continue with secondary */
	return fwnode_get_next_child_node(fwnode->secondary, child);
}
EXPORT_SYMBOL_GPL(device_get_next_child_node);

/**
 * fwnode_get_named_child_node - Return first matching named child node handle
 * @fwnode: Firmware node to find the named child node for.
 * @childname: String to match child node name against.
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer.
 */
struct fwnode_handle *
fwnode_get_named_child_node(const struct fwnode_handle *fwnode,
			    const char *childname)
{
	/* Dispatch to the node's get_named_child_node firmware operation. */
	return fwnode_call_ptr_op(fwnode, get_named_child_node, childname);
}
EXPORT_SYMBOL_GPL(fwnode_get_named_child_node);

/**
 * device_get_named_child_node - Return first matching named child node handle
 * @dev: Device to find the named child node for.
 * @childname: String to match child node name against.
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer.
 */
struct fwnode_handle *device_get_named_child_node(const struct device *dev,
						  const char *childname)
{
	/* Same lookup as fwnode_get_named_child_node(), on the device's fwnode. */
	return fwnode_get_named_child_node(dev_fwnode(dev), childname);
}
EXPORT_SYMBOL_GPL(device_get_named_child_node);

/**
 * fwnode_handle_get - Obtain a reference to a device node
 * @fwnode: Pointer to the device node to obtain the reference to.
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer.
 *
 * Return: the fwnode handle.
*/ struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode) { if (!fwnode_has_op(fwnode, get)) return fwnode; return fwnode_call_ptr_op(fwnode, get); } EXPORT_SYMBOL_GPL(fwnode_handle_get); /** * fwnode_device_is_available - check if a device is available for use * @fwnode: Pointer to the fwnode of the device. * * Return: true if device is available for use. Otherwise, returns false. * * For fwnode node types that don't implement the .device_is_available() * operation, this function returns true. */ bool fwnode_device_is_available(const struct fwnode_handle *fwnode) { if (IS_ERR_OR_NULL(fwnode)) return false; if (!fwnode_has_op(fwnode, device_is_available)) return true; return fwnode_call_bool_op(fwnode, device_is_available); } EXPORT_SYMBOL_GPL(fwnode_device_is_available); /** * fwnode_get_child_node_count - return the number of child nodes for a given firmware node * @fwnode: Pointer to the parent firmware node * * Return: the number of child nodes for a given firmware node. */ unsigned int fwnode_get_child_node_count(const struct fwnode_handle *fwnode) { struct fwnode_handle *child; unsigned int count = 0; fwnode_for_each_child_node(fwnode, child) count++; return count; } EXPORT_SYMBOL_GPL(fwnode_get_child_node_count); /** * fwnode_get_named_child_node_count - number of child nodes with given name * @fwnode: Node which child nodes are counted. * @name: String to match child node name against. * * Scan child nodes and count all the nodes with a specific name. Potential * 'number' -ending after the 'at sign' for scanned names is ignored. * E.g.:: * fwnode_get_named_child_node_count(fwnode, "channel"); * would match all the nodes:: * channel { }, channel@0 {}, channel@0xabba {}... * * Return: the number of child nodes with a matching name for a given device. 
*/ unsigned int fwnode_get_named_child_node_count(const struct fwnode_handle *fwnode, const char *name) { struct fwnode_handle *child; unsigned int count = 0; fwnode_for_each_named_child_node(fwnode, child, name) count++; return count; } EXPORT_SYMBOL_GPL(fwnode_get_named_child_node_count); bool device_dma_supported(const struct device *dev) { return fwnode_call_bool_op(dev_fwnode(dev), device_dma_supported); } EXPORT_SYMBOL_GPL(device_dma_supported); enum dev_dma_attr device_get_dma_attr(const struct device *dev) { if (!fwnode_has_op(dev_fwnode(dev), device_get_dma_attr)) return DEV_DMA_NOT_SUPPORTED; return fwnode_call_int_op(dev_fwnode(dev), device_get_dma_attr); } EXPORT_SYMBOL_GPL(device_get_dma_attr); /** * fwnode_get_phy_mode - Get phy mode for given firmware node * @fwnode: Pointer to the given node * * The function gets phy interface string from property 'phy-mode' or * 'phy-connection-type', and return its index in phy_modes table, or errno in * error case. */ int fwnode_get_phy_mode(const struct fwnode_handle *fwnode) { const char *pm; int err, i; err = fwnode_property_read_string(fwnode, "phy-mode", &pm); if (err < 0) err = fwnode_property_read_string(fwnode, "phy-connection-type", &pm); if (err < 0) return err; for (i = 0; i < PHY_INTERFACE_MODE_MAX; i++) if (!strcasecmp(pm, phy_modes(i))) return i; return -ENODEV; } EXPORT_SYMBOL_GPL(fwnode_get_phy_mode); /** * device_get_phy_mode - Get phy mode for given device * @dev: Pointer to the given device * * The function gets phy interface string from property 'phy-mode' or * 'phy-connection-type', and return its index in phy_modes table, or errno in * error case. */ int device_get_phy_mode(struct device *dev) { return fwnode_get_phy_mode(dev_fwnode(dev)); } EXPORT_SYMBOL_GPL(device_get_phy_mode); /** * fwnode_iomap - Maps the memory mapped IO for a given fwnode * @fwnode: Pointer to the firmware node * @index: Index of the IO range * * Return: a pointer to the mapped memory. 
*/ void __iomem *fwnode_iomap(struct fwnode_handle *fwnode, int index) { return fwnode_call_ptr_op(fwnode, iomap, index); } EXPORT_SYMBOL(fwnode_iomap); /** * fwnode_irq_get - Get IRQ directly from a fwnode * @fwnode: Pointer to the firmware node * @index: Zero-based index of the IRQ * * Return: Linux IRQ number on success. Negative errno on failure. */ int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index) { int ret; ret = fwnode_call_int_op(fwnode, irq_get, index); /* We treat mapping errors as invalid case */ if (ret == 0) return -EINVAL; return ret; } EXPORT_SYMBOL(fwnode_irq_get); /** * fwnode_irq_get_byname - Get IRQ from a fwnode using its name * @fwnode: Pointer to the firmware node * @name: IRQ name * * Description: * Find a match to the string @name in the 'interrupt-names' string array * in _DSD for ACPI, or of_node for Device Tree. Then get the Linux IRQ * number of the IRQ resource corresponding to the index of the matched * string. * * Return: Linux IRQ number on success, or negative errno otherwise. */ int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name) { int index; if (!name) return -EINVAL; index = fwnode_property_match_string(fwnode, "interrupt-names", name); if (index < 0) return index; return fwnode_irq_get(fwnode, index); } EXPORT_SYMBOL(fwnode_irq_get_byname); /** * fwnode_graph_get_next_endpoint - Get next endpoint firmware node * @fwnode: Pointer to the parent firmware node * @prev: Previous endpoint node or %NULL to get the first * * The caller is responsible for calling fwnode_handle_put() on the returned * fwnode pointer. Note that this function also puts a reference to @prev * unconditionally. * * Return: an endpoint firmware node pointer or %NULL if no more endpoints * are available. 
*/
struct fwnode_handle *
fwnode_graph_get_next_endpoint(const struct fwnode_handle *fwnode,
			       struct fwnode_handle *prev)
{
	struct fwnode_handle *ep, *port_parent = NULL;
	const struct fwnode_handle *parent;

	/*
	 * If this function is in a loop and the previous iteration returned
	 * an endpoint from fwnode->secondary, then we need to use the secondary
	 * as parent rather than @fwnode.
	 */
	if (prev) {
		/* Reference taken here is dropped at out_put_port_parent. */
		port_parent = fwnode_graph_get_port_parent(prev);
		parent = port_parent;
	} else {
		parent = fwnode;
	}
	if (IS_ERR_OR_NULL(parent))
		return NULL;

	ep = fwnode_call_ptr_op(parent, graph_get_next_endpoint, prev);
	if (ep)
		goto out_put_port_parent;

	/* Nothing further from this parent: start over on its secondary node. */
	ep = fwnode_graph_get_next_endpoint(parent->secondary, NULL);

out_put_port_parent:
	/* port_parent is NULL here when @prev was NULL. */
	fwnode_handle_put(port_parent);
	return ep;
}
EXPORT_SYMBOL_GPL(fwnode_graph_get_next_endpoint);

/**
 * fwnode_graph_get_port_parent - Return the device fwnode of a port endpoint
 * @endpoint: Endpoint firmware node of the port
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer.
 *
 * Return: the firmware node of the device the @endpoint belongs to.
 */
struct fwnode_handle *
fwnode_graph_get_port_parent(const struct fwnode_handle *endpoint)
{
	struct fwnode_handle *port, *parent;

	/* The endpoint's parent is the port node; it is only needed briefly. */
	port = fwnode_get_parent(endpoint);
	parent = fwnode_call_ptr_op(port, graph_get_port_parent);

	/* Drop the temporary reference on the port node. */
	fwnode_handle_put(port);

	return parent;
}
EXPORT_SYMBOL_GPL(fwnode_graph_get_port_parent);

/**
 * fwnode_graph_get_remote_port_parent - Return fwnode of a remote device
 * @fwnode: Endpoint firmware node pointing to the remote endpoint
 *
 * Extracts firmware node of a remote device the @fwnode points to.
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer.
*/
struct fwnode_handle *
fwnode_graph_get_remote_port_parent(const struct fwnode_handle *fwnode)
{
	struct fwnode_handle *endpoint, *parent;

	/* Remote endpoint first, then the device node that owns its port. */
	endpoint = fwnode_graph_get_remote_endpoint(fwnode);
	parent = fwnode_graph_get_port_parent(endpoint);

	/* The remote endpoint was only an intermediate step. */
	fwnode_handle_put(endpoint);

	return parent;
}
EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port_parent);

/**
 * fwnode_graph_get_remote_port - Return fwnode of a remote port
 * @fwnode: Endpoint firmware node pointing to the remote endpoint
 *
 * Extracts firmware node of a remote port the @fwnode points to.
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer.
 */
struct fwnode_handle *
fwnode_graph_get_remote_port(const struct fwnode_handle *fwnode)
{
	/*
	 * fwnode_get_next_parent() returns the remote endpoint's parent and
	 * puts the reference on the remote endpoint itself.
	 */
	return fwnode_get_next_parent(fwnode_graph_get_remote_endpoint(fwnode));
}
EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_port);

/**
 * fwnode_graph_get_remote_endpoint - Return fwnode of a remote endpoint
 * @fwnode: Endpoint firmware node pointing to the remote endpoint
 *
 * Extracts firmware node of a remote endpoint the @fwnode points to.
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer.
*/
struct fwnode_handle *
fwnode_graph_get_remote_endpoint(const struct fwnode_handle *fwnode)
{
	/* Dispatch to the node's graph_get_remote_endpoint firmware operation. */
	return fwnode_call_ptr_op(fwnode, graph_get_remote_endpoint);
}
EXPORT_SYMBOL_GPL(fwnode_graph_get_remote_endpoint);

/*
 * Tell whether the device at the remote end of endpoint @ep is available,
 * as judged by fwnode_device_is_available() on the remote port parent. The
 * reference taken on that remote node is dropped before returning.
 */
static bool fwnode_graph_remote_available(struct fwnode_handle *ep)
{
	struct fwnode_handle *dev_node;
	bool available;

	dev_node = fwnode_graph_get_remote_port_parent(ep);
	available = fwnode_device_is_available(dev_node);
	fwnode_handle_put(dev_node);

	return available;
}

/**
 * fwnode_graph_get_endpoint_by_id - get endpoint by port and endpoint numbers
 * @fwnode: parent fwnode_handle containing the graph
 * @port: identifier of the port node
 * @endpoint: identifier of the endpoint node under the port node
 * @flags: fwnode lookup flags
 *
 * The caller is responsible for calling fwnode_handle_put() on the returned
 * fwnode pointer.
 *
 * Return: the fwnode handle of the local endpoint corresponding the port and
 * endpoint IDs or %NULL if not found.
 *
 * If FWNODE_GRAPH_ENDPOINT_NEXT is passed in @flags and the specified endpoint
 * has not been found, look for the closest endpoint ID greater than the
 * specified one and return the endpoint that corresponds to it, if present.
 *
 * Does not return endpoints that belong to disabled devices or endpoints that
 * are unconnected, unless FWNODE_GRAPH_DEVICE_DISABLED is passed in @flags.
*/
struct fwnode_handle *
fwnode_graph_get_endpoint_by_id(const struct fwnode_handle *fwnode,
				u32 port, u32 endpoint, unsigned long flags)
{
	struct fwnode_handle *ep, *best_ep = NULL;
	unsigned int best_ep_id = 0;
	bool endpoint_next = flags & FWNODE_GRAPH_ENDPOINT_NEXT;
	bool enabled_only = !(flags & FWNODE_GRAPH_DEVICE_DISABLED);

	fwnode_graph_for_each_endpoint(fwnode, ep) {
		struct fwnode_endpoint fwnode_ep = { 0 };
		int ret;

		if (enabled_only && !fwnode_graph_remote_available(ep))
			continue;

		/* Endpoints that cannot be parsed are silently skipped. */
		ret = fwnode_graph_parse_endpoint(ep, &fwnode_ep);
		if (ret < 0)
			continue;

		if (fwnode_ep.port != port)
			continue;

		if (fwnode_ep.id == endpoint) {
			/*
			 * An exact match supersedes any "next best" candidate
			 * remembered so far; drop the extra reference taken on
			 * that candidate so it is not leaked. best_ep may be
			 * NULL here, as in the put further below.
			 */
			fwnode_handle_put(best_ep);
			return ep;
		}

		if (!endpoint_next)
			continue;

		/*
		 * If the endpoint that has just been found is not the first
		 * matching one and the ID of the one found previously is closer
		 * to the requested endpoint ID, skip it.
		 */
		if (fwnode_ep.id < endpoint ||
		    (best_ep && best_ep_id < fwnode_ep.id))
			continue;

		/* Replace the previous candidate, swapping the held reference. */
		fwnode_handle_put(best_ep);
		best_ep = fwnode_handle_get(ep);
		best_ep_id = fwnode_ep.id;
	}

	return best_ep;
}
EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_by_id);

/**
 * fwnode_graph_get_endpoint_count - Count endpoints on a device node
 * @fwnode: The node related to a device
 * @flags: fwnode lookup flags
 *
 * Count endpoints in a device node.
 *
 * If FWNODE_GRAPH_DEVICE_DISABLED flag is specified, also unconnected endpoints
 * and endpoints connected to disabled devices are counted.
*/
unsigned int fwnode_graph_get_endpoint_count(const struct fwnode_handle *fwnode,
					     unsigned long flags)
{
	struct fwnode_handle *ep;
	unsigned int count = 0;

	fwnode_graph_for_each_endpoint(fwnode, ep) {
		/* With the flag set every endpoint counts, available or not. */
		if (flags & FWNODE_GRAPH_DEVICE_DISABLED ||
		    fwnode_graph_remote_available(ep))
			count++;
	}

	return count;
}
EXPORT_SYMBOL_GPL(fwnode_graph_get_endpoint_count);

/**
 * fwnode_graph_parse_endpoint - parse common endpoint node properties
 * @fwnode: pointer to endpoint fwnode_handle
 * @endpoint: pointer to the fwnode endpoint data structure
 *
 * Parse @fwnode representing a graph endpoint node and store the
 * information in @endpoint. The caller must hold a reference to
 * @fwnode.
 */
int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
				struct fwnode_endpoint *endpoint)
{
	/* Start from an all-zero structure before the firmware op fills it in. */
	memset(endpoint, 0, sizeof(*endpoint));

	return fwnode_call_int_op(fwnode, graph_parse_endpoint, endpoint);
}
EXPORT_SYMBOL(fwnode_graph_parse_endpoint);

/*
 * device_get_match_data - return the result of the device_get_match_data
 * operation of @dev's firmware node.
 */
const void *device_get_match_data(const struct device *dev)
{
	return fwnode_call_ptr_op(dev_fwnode(dev), device_get_match_data, dev);
}
EXPORT_SYMBOL_GPL(device_get_match_data);

/*
 * Run @match on the remote port parent of every graph endpoint of @fwnode
 * whose remote device is available, and count the non-NULL results. When
 * @matches is non-NULL the results are also stored there, and the walk stops
 * once @matches_len entries have been written.
 */
static unsigned int fwnode_graph_devcon_matches(const struct fwnode_handle *fwnode,
						const char *con_id, void *data,
						devcon_match_fn_t match,
						void **matches,
						unsigned int matches_len)
{
	struct fwnode_handle *node;
	struct fwnode_handle *ep;
	unsigned int count = 0;
	void *ret;

	fwnode_graph_for_each_endpoint(fwnode, ep) {
		if (matches && count >= matches_len) {
			/* Leaving the loop early: put the endpoint by hand. */
			fwnode_handle_put(ep);
			break;
		}

		node = fwnode_graph_get_remote_port_parent(ep);
		if (!fwnode_device_is_available(node)) {
			fwnode_handle_put(node);
			continue;
		}

		ret = match(node, con_id, data);
		fwnode_handle_put(node);
		if (ret) {
			if (matches)
				matches[count] = ret;
			count++;
		}
	}
	return count;
}

/*
 * Run @match on each node referenced by property @con_id of @fwnode, for
 * index 0, 1, 2, ... until fwnode_find_reference() returns an error pointer,
 * and count the non-NULL results. When @matches is non-NULL the results are
 * also stored there, and the walk stops once @matches_len entries have been
 * written. Note that @match is called with a NULL connection id here.
 */
static unsigned int fwnode_devcon_matches(const struct fwnode_handle *fwnode,
					  const char *con_id, void *data,
					  devcon_match_fn_t match,
					  void **matches,
					  unsigned int matches_len)
{
	struct fwnode_handle *node;
	unsigned int count = 0;
	unsigned int i;
	void *ret;

	for (i = 0; ; i++) {
		if (matches && count >= matches_len)
			break;

		node = fwnode_find_reference(fwnode, con_id, i);
		if (IS_ERR(node))
			break;

		ret = match(node, NULL, data);
		fwnode_handle_put(node);
		if (ret) {
			if (matches)
				matches[count] = ret;
			count++;
		}
	}

	return count;
}

/**
 * fwnode_connection_find_match - Find connection from a device node
 * @fwnode: Device node with the connection
 * @con_id: Identifier for the connection
 * @data: Data for the match function
 * @match: Function to check and convert the connection description
 *
 * Find a connection with unique identifier @con_id between @fwnode and another
 * device node. @match will be used to convert the connection description to
 * data the caller is expecting to be returned.
 *
 * Return: the first non-NULL value produced by @match, searching graph
 * endpoints before references, or %NULL if @fwnode or @match is %NULL or
 * nothing matched.
 */
void *fwnode_connection_find_match(const struct fwnode_handle *fwnode,
				   const char *con_id, void *data,
				   devcon_match_fn_t match)
{
	unsigned int count;
	void *ret;

	if (!fwnode || !match)
		return NULL;

	/* Ask for at most one result; @ret serves as a one-element array. */
	count = fwnode_graph_devcon_matches(fwnode, con_id, data, match, &ret, 1);
	if (count)
		return ret;

	count = fwnode_devcon_matches(fwnode, con_id, data, match, &ret, 1);
	return count ? ret : NULL;
}
EXPORT_SYMBOL_GPL(fwnode_connection_find_match);

/**
 * fwnode_connection_find_matches - Find connections from a device node
 * @fwnode: Device node with the connection
 * @con_id: Identifier for the connection
 * @data: Data for the match function
 * @match: Function to check and convert the connection description
 * @matches: (Optional) array of pointers to fill with matches
 * @matches_len: Length of @matches
 *
 * Find up to @matches_len connections with unique identifier @con_id between
 * @fwnode and other device nodes. @match will be used to convert the
 * connection description to data the caller is expecting to be returned
 * through the @matches array.
 *
 * If @matches is %NULL @matches_len is ignored and the total number of resolved
 * matches is returned.
 *
 * Return: Number of matches resolved, or negative errno.
*/
int fwnode_connection_find_matches(const struct fwnode_handle *fwnode,
				   const char *con_id, void *data,
				   devcon_match_fn_t match,
				   void **matches, unsigned int matches_len)
{
	unsigned int from_graph, from_refs;
	void **ref_slots = matches;
	unsigned int ref_room = matches_len;

	if (!fwnode || !match)
		return -EINVAL;

	/* Graph endpoints are searched first and fill the front of @matches. */
	from_graph = fwnode_graph_devcon_matches(fwnode, con_id, data, match,
						 matches, matches_len);

	/* Reference matches go into whatever room the graph pass left over. */
	if (ref_slots) {
		ref_slots += from_graph;
		ref_room -= from_graph;
	}

	from_refs = fwnode_devcon_matches(fwnode, con_id, data, match,
					  ref_slots, ref_room);

	return from_graph + from_refs;
}
EXPORT_SYMBOL_GPL(fwnode_connection_find_matches);
16 16 16 22 22 1 1 1 1 1 3 1 3 5 3 5 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 // SPDX-License-Identifier: GPL-2.0-only /* * Sync File validation framework and debug information * * Copyright (C) 2012 Google, Inc. */ #include <linux/debugfs.h> #include "sync_debug.h" static struct dentry *dbgfs; static LIST_HEAD(sync_timeline_list_head); static DEFINE_SPINLOCK(sync_timeline_list_lock); void sync_timeline_debug_add(struct sync_timeline *obj) { unsigned long flags; spin_lock_irqsave(&sync_timeline_list_lock, flags); list_add_tail(&obj->sync_timeline_list, &sync_timeline_list_head); spin_unlock_irqrestore(&sync_timeline_list_lock, flags); } void sync_timeline_debug_remove(struct sync_timeline *obj) { unsigned long flags; spin_lock_irqsave(&sync_timeline_list_lock, flags); list_del(&obj->sync_timeline_list); spin_unlock_irqrestore(&sync_timeline_list_lock, flags); } static const char *sync_status_str(int status) { if (status < 0) return "error"; if (status > 0) return "signaled"; return "active"; } static void sync_print_fence(struct seq_file *s, struct dma_fence *fence, bool show) { struct sync_timeline *parent = dma_fence_parent(fence); int status; status = dma_fence_get_status_locked(fence); seq_printf(s, " %s%sfence %s", show ? parent->name : "", show ? 
"_" : "", sync_status_str(status)); if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags)) { struct timespec64 ts64 = ktime_to_timespec64(fence->timestamp); seq_printf(s, "@%lld.%09ld", (s64)ts64.tv_sec, ts64.tv_nsec); } seq_printf(s, ": %lld", fence->seqno); seq_printf(s, " / %d", parent->value); seq_putc(s, '\n'); } static void sync_print_obj(struct seq_file *s, struct sync_timeline *obj) { struct list_head *pos; seq_printf(s, "%s: %d\n", obj->name, obj->value); spin_lock(&obj->lock); /* Caller already disabled IRQ. */ list_for_each(pos, &obj->pt_list) { struct sync_pt *pt = container_of(pos, struct sync_pt, link); sync_print_fence(s, &pt->base, false); } spin_unlock(&obj->lock); } static int sync_info_debugfs_show(struct seq_file *s, void *unused) { struct list_head *pos; seq_puts(s, "objs:\n--------------\n"); spin_lock_irq(&sync_timeline_list_lock); list_for_each(pos, &sync_timeline_list_head) { struct sync_timeline *obj = container_of(pos, struct sync_timeline, sync_timeline_list); sync_print_obj(s, obj); seq_putc(s, '\n'); } spin_unlock_irq(&sync_timeline_list_lock); seq_puts(s, "fences:\n--------------\n"); return 0; } DEFINE_SHOW_ATTRIBUTE(sync_info_debugfs); static __init int sync_debugfs_init(void) { dbgfs = debugfs_create_dir("sync", NULL); /* * The debugfs files won't ever get removed and thus, there is * no need to protect it against removal races. The use of * debugfs_create_file_unsafe() is actually safe here. */ debugfs_create_file_unsafe("info", 0444, dbgfs, NULL, &sync_info_debugfs_fops); debugfs_create_file_unsafe("sw_sync", 0644, dbgfs, NULL, &sw_sync_debugfs_fops); return 0; } late_initcall(sync_debugfs_init);
3 3 3 1 9 3 1 3 7 8 2 1 3 1 17 17 5546 187 208 2758 7 3765 3214 3 17 3186 45 3191 228 3188 3125 3185 3196 3196 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 
500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 
1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 
1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 
1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 
2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_SCHED_H #define _LINUX_SCHED_H /* * Define 'struct task_struct' and provide the main scheduler * APIs (schedule(), wakeup variants, etc.) 
*/ #include <uapi/linux/sched.h> #include <asm/current.h> #include <asm/processor.h> #include <linux/thread_info.h> #include <linux/preempt.h> #include <linux/cpumask_types.h> #include <linux/cache.h> #include <linux/irqflags_types.h> #include <linux/smp_types.h> #include <linux/pid_types.h> #include <linux/sem_types.h> #include <linux/shm.h> #include <linux/kmsan_types.h> #include <linux/mutex_types.h> #include <linux/plist_types.h> #include <linux/hrtimer_types.h> #include <linux/timer_types.h> #include <linux/seccomp_types.h> #include <linux/nodemask_types.h> #include <linux/refcount_types.h> #include <linux/resource.h> #include <linux/latencytop.h> #include <linux/sched/prio.h> #include <linux/sched/types.h> #include <linux/signal_types.h> #include <linux/spinlock.h> #include <linux/syscall_user_dispatch_types.h> #include <linux/mm_types_task.h> #include <linux/netdevice_xmit.h> #include <linux/task_io_accounting.h> #include <linux/posix-timers_types.h> #include <linux/restart_block.h> #include <uapi/linux/rseq.h> #include <linux/seqlock_types.h> #include <linux/kcsan.h> #include <linux/rv.h> #include <linux/uidgid_types.h> #include <linux/tracepoint-defs.h> #include <linux/unwind_deferred_types.h> #include <asm/kmap_size.h> #ifndef COMPILE_OFFSETS #include <generated/rq-offsets.h> #endif /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; struct bio_list; struct blk_plug; struct bpf_local_storage; struct bpf_run_ctx; struct bpf_net_context; struct capture_control; struct cfs_rq; struct fs_struct; struct futex_pi_state; struct io_context; struct io_uring_task; struct mempolicy; struct nameidata; struct nsproxy; struct perf_event_context; struct perf_ctx_data; struct pid_namespace; struct pipe_inode_info; struct rcu_node; struct reclaim_state; struct robust_list_head; struct root_domain; struct rq; struct sched_attr; struct sched_dl_entity; struct seq_file; struct sighand_struct; struct signal_struct; struct task_delay_info; 
struct task_group; struct task_struct; struct user_event_mm; #include <linux/sched/ext.h> /* * Task state bitmask. NOTE! These bits are also * encoded in fs/proc/array.c: get_task_state(). * * We have two separate sets of flags: task->__state * is about runnability, while task->exit_state is * about the task exiting. Confusing, but this way * modifying one set can't modify the other one by * mistake. */ /* Used in tsk->__state: */ #define TASK_RUNNING 0x00000000 #define TASK_INTERRUPTIBLE 0x00000001 #define TASK_UNINTERRUPTIBLE 0x00000002 #define __TASK_STOPPED 0x00000004 #define __TASK_TRACED 0x00000008 /* Used in tsk->exit_state: */ #define EXIT_DEAD 0x00000010 #define EXIT_ZOMBIE 0x00000020 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* Used in tsk->__state again: */ #define TASK_PARKED 0x00000040 #define TASK_DEAD 0x00000080 #define TASK_WAKEKILL 0x00000100 #define TASK_WAKING 0x00000200 #define TASK_NOLOAD 0x00000400 #define TASK_NEW 0x00000800 #define TASK_RTLOCK_WAIT 0x00001000 #define TASK_FREEZABLE 0x00002000 /* Scaled by IS_ENABLED(CONFIG_LOCKDEP); presumably 0 when lockdep is off (TODO confirm): */ #define __TASK_FREEZABLE_UNSAFE (0x00004000 * IS_ENABLED(CONFIG_LOCKDEP)) #define TASK_FROZEN 0x00008000 #define TASK_STATE_MAX 0x00010000 /* Mask covering every bit below TASK_STATE_MAX: */ #define TASK_ANY (TASK_STATE_MAX-1) /* * DO NOT ADD ANY NEW USERS ! 
*/ #define TASK_FREEZABLE_UNSAFE (TASK_FREEZABLE | __TASK_FREEZABLE_UNSAFE) /* Convenience macros for the sake of set_current_state: */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) #define TASK_TRACED __TASK_TRACED #define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) /* Convenience macros for the sake of wake_up(): */ #define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) /* get_task_state(): */ #define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \ __TASK_TRACED | EXIT_DEAD | EXIT_ZOMBIE | \ TASK_PARKED) #define task_is_running(task) (READ_ONCE((task)->__state) == TASK_RUNNING) #define task_is_traced(task) ((READ_ONCE(task->jobctl) & JOBCTL_TRACED) != 0) #define task_is_stopped(task) ((READ_ONCE(task->jobctl) & JOBCTL_STOPPED) != 0) #define task_is_stopped_or_traced(task) ((READ_ONCE(task->jobctl) & (JOBCTL_STOPPED | JOBCTL_TRACED)) != 0) /* * Special states are those that do not use the normal wait-loop pattern. See * the comment with set_special_state(). 
 */ #define is_special_task_state(state) \ ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_PARKED | \ TASK_DEAD | TASK_FROZEN)) /* * With CONFIG_DEBUG_ATOMIC_SLEEP the helpers below record _THIS_IP_ in * current->task_state_change; the normal/special variants additionally * WARN_ON_ONCE() when the state does not belong to their family. Without * it they all expand to empty statements. */ #ifdef CONFIG_DEBUG_ATOMIC_SLEEP # define debug_normal_state_change(state_value) \ do { \ WARN_ON_ONCE(is_special_task_state(state_value)); \ current->task_state_change = _THIS_IP_; \ } while (0) # define debug_special_state_change(state_value) \ do { \ WARN_ON_ONCE(!is_special_task_state(state_value)); \ current->task_state_change = _THIS_IP_; \ } while (0) # define debug_rtlock_wait_set_state() \ do { \ current->saved_state_change = current->task_state_change;\ current->task_state_change = _THIS_IP_; \ } while (0) # define debug_rtlock_wait_restore_state() \ do { \ current->task_state_change = current->saved_state_change;\ } while (0) #else /* !CONFIG_DEBUG_ATOMIC_SLEEP */ # define debug_normal_state_change(cond) do { } while (0) # define debug_special_state_change(cond) do { } while (0) # define debug_rtlock_wait_set_state() do { } while (0) # define debug_rtlock_wait_restore_state() do { } while (0) #endif /* CONFIG_DEBUG_ATOMIC_SLEEP */ /* Calls __trace_set_current_state() only when the sched_set_state_tp tracepoint is enabled. */ #define trace_set_current_state(state_value) \ do { \ if (tracepoint_enabled(sched_set_state_tp)) \ __trace_set_current_state(state_value); \ } while (0) /* * set_current_state() includes a barrier so that the write of current->__state * is correctly serialised wrt the caller's subsequent test of whether to * actually sleep: * * for (;;) { * set_current_state(TASK_UNINTERRUPTIBLE); * if (CONDITION) * break; * * schedule(); * } * __set_current_state(TASK_RUNNING); * * If the caller does not need such serialisation (because, for instance, the * CONDITION test and condition change and wakeup are under the same lock) then * use __set_current_state(). * * The above is typically ordered against the wakeup, which does: * * CONDITION = 1; * wake_up_state(p, TASK_UNINTERRUPTIBLE); * * where wake_up_state()/try_to_wake_up() executes a full memory barrier before * accessing p->__state. 
 * * Wakeup will do: if (@state & p->__state) p->__state = TASK_RUNNING, that is, * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). * * However, with slightly different timing the wakeup TASK_RUNNING store can * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not * a problem either because that will result in one extra go around the loop * and our CONDITION test will save the day. * * Also see the comments of try_to_wake_up(). */ /* Plain WRITE_ONCE() store of current->__state; no barrier is issued by this macro: */ #define __set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ trace_set_current_state(state_value); \ WRITE_ONCE(current->__state, (state_value)); \ } while (0) /* Same sequence, but the store goes through smp_store_mb(): */ #define set_current_state(state_value) \ do { \ debug_normal_state_change((state_value)); \ trace_set_current_state(state_value); \ smp_store_mb(current->__state, (state_value)); \ } while (0) /* * set_special_state() should be used for those states when the blocking task * cannot use the regular condition based wait-loop. In that case we must * serialize against wakeups such that any possible in-flight TASK_RUNNING * stores will not collide with our state change. The store is therefore done * with current->pi_lock held and interrupts saved/disabled. */ #define set_special_state(state_value) \ do { \ unsigned long flags; /* may shadow */ \ \ raw_spin_lock_irqsave(&current->pi_lock, flags); \ debug_special_state_change((state_value)); \ trace_set_current_state(state_value); \ WRITE_ONCE(current->__state, (state_value)); \ raw_spin_unlock_irqrestore(&current->pi_lock, flags); \ } while (0) /* * PREEMPT_RT specific variants for "sleeping" spin/rwlocks * * RT's spin/rwlock substitutions are state preserving. The state of the * task when blocking on the lock is saved in task_struct::saved_state and * restored after the lock has been acquired. These operations are * serialized by task_struct::pi_lock against try_to_wake_up(). 
Any non RT * lock related wakeups while the task is blocked on the lock are * redirected to operate on task_struct::saved_state to ensure that these * are not dropped. On restore task_struct::saved_state is set to * TASK_RUNNING so any wakeup attempt redirected to saved_state will fail. * * The lock operation looks like this: * * current_save_and_set_rtlock_wait_state(); * for (;;) { * if (try_lock()) * break; * raw_spin_unlock_irq(&lock->wait_lock); * schedule_rtlock(); * raw_spin_lock_irq(&lock->wait_lock); * set_current_state(TASK_RTLOCK_WAIT); * } * current_restore_rtlock_saved_state(); */ #define current_save_and_set_rtlock_wait_state() \ do { \ lockdep_assert_irqs_disabled(); \ raw_spin_lock(&current->pi_lock); \ current->saved_state = current->__state; \ debug_rtlock_wait_set_state(); \ trace_set_current_state(TASK_RTLOCK_WAIT); \ WRITE_ONCE(current->__state, TASK_RTLOCK_WAIT); \ raw_spin_unlock(&current->pi_lock); \ } while (0); #define current_restore_rtlock_saved_state() \ do { \ lockdep_assert_irqs_disabled(); \ raw_spin_lock(&current->pi_lock); \ debug_rtlock_wait_restore_state(); \ trace_set_current_state(current->saved_state); \ WRITE_ONCE(current->__state, current->saved_state); \ current->saved_state = TASK_RUNNING; \ raw_spin_unlock(&current->pi_lock); \ } while (0); #define get_current_state() READ_ONCE(current->__state) /* * Define the task command name length as enum, then it can be visible to * BPF programs. 
 */ enum { TASK_COMM_LEN = 16, }; extern void sched_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX extern long schedule_timeout(long timeout); extern long schedule_timeout_interruptible(long timeout); extern long schedule_timeout_killable(long timeout); extern long schedule_timeout_uninterruptible(long timeout); extern long schedule_timeout_idle(long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); asmlinkage void preempt_schedule_irq(void); #ifdef CONFIG_PREEMPT_RT extern void schedule_rtlock(void); #endif /* CONFIG_PREEMPT_RT */ extern int __must_check io_schedule_prepare(void); extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); extern void io_schedule(void); /* Wrapper functions so that tracepoints can be triggered from this header file: */ DECLARE_TRACEPOINT(sched_set_state_tp); extern void __trace_set_current_state(int state_value); DECLARE_TRACEPOINT(sched_set_need_resched_tp); extern void __trace_set_need_resched(struct task_struct *curr, int tif); /** * struct prev_cputime - snapshot of system and user cputime * @utime: time spent in user mode * @stime: time spent in system mode * @lock: protects the above two fields * * Stores previous user/system time values such that we can guarantee * monotonicity. The structure is empty when * CONFIG_VIRT_CPU_ACCOUNTING_NATIVE is set. */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE u64 utime; u64 stime; raw_spinlock_t lock; #endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ }; enum vtime_state { /* Task is sleeping or running in a CPU with VTIME inactive: */ VTIME_INACTIVE = 0, /* Task is idle */ VTIME_IDLE, /* Task runs in kernelspace in a CPU with VTIME active: */ VTIME_SYS, /* Task runs in userspace in a CPU with VTIME active: */ VTIME_USER, /* Task runs as a guest in a CPU with VTIME active: */ VTIME_GUEST, }; struct vtime { seqcount_t seqcount; unsigned long long starttime; enum vtime_state state; unsigned int cpu; u64 utime; u64 stime; u64 gtime; }; /* * Utilization clamp constraints. 
 * @UCLAMP_MIN: Minimum utilization * @UCLAMP_MAX: Maximum utilization * @UCLAMP_CNT: Utilization clamp constraints count */ enum uclamp_id { UCLAMP_MIN = 0, UCLAMP_MAX, UCLAMP_CNT }; extern struct root_domain def_root_domain; extern struct mutex sched_domains_mutex; extern void sched_domains_mutex_lock(void); extern void sched_domains_mutex_unlock(void); /* Parameter block carrying a single priority value. */ struct sched_param { int sched_priority; }; /* All members are compiled out unless CONFIG_SCHED_INFO is set. */ struct sched_info { #ifdef CONFIG_SCHED_INFO /* Cumulative counters: */ /* # of times we have run on this CPU: */ unsigned long pcount; /* Time spent waiting on a runqueue: */ unsigned long long run_delay; /* Max time spent waiting on a runqueue: */ unsigned long long max_run_delay; /* Min time spent waiting on a runqueue: */ unsigned long long min_run_delay; /* Timestamps: */ /* When did we last run on a CPU? */ unsigned long long last_arrival; /* When were we last queued to run? */ unsigned long long last_queued; #endif /* CONFIG_SCHED_INFO */ }; /* * Integer metrics need fixed point arithmetic, e.g., sched/fair * has a few: load, load_avg, util_avg, freq, and capacity. * * We define a basic fixed point arithmetic range, and then formalize * all these metrics based on that basic range. With a shift of 10 the * scale is 1L << 10, i.e. 1024. */ # define SCHED_FIXEDPOINT_SHIFT 10 # define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT) /* Increase resolution of cpu_capacity calculations (same shift as the fixed point range above): */ # define SCHED_CAPACITY_SHIFT SCHED_FIXEDPOINT_SHIFT # define SCHED_CAPACITY_SCALE (1L << SCHED_CAPACITY_SHIFT) /* A weight together with a 32-bit inv_weight companion value. */ struct load_weight { unsigned long weight; u32 inv_weight; }; /* * The load/runnable/util_avg accumulates an infinite geometric series * (see __update_load_avg_cfs_rq() in kernel/sched/pelt.c). 
 * * [load_avg definition] * * load_avg = runnable% * scale_load_down(load) * * [runnable_avg definition] * * runnable_avg = runnable% * SCHED_CAPACITY_SCALE * * [util_avg definition] * * util_avg = running% * SCHED_CAPACITY_SCALE * * where runnable% is the time ratio that a sched_entity is runnable and * running% the time ratio that a sched_entity is running. * * For cfs_rq, they are the aggregated values of all runnable and blocked * sched_entities. * * The load/runnable/util_avg signals don't directly factor in frequency scaling and CPU * capacity scaling. The scaling is done through the rq_clock_pelt that is used * for computing those signals (see update_rq_clock_pelt()). * * N.B., the above ratios (runnable% and running%) themselves are in the * range of [0, 1]. To do fixed point arithmetic, we therefore scale them * to as large a range as necessary. This is for example reflected by * util_avg's SCHED_CAPACITY_SCALE. * * [Overflow issue] * * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities * with the highest load (=88761), always runnable on a single cfs_rq, * and should not overflow as the number already hits PID_MAX_LIMIT. * * For all other cases (including 32-bit kernels), struct load_weight's * weight will overflow first before we do, because: * * Max(load_avg) <= Max(load.weight) * * Then it is the load_weight's responsibility to consider overflow * issues. */ struct sched_avg { u64 last_update_time; u64 load_sum; u64 runnable_sum; u32 util_sum; u32 period_contrib; unsigned long load_avg; unsigned long runnable_avg; unsigned long util_avg; unsigned int util_est; } ____cacheline_aligned; /* * The UTIL_AVG_UNCHANGED flag is used to synchronize util_est with util_avg * updates. When a task is dequeued, its util_est should not be updated if its * util_avg has not been updated in the meantime. * This information is mapped into the MSB of util_est at dequeue time. 
 * Since the max value of util_est for a task is 1024 (PELT util_avg for a task) * it is safe to use the MSB. */ #define UTIL_EST_WEIGHT_SHIFT 2 #define UTIL_AVG_UNCHANGED 0x80000000 /* All members are compiled out unless CONFIG_SCHEDSTATS is set. */ struct sched_statistics { #ifdef CONFIG_SCHEDSTATS u64 wait_start; u64 wait_max; u64 wait_count; u64 wait_sum; u64 iowait_count; u64 iowait_sum; u64 sleep_start; u64 sleep_max; s64 sum_sleep_runtime; u64 block_start; u64 block_max; s64 sum_block_runtime; s64 exec_max; u64 slice_max; u64 nr_migrations_cold; u64 nr_failed_migrations_affine; u64 nr_failed_migrations_running; u64 nr_failed_migrations_hot; u64 nr_forced_migrations; u64 nr_wakeups; u64 nr_wakeups_sync; u64 nr_wakeups_migrate; u64 nr_wakeups_local; u64 nr_wakeups_remote; u64 nr_wakeups_affine; u64 nr_wakeups_affine_attempts; u64 nr_wakeups_passive; u64 nr_wakeups_idle; #ifdef CONFIG_SCHED_CORE u64 core_forceidle_sum; #endif /* CONFIG_SCHED_CORE */ #endif /* CONFIG_SCHEDSTATS */ } ____cacheline_aligned; struct sched_entity { /* For load-balancing: */ struct load_weight load; struct rb_node run_node; u64 deadline; u64 min_vruntime; u64 min_slice; struct list_head group_node; unsigned char on_rq; unsigned char sched_delayed; unsigned char rel_deadline; unsigned char custom_slice; /* hole */ u64 exec_start; u64 sum_exec_runtime; u64 prev_sum_exec_runtime; u64 vruntime; union { /* * When !@on_rq this field is vlag. * When cfs_rq->curr == se (which implies @on_rq) * this field is vprot. See protect_slice(). */ s64 vlag; u64 vprot; }; u64 slice; u64 nr_migrations; #ifdef CONFIG_FAIR_GROUP_SCHED int depth; struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ struct cfs_rq *my_q; /* cached value of my_q->h_nr_running */ unsigned long runnable_weight; #endif /* CONFIG_FAIR_GROUP_SCHED */ /* * Per entity load average tracking. * * Put into separate cache line so it does not * collide with read-mostly values above. 
 */ struct sched_avg avg; }; struct sched_rt_entity { struct list_head run_list; unsigned long timeout; unsigned long watchdog_stamp; unsigned int time_slice; unsigned short on_rq; unsigned short on_list; struct sched_rt_entity *back; #ifdef CONFIG_RT_GROUP_SCHED struct sched_rt_entity *parent; /* rq on which this entity is (to be) queued: */ struct rt_rq *rt_rq; /* rq "owned" by this entity/group: */ struct rt_rq *my_q; #endif /* CONFIG_RT_GROUP_SCHED */ } __randomize_layout; /* * Callback signatures taking a struct sched_dl_entity pointer. * NOTE(review): struct sched_dl_entity only has a member of type * dl_server_pick_f; confirm whether dl_server_has_tasks_f is still used. */ typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *); typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *); struct sched_dl_entity { struct rb_node rb_node; /* * Original scheduling parameters. Copied here from sched_attr * during sched_setattr(), they will remain the same until * the next sched_setattr(). */ u64 dl_runtime; /* Maximum runtime for each instance */ u64 dl_deadline; /* Relative deadline of each instance */ u64 dl_period; /* Separation of two instances (period) */ u64 dl_bw; /* dl_runtime / dl_period */ u64 dl_density; /* dl_runtime / dl_deadline */ /* * Actual scheduling parameters. Initialized with the values above, * they are continuously updated during task execution. Note that * the remaining runtime could be < 0 in case we are in overrun. */ s64 runtime; /* Remaining runtime for this instance */ u64 deadline; /* Absolute deadline for this instance */ unsigned int flags; /* Specifying the scheduler behaviour */ /* * Some bool flags: * * @dl_throttled tells if we exhausted the runtime. If so, the * task has to wait for a replenishment to be performed at the * next firing of dl_timer. * * @dl_yielded tells if task gave up the CPU before consuming * all its available runtime during the last job. * * @dl_non_contending tells if the task is inactive while still * contributing to the active utilization. In other words, it * indicates if the inactive timer has been armed and its handler * has not been executed yet. 
This flag is useful to avoid race * conditions between the inactive timer handler and the wakeup * code. * * @dl_overrun tells if the task asked to be informed about runtime * overruns. * * @dl_server tells if this is a server entity. * * @dl_defer tells if this is a deferred or regular server. For * now only defer server exists. * * @dl_defer_armed tells if the deferrable server is waiting * for the replenishment timer to activate it. * * @dl_server_active tells if the dlserver is active (started). * dlserver is started on first cfs enqueue on an idle runqueue * and is stopped when a dequeue results in 0 cfs tasks on the * runqueue. In other words, dlserver is active only when the cpu's * runqueue has at least one cfs task. * * @dl_defer_running tells if the deferrable server is actually * running, skipping the defer phase. */ unsigned int dl_throttled : 1; unsigned int dl_yielded : 1; unsigned int dl_non_contending : 1; unsigned int dl_overrun : 1; unsigned int dl_server : 1; unsigned int dl_server_active : 1; unsigned int dl_defer : 1; unsigned int dl_defer_armed : 1; unsigned int dl_defer_running : 1; /* * Bandwidth enforcement timer. Each -deadline task has its * own bandwidth to be enforced, thus we need one timer per task. */ struct hrtimer dl_timer; /* * Inactive timer, responsible for decreasing the active utilization * at the "0-lag time". When a -deadline task blocks, it contributes * to GRUB's active utilization until the "0-lag time", hence a * timer is needed to decrease the active utilization at the correct * time. */ struct hrtimer inactive_timer; /* * Bits for DL-server functionality. Also see the comment near * dl_server_update(). * * @rq the runqueue this server is for * * @server_pick_task callback of type dl_server_pick_f; presumably * returns the task the server should run (TODO confirm). * * NOTE(review): this comment previously described a * @server_has_tasks() callback ("returns true if @server_pick * returns a runnable task"), but no such member exists in this * struct. */ struct rq *rq; dl_server_pick_f server_pick_task; #ifdef CONFIG_RT_MUTEXES /* * Priority Inheritance. 
When a DEADLINE scheduling entity is boosted * pi_se points to the donor, otherwise points to the dl_se it belongs * to (the original one/itself). */ struct sched_dl_entity *pi_se; #endif /* CONFIG_RT_MUTEXES */ }; #ifdef CONFIG_UCLAMP_TASK /* Number of utilization clamp buckets (shorter alias) */ #define UCLAMP_BUCKETS CONFIG_UCLAMP_BUCKETS_COUNT /* * Utilization clamp for a scheduling entity * @value: clamp value "assigned" to a se * @bucket_id: bucket index corresponding to the "assigned" value * @active: the se is currently refcounted in a rq's bucket * @user_defined: the requested clamp value comes from user-space * * The bucket_id is the index of the clamp bucket matching the clamp value * which is pre-computed and stored to avoid expensive integer divisions from * the fast path. * * The active bit is set whenever a task has got an "effective" value assigned, * which can be different from the clamp value "requested" from user-space. * This makes it possible to know that a task is refcounted in the rq's bucket * corresponding to the "effective" bucket_id. * * The user_defined bit is set whenever a task has got a task-specific clamp * value requested from userspace, i.e. the system defaults apply to this task * just as a restriction. This makes it possible to relax default clamps when a * less restrictive task-specific value has been requested, thus allowing a * "nice" semantic to be implemented. For example, a task running with a 20% * default boost can still drop its own boosting to 0%. */ struct uclamp_se { unsigned int value : bits_per(SCHED_CAPACITY_SCALE); unsigned int bucket_id : bits_per(UCLAMP_BUCKETS); unsigned int active : 1; unsigned int user_defined : 1; }; #endif /* CONFIG_UCLAMP_TASK */ /* Four one-byte flags in .b, overlaid by the single 32-bit word .s. */ union rcu_special { struct { u8 blocked; u8 need_qs; u8 exp_hint; /* Hint for performance. */ u8 need_mb; /* Readers need smp_mb(). */ } b; /* Bits. */ u32 s; /* Set of bits. 
*/
};

/*
 * Identifiers for the per-task perf event contexts. perf_invalid_context is
 * the only negative value; perf_nr_task_contexts comes last and therefore
 * equals the number of valid (non-negative) contexts listed before it.
 */
enum perf_event_task_context {
	perf_invalid_context = -1,
	perf_hw_context = 0,
	perf_sw_context,
	perf_nr_task_contexts,
};

/*
 * Number of contexts where an event can trigger:
 *	task, softirq, hardirq, nmi.
 */
#define PERF_NR_CONTEXTS 4

/* Link for a singly linked list: holds only the pointer to the next node. */
struct wake_q_node {
	struct wake_q_node *next;
};

/*
 * Holds an index and KM_MAX_IDX pte values when CONFIG_KMAP_LOCAL is set;
 * otherwise the structure has no members.
 */
struct kmap_ctrl {
#ifdef CONFIG_KMAP_LOCAL
	int idx;
	pte_t pteval[KM_MAX_IDX];
#endif
};

struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * For reasons of header soup (see current_thread_info()), this
	 * must be the first element of task_struct.
	 */
	struct thread_info thread_info;
#endif
	unsigned int __state;

	/* saved state for "spinlock sleepers" */
	unsigned int saved_state;

	/*
	 * This begins the randomizable portion of task_struct. Only
	 * scheduling-critical items should be added above here.
	 */
	randomized_struct_fields_start

	void *stack;
	refcount_t usage;
	/* Per task flags (PF_*), defined further below: */
	unsigned int flags;
	unsigned int ptrace;

#ifdef CONFIG_MEM_ALLOC_PROFILING
	struct alloc_tag *alloc_tag;
#endif

	int on_cpu;
	struct __call_single_node wake_entry;
	unsigned int wakee_flips;
	unsigned long wakee_flip_decay_ts;
	struct task_struct *last_wakee;

	/*
	 * recent_used_cpu is initially set as the last CPU used by a task
	 * that wakes affine another task. Waker/wakee relationships can
	 * push tasks around a CPU where each wakeup moves to the next one.
	 * Tracking a recently used CPU allows a quick search for a recently
	 * used CPU that may be idle.
*/ int recent_used_cpu; int wake_cpu; int on_rq; int prio; int static_prio; int normal_prio; unsigned int rt_priority; struct sched_entity se; struct sched_rt_entity rt; struct sched_dl_entity dl; struct sched_dl_entity *dl_server; #ifdef CONFIG_SCHED_CLASS_EXT struct sched_ext_entity scx; #endif const struct sched_class *sched_class; #ifdef CONFIG_SCHED_CORE struct rb_node core_node; unsigned long core_cookie; unsigned int core_occupation; #endif #ifdef CONFIG_CGROUP_SCHED struct task_group *sched_task_group; #ifdef CONFIG_CFS_BANDWIDTH struct callback_head sched_throttle_work; struct list_head throttle_node; bool throttled; #endif #endif #ifdef CONFIG_UCLAMP_TASK /* * Clamp values requested for a scheduling entity. * Must be updated with task_rq_lock() held. */ struct uclamp_se uclamp_req[UCLAMP_CNT]; /* * Effective clamp values used for a scheduling entity. * Must be updated with task_rq_lock() held. */ struct uclamp_se uclamp[UCLAMP_CNT]; #endif struct sched_statistics stats; #ifdef CONFIG_PREEMPT_NOTIFIERS /* List of struct preempt_notifier: */ struct hlist_head preempt_notifiers; #endif #ifdef CONFIG_BLK_DEV_IO_TRACE unsigned int btrace_seq; #endif unsigned int policy; unsigned long max_allowed_capacity; int nr_cpus_allowed; const cpumask_t *cpus_ptr; cpumask_t *user_cpus_ptr; cpumask_t cpus_mask; void *migration_pending; unsigned short migration_disabled; unsigned short migration_flags; #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; union rcu_special rcu_read_unlock_special; struct list_head rcu_node_entry; struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TASKS_RCU unsigned long rcu_tasks_nvcsw; u8 rcu_tasks_holdout; u8 rcu_tasks_idx; int rcu_tasks_idle_cpu; struct list_head rcu_tasks_holdout_list; int rcu_tasks_exit_cpu; struct list_head rcu_tasks_exit_list; #endif /* #ifdef CONFIG_TASKS_RCU */ #ifdef CONFIG_TASKS_TRACE_RCU int trc_reader_nesting; int trc_ipi_to_cpu; union rcu_special trc_reader_special; struct 
list_head trc_holdout_list; struct list_head trc_blkd_node; int trc_blkd_cpu; #endif /* #ifdef CONFIG_TASKS_TRACE_RCU */ struct sched_info sched_info; struct list_head tasks; struct plist_node pushable_tasks; struct rb_node pushable_dl_tasks; struct mm_struct *mm; struct mm_struct *active_mm; struct address_space *faults_disabled_mapping; int exit_state; int exit_code; int exit_signal; /* The signal sent when the parent dies: */ int pdeath_signal; /* JOBCTL_*, siglock protected: */ unsigned long jobctl; /* Used for emulating ABI behavior of previous Linux versions: */ unsigned int personality; /* Scheduler bits, serialized by scheduler locks: */ unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; unsigned sched_task_hot:1; /* Force alignment to the next boundary: */ unsigned :0; /* Unserialized, strictly 'current' */ /* * This field must not be in the scheduler word above due to wakelist * queueing no longer being serialized by p->on_cpu. However: * * p->XXX = X; ttwu() * schedule() if (p->on_rq && ..) // false * smp_mb__after_spinlock(); if (smp_load_acquire(&p->on_cpu) && //true * deactivate_task() ttwu_queue_wakelist()) * p->on_rq = 0; p->sched_remote_wakeup = Y; * * guarantees all stores of 'current' are visible before * ->sched_remote_wakeup gets used, so it can be in this word. 
*/ unsigned sched_remote_wakeup:1; #ifdef CONFIG_RT_MUTEXES unsigned sched_rt_mutex:1; #endif /* Bit to tell TOMOYO we're in execve(): */ unsigned in_execve:1; unsigned in_iowait:1; #ifndef TIF_RESTORE_SIGMASK unsigned restore_sigmask:1; #endif #ifdef CONFIG_MEMCG_V1 unsigned in_user_fault:1; #endif #ifdef CONFIG_LRU_GEN /* whether the LRU algorithm may apply to this access */ unsigned in_lru_fault:1; #endif #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif #ifdef CONFIG_CGROUPS /* disallow userland-initiated cgroup migration */ unsigned no_cgroup_migration:1; /* task is frozen/stopped (used by the cgroup freezer) */ unsigned frozen:1; #endif #ifdef CONFIG_BLK_CGROUP unsigned use_memdelay:1; #endif #ifdef CONFIG_PSI /* Stalled due to lack of memory */ unsigned in_memstall:1; #endif #ifdef CONFIG_PAGE_OWNER /* Used by page_owner=on to detect recursion in page tracking. */ unsigned in_page_owner:1; #endif #ifdef CONFIG_EVENTFD /* Recursion prevention for eventfd_signal() */ unsigned in_eventfd:1; #endif #ifdef CONFIG_ARCH_HAS_CPU_PASID unsigned pasid_activated:1; #endif #ifdef CONFIG_X86_BUS_LOCK_DETECT unsigned reported_split_lock:1; #endif #ifdef CONFIG_TASK_DELAY_ACCT /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif unsigned in_nf_duplicate:1; #ifdef CONFIG_PREEMPT_RT struct netdev_xmit net_xmit; #endif unsigned long atomic_flags; /* Flags requiring atomic access. */ struct restart_block restart_block; pid_t pid; pid_t tgid; #ifdef CONFIG_STACKPROTECTOR /* Canary value for the -fstack-protector GCC feature: */ unsigned long stack_canary; #endif /* * Pointers to the (original) parent process, youngest child, younger sibling, * older sibling, respectively. 
(p->father can be replaced with * p->real_parent->pid) */ /* Real parent process: */ struct task_struct __rcu *real_parent; /* Recipient of SIGCHLD, wait4() reports: */ struct task_struct __rcu *parent; /* * Children/sibling form the list of natural children: */ struct list_head children; struct list_head sibling; struct task_struct *group_leader; /* * 'ptraced' is the list of tasks this task is using ptrace() on. * * This includes both natural children and PTRACE_ATTACH targets. * 'ptrace_entry' is this task's link on the p->parent->ptraced list. */ struct list_head ptraced; struct list_head ptrace_entry; /* PID/PID hash table linkage. */ struct pid *thread_pid; struct hlist_node pid_links[PIDTYPE_MAX]; struct list_head thread_node; struct completion *vfork_done; /* CLONE_CHILD_SETTID: */ int __user *set_child_tid; /* CLONE_CHILD_CLEARTID: */ int __user *clear_child_tid; /* PF_KTHREAD | PF_IO_WORKER */ void *worker_private; u64 utime; u64 stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME u64 utimescaled; u64 stimescaled; #endif u64 gtime; struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN struct vtime vtime; #endif #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif /* Context switch counts: */ unsigned long nvcsw; unsigned long nivcsw; /* Monotonic time in nsecs: */ u64 start_time; /* Boot based time in nsecs: */ u64 start_boottime; /* MM fault and swap info: this can arguably be seen as either mm-specific or thread-specific: */ unsigned long min_flt; unsigned long maj_flt; /* Empty if CONFIG_POSIX_CPUTIMERS=n */ struct posix_cputimers posix_cputimers; #ifdef CONFIG_POSIX_CPU_TIMERS_TASK_WORK struct posix_cputimers_work posix_cputimers_work; #endif /* Process credentials: */ /* Tracer's credentials at attach: */ const struct cred __rcu *ptracer_cred; /* Objective and real subjective task credentials (COW): */ const struct cred __rcu *real_cred; /* Effective (overridable) subjective task credentials (COW): */ const struct cred __rcu *cred; 
#ifdef CONFIG_KEYS /* Cached requested key. */ struct key *cached_requested_key; #endif /* * executable name, excluding path. * * - normally initialized begin_new_exec() * - set it with set_task_comm() * - strscpy_pad() to ensure it is always NUL-terminated and * zero-padded * - task_lock() to ensure the operation is atomic and the name is * fully updated. */ char comm[TASK_COMM_LEN]; struct nameidata *nameidata; #ifdef CONFIG_SYSVIPC struct sysv_sem sysvsem; struct sysv_shm sysvshm; #endif #ifdef CONFIG_DETECT_HUNG_TASK unsigned long last_switch_count; unsigned long last_switch_time; #endif /* Filesystem information: */ struct fs_struct *fs; /* Open file information: */ struct files_struct *files; #ifdef CONFIG_IO_URING struct io_uring_task *io_uring; #endif /* Namespaces: */ struct nsproxy *nsproxy; /* Signal handlers: */ struct signal_struct *signal; struct sighand_struct __rcu *sighand; sigset_t blocked; sigset_t real_blocked; /* Restored if set_restore_sigmask() was used: */ sigset_t saved_sigmask; struct sigpending pending; unsigned long sas_ss_sp; size_t sas_ss_size; unsigned int sas_ss_flags; struct callback_head *task_works; #ifdef CONFIG_AUDIT #ifdef CONFIG_AUDITSYSCALL struct audit_context *audit_context; #endif kuid_t loginuid; unsigned int sessionid; #endif struct seccomp seccomp; struct syscall_user_dispatch syscall_dispatch; /* Thread group tracking: */ u64 parent_exec_id; u64 self_exec_id; /* Protection against (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed, mempolicy: */ spinlock_t alloc_lock; /* Protection of the PI data structures: */ raw_spinlock_t pi_lock; struct wake_q_node wake_q; #ifdef CONFIG_RT_MUTEXES /* PI waiters blocked on a rt_mutex held by this task: */ struct rb_root_cached pi_waiters; /* Updated under owner's pi_lock and rq lock */ struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling: */ struct rt_mutex_waiter *pi_blocked_on; #endif struct mutex *blocked_on; /* lock we're blocked on 
*/ #ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER /* * Encoded lock address causing task block (lower 2 bits = type from * <linux/hung_task.h>). Accessed via hung_task_*() helpers. */ unsigned long blocker; #endif #ifdef CONFIG_DEBUG_ATOMIC_SLEEP int non_block_count; #endif #ifdef CONFIG_TRACE_IRQFLAGS struct irqtrace_events irqtrace; unsigned int hardirq_threaded; u64 hardirq_chain_key; int softirqs_enabled; int softirq_context; int irq_config; #endif #ifdef CONFIG_PREEMPT_RT int softirq_disable_cnt; #endif #ifdef CONFIG_LOCKDEP # define MAX_LOCK_DEPTH 48UL u64 curr_chain_key; int lockdep_depth; unsigned int lockdep_recursion; struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif #if defined(CONFIG_UBSAN) && !defined(CONFIG_UBSAN_TRAP) unsigned int in_ubsan; #endif /* Journalling filesystem info: */ void *journal_info; /* Stacked block device info: */ struct bio_list *bio_list; /* Stack plugging: */ struct blk_plug *plug; /* VM state: */ struct reclaim_state *reclaim_state; struct io_context *io_context; #ifdef CONFIG_COMPACTION struct capture_control *capture_control; #endif /* Ptrace state: */ unsigned long ptrace_message; kernel_siginfo_t *last_siginfo; struct task_io_accounting ioac; #ifdef CONFIG_PSI /* Pressure stall state */ unsigned int psi_flags; #endif #ifdef CONFIG_TASK_XACCT /* Accumulated RSS usage: */ u64 acct_rss_mem1; /* Accumulated virtual memory usage: */ u64 acct_vm_mem1; /* stime + utime since last update: */ u64 acct_timexpd; #endif #ifdef CONFIG_CPUSETS /* Protected by ->alloc_lock: */ nodemask_t mems_allowed; /* Sequence number to catch updates: */ seqcount_spinlock_t mems_allowed_seq; int cpuset_mem_spread_rotor; #endif #ifdef CONFIG_CGROUPS /* Control Group info protected by css_set_lock: */ struct css_set __rcu *cgroups; /* cg_list protected by css_set_lock and tsk->alloc_lock: */ struct list_head cg_list; #endif #ifdef CONFIG_X86_CPU_RESCTRL u32 closid; u32 rmid; #endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef 
CONFIG_COMPAT struct compat_robust_list_head __user *compat_robust_list; #endif struct list_head pi_state_list; struct futex_pi_state *pi_state_cache; struct mutex futex_exit_mutex; unsigned int futex_state; #endif #ifdef CONFIG_PERF_EVENTS u8 perf_recursion[PERF_NR_CONTEXTS]; struct perf_event_context *perf_event_ctxp; struct mutex perf_event_mutex; struct list_head perf_event_list; struct perf_ctx_data __rcu *perf_ctx_data; #endif #ifdef CONFIG_DEBUG_PREEMPT unsigned long preempt_disable_ip; #endif #ifdef CONFIG_NUMA /* Protected by alloc_lock: */ struct mempolicy *mempolicy; short il_prev; u8 il_weight; short pref_node_fork; #endif #ifdef CONFIG_NUMA_BALANCING int numa_scan_seq; unsigned int numa_scan_period; unsigned int numa_scan_period_max; int numa_preferred_nid; unsigned long numa_migrate_retry; /* Migration stamp: */ u64 node_stamp; u64 last_task_numa_placement; u64 last_sum_exec_runtime; struct callback_head numa_work; /* * This pointer is only modified for current in syscall and * pagefault context (and for tasks being destroyed), so it can be read * from any of the following contexts: * - RCU read-side critical section * - current->numa_group from everywhere * - task's runqueue locked, task not running */ struct numa_group __rcu *numa_group; /* * numa_faults is an array split into four regions: * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer * in this precise order. * * faults_memory: Exponential decaying average of faults on a per-node * basis. Scheduling placement decisions are made based on these * counts. The values remain static for the duration of a PTE scan. * faults_cpu: Track the nodes the process was running on when a NUMA * hinting fault was incurred. * faults_memory_buffer and faults_cpu_buffer: Record faults per node * during the current scan window. When the scan completes, the counts * in faults_memory and faults_cpu decay and these values are copied. 
*/ unsigned long *numa_faults; unsigned long total_numa_faults; /* * numa_faults_locality tracks if faults recorded during the last * scan window were remote/local or failed to migrate. The task scan * period is adapted based on the locality of the faults with different * weights depending on whether they were shared or private faults */ unsigned long numa_faults_locality[3]; unsigned long numa_pages_migrated; #endif /* CONFIG_NUMA_BALANCING */ #ifdef CONFIG_RSEQ struct rseq __user *rseq; u32 rseq_len; u32 rseq_sig; /* * RmW on rseq_event_mask must be performed atomically * with respect to preemption. */ unsigned long rseq_event_mask; # ifdef CONFIG_DEBUG_RSEQ /* * This is a place holder to save a copy of the rseq fields for * validation of read-only fields. The struct rseq has a * variable-length array at the end, so it cannot be used * directly. Reserve a size large enough for the known fields. */ char rseq_fields[sizeof(struct rseq)]; # endif #endif #ifdef CONFIG_SCHED_MM_CID int mm_cid; /* Current cid in mm */ int last_mm_cid; /* Most recent cid in mm */ int migrate_from_cpu; int mm_cid_active; /* Whether cid bitmap is active */ struct callback_head cid_work; #endif struct tlbflush_unmap_batch tlb_ubc; /* Cache last used pipe for splice(): */ struct pipe_inode_info *splice_pipe; struct page_frag task_frag; #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif #ifdef CONFIG_FAULT_INJECTION int make_it_fail; unsigned int fail_nth; #endif /* * When (nr_dirtied >= nr_dirtied_pause), it's time to call * balance_dirty_pages() for a dirty throttling pause: */ int nr_dirtied; int nr_dirtied_pause; /* Start of a write-and-pause period: */ unsigned long dirty_paused_when; #ifdef CONFIG_LATENCYTOP int latency_record_count; struct latency_record latency_record[LT_SAVECOUNT]; #endif /* * Time slack values; these are used to round up poll() and * select() etc timeout values. These are in nanoseconds. 
*/ u64 timer_slack_ns; u64 default_timer_slack_ns; #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) unsigned int kasan_depth; #endif #ifdef CONFIG_KCSAN struct kcsan_ctx kcsan_ctx; #ifdef CONFIG_TRACE_IRQFLAGS struct irqtrace_events kcsan_save_irqtrace; #endif #ifdef CONFIG_KCSAN_WEAK_MEMORY int kcsan_stack_depth; #endif #endif #ifdef CONFIG_KMSAN struct kmsan_ctx kmsan_ctx; #endif #if IS_ENABLED(CONFIG_KUNIT) struct kunit *kunit_test; #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack: */ int curr_ret_stack; int curr_ret_depth; /* Stack of return addresses for return function tracing: */ unsigned long *ret_stack; /* Timestamp for last schedule: */ unsigned long long ftrace_timestamp; unsigned long long ftrace_sleeptime; /* * Number of functions that haven't been traced * because of depth overrun: */ atomic_t trace_overrun; /* Pause tracing: */ atomic_t tracing_graph_pause; #endif #ifdef CONFIG_TRACING /* Bitmask and counter of trace recursion: */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_KCOV /* See kernel/kcov.c for more details. 
*/ /* Coverage collection mode enabled for this task (0 if disabled): */ unsigned int kcov_mode; /* Size of the kcov_area: */ unsigned int kcov_size; /* Buffer for coverage collection: */ void *kcov_area; /* KCOV descriptor wired with this task or NULL: */ struct kcov *kcov; /* KCOV common handle for remote coverage collection: */ u64 kcov_handle; /* KCOV sequence number: */ int kcov_sequence; /* Collect coverage from softirq context: */ unsigned int kcov_softirq; #endif #ifdef CONFIG_MEMCG_V1 struct mem_cgroup *memcg_in_oom; #endif #ifdef CONFIG_MEMCG /* Number of pages to reclaim on returning to userland: */ unsigned int memcg_nr_pages_over_high; /* Used by memcontrol for targeted memcg charge: */ struct mem_cgroup *active_memcg; /* Cache for current->cgroups->memcg->objcg lookups: */ struct obj_cgroup *objcg; #endif #ifdef CONFIG_BLK_CGROUP struct gendisk *throttle_disk; #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; #endif #if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE) unsigned int sequential_io; unsigned int sequential_io_avg; #endif struct kmap_ctrl kmap_ctrl; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP unsigned long task_state_change; # ifdef CONFIG_PREEMPT_RT unsigned long saved_state_change; # endif #endif struct rcu_head rcu; refcount_t rcu_users; int pagefault_disabled; #ifdef CONFIG_MMU struct task_struct *oom_reaper_list; struct timer_list oom_reaper_timer; #endif #ifdef CONFIG_VMAP_STACK struct vm_struct *stack_vm_area; #endif #ifdef CONFIG_THREAD_INFO_IN_TASK /* A live task holds one reference: */ refcount_t stack_refcount; #endif #ifdef CONFIG_LIVEPATCH int patch_state; #endif #ifdef CONFIG_SECURITY /* Used by LSM modules for access restriction: */ void *security; #endif #ifdef CONFIG_BPF_SYSCALL /* Used by BPF task local storage */ struct bpf_local_storage __rcu *bpf_storage; /* Used for BPF run context */ struct bpf_run_ctx *bpf_ctx; #endif /* Used by BPF for per-TASK xdp storage */ struct bpf_net_context *bpf_net_context; #ifdef 
CONFIG_KSTACK_ERASE unsigned long lowest_stack; #endif #ifdef CONFIG_KSTACK_ERASE_METRICS unsigned long prev_lowest_stack; #endif #ifdef CONFIG_X86_MCE void __user *mce_vaddr; __u64 mce_kflags; u64 mce_addr; __u64 mce_ripv : 1, mce_whole_page : 1, __mce_reserved : 62; struct callback_head mce_kill_me; int mce_count; #endif #ifdef CONFIG_KRETPROBES struct llist_head kretprobe_instances; #endif #ifdef CONFIG_RETHOOK struct llist_head rethooks; #endif #ifdef CONFIG_ARCH_HAS_PARANOID_L1D_FLUSH /* * If L1D flush is supported on mm context switch * then we use this callback head to queue kill work * to kill tasks that are not running on SMT disabled * cores */ struct callback_head l1d_flush_kill; #endif #ifdef CONFIG_RV /* * Per-task RV monitor, fixed in CONFIG_RV_PER_TASK_MONITORS. * If memory becomes a concern, we can think about a dynamic method. */ union rv_task_monitor rv[CONFIG_RV_PER_TASK_MONITORS]; #endif #ifdef CONFIG_USER_EVENTS struct user_event_mm *user_event_mm; #endif #ifdef CONFIG_UNWIND_USER struct unwind_task_info unwind_info; #endif /* CPU-specific state of this task: */ struct thread_struct thread; /* * New fields for task_struct should be added above here, so that * they are included in the randomized portion of task_struct. 
*/
	randomized_struct_fields_end
} __attribute__ ((aligned (64)));

/*
 * sched_proxy_exec() returns the state of the __sched_proxy_exec static key
 * when CONFIG_SCHED_PROXY_EXEC is set, and is constant false otherwise.
 */
#ifdef CONFIG_SCHED_PROXY_EXEC
DECLARE_STATIC_KEY_TRUE(__sched_proxy_exec);
static inline bool sched_proxy_exec(void)
{
	return static_branch_likely(&__sched_proxy_exec);
}
#else
static inline bool sched_proxy_exec(void)
{
	return false;
}
#endif

#define TASK_REPORT_IDLE (TASK_REPORT + 1)
#define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1)

/*
 * Fold a task's state and exit-state bits into a small reporting index.
 *
 * @tsk_state:      value of the task's ->__state
 * @tsk_exit_state: value of the task's ->exit_state
 *
 * Only bits inside TASK_REPORT are considered, with three overrides applied
 * in order: TASK_IDLE is reported as TASK_REPORT_IDLE, and TASK_RTLOCK_WAIT
 * or TASK_FROZEN are reported as TASK_UNINTERRUPTIBLE. The return value is
 * fls() of the resulting state word.
 */
static inline unsigned int __task_state_index(unsigned int tsk_state,
					      unsigned int tsk_exit_state)
{
	unsigned int state = (tsk_state | tsk_exit_state) & TASK_REPORT;

	BUILD_BUG_ON_NOT_POWER_OF_2(TASK_REPORT_MAX);

	/* All TASK_IDLE bits must be set, not just some of them. */
	if ((tsk_state & TASK_IDLE) == TASK_IDLE)
		state = TASK_REPORT_IDLE;

	/*
	 * We're lying here, but rather than expose a completely new task state
	 * to userspace, we can make this appear as if the task has gone through
	 * a regular rt_mutex_lock() call.
	 * Report frozen tasks as uninterruptible.
	 */
	if ((tsk_state & TASK_RTLOCK_WAIT) || (tsk_state & TASK_FROZEN))
		state = TASK_UNINTERRUPTIBLE;

	return fls(state);
}

/* Reporting index of @tsk; ->__state is sampled once with READ_ONCE(). */
static inline unsigned int task_state_index(struct task_struct *tsk)
{
	return __task_state_index(READ_ONCE(tsk->__state), tsk->exit_state);
}

/*
 * Translate a reporting index into its one-letter code by indexing the
 * "RSDTtXZPI" table. @state is not range-checked here, so callers must pass
 * an index that lies within the table.
 */
static inline char task_index_to_char(unsigned int state)
{
	static const char state_char[] = "RSDTtXZPI";

	/*
	 * Build-time check that the table length (excluding the NUL) stays
	 * in step with TASK_REPORT_MAX.
	 */
	BUILD_BUG_ON(TASK_REPORT_MAX * 2 != 1 << (sizeof(state_char) - 1));

	return state_char[state];
}

/* One-letter state code of @tsk. */
static inline char task_state_to_char(struct task_struct *tsk)
{
	return task_index_to_char(task_state_index(tsk));
}

extern struct pid *cad_pid;

/*
 * Per process flags
 */
#define PF_VCPU			0x00000001	/* I'm a virtual CPU */
#define PF_IDLE			0x00000002	/* I am an IDLE thread */
#define PF_EXITING		0x00000004	/* Getting shut down */
#define PF_POSTCOREDUMP		0x00000008	/* Coredumps should ignore this task */
#define PF_IO_WORKER		0x00000010	/* Task is an IO worker */
#define PF_WQ_WORKER		0x00000020	/* I'm a workqueue worker */
#define PF_FORKNOEXEC		0x00000040	/* Forked but didn't exec */
#define PF_MCE_PROCESS		0x00000080	/* Process
policy on mce errors */
#define PF_SUPERPRIV		0x00000100	/* Used super-user privileges */
#define PF_DUMPCORE		0x00000200	/* Dumped core */
#define PF_SIGNALED		0x00000400	/* Killed by a signal */
#define PF_MEMALLOC		0x00000800	/* Allocating memory to free memory. See memalloc_noreclaim_save() */
#define PF_NPROC_EXCEEDED	0x00001000	/* set_user() noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH		0x00002000	/* If unset the fpu must be initialized before use */
#define PF_USER_WORKER		0x00004000	/* Kernel thread cloned from userspace thread */
#define PF_NOFREEZE		0x00008000	/* This thread should not be frozen */
#define PF_KCOMPACTD		0x00010000	/* I am kcompactd */
#define PF_KSWAPD		0x00020000	/* I am kswapd */
#define PF_MEMALLOC_NOFS	0x00040000	/* All allocations inherit GFP_NOFS. See memalloc_nofs_save() */
#define PF_MEMALLOC_NOIO	0x00080000	/* All allocations inherit GFP_NOIO. See memalloc_noio_save() */
#define PF_LOCAL_THROTTLE	0x00100000	/* Throttle writes only against the bdi I write to,
						 * I am cleaning dirty pages from some other bdi. */
#define PF_KTHREAD		0x00200000	/* I am a kernel thread */
#define PF_RANDOMIZE		0x00400000	/* Randomize virtual address space */
/* PF__HOLE__* names carry no description; presumably placeholders for unassigned bits. */
#define PF__HOLE__00800000	0x00800000
#define PF__HOLE__01000000	0x01000000
#define PF__HOLE__02000000	0x02000000
#define PF_NO_SETAFFINITY	0x04000000	/* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY		0x08000000	/* Early kill for mce process policy */
#define PF_MEMALLOC_PIN		0x10000000	/* Allocations constrained to zones which allow long term pinning.
* See memalloc_pin_save() */
#define PF_BLOCK_TS		0x20000000	/* plug has ts that needs updating */
#define PF__HOLE__40000000	0x40000000
#define PF_SUSPEND_TASK		0x80000000	/* This thread called freeze_processes() and should not be frozen */

/*
 * Only the _current_ task can read/write to tsk->flags, but other
 * tasks can access tsk->flags in readonly mode for example
 * with tsk_used_math (like during threaded core dumping).
 * There is however an exception to this rule during ptrace
 * or during fork: the ptracer task is allowed to write to the
 * child->flags of its traced child (same goes for fork, the parent
 * can write to the child->flags), because we're guaranteed the
 * child is not running and in turn not changing child->flags
 * at the same time the parent does it.
 *
 * The *_stopped_child_* macros below operate on an explicit @child; the
 * variants without that infix apply the same operation to 'current'.
 * All of them are plain (non-atomic) read-modify-write updates of ->flags.
 */
#define clear_stopped_child_used_math(child)	do { (child)->flags &= ~PF_USED_MATH; } while (0)
#define set_stopped_child_used_math(child)	do { (child)->flags |= PF_USED_MATH; } while (0)
#define clear_used_math()			clear_stopped_child_used_math(current)
#define set_used_math()				set_stopped_child_used_math(current)

/* Clear PF_USED_MATH, then set it again only if @condition is non-zero. */
#define conditional_stopped_child_used_math(condition, child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)

#define conditional_used_math(condition)	conditional_stopped_child_used_math(condition, current)

/* Make @child's PF_USED_MATH bit equal to that of 'current'. */
#define copy_to_stopped_child_used_math(child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)

/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
#define tsk_used_math(p)			((p)->flags & PF_USED_MATH)
#define used_math()				tsk_used_math(current)

/*
 * True when 'current' has PF_NO_SETAFFINITY set and is allowed to run on
 * exactly one CPU (nr_cpus_allowed == 1).
 */
static __always_inline bool is_percpu_thread(void)
{
	return (current->flags & PF_NO_SETAFFINITY) &&
		(current->nr_cpus_allowed == 1);
}

/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges.
*/
#define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
#define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
#define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled*/
#define PFA_SPEC_IB_DISABLE		5	/* Indirect branch speculation restricted */
#define PFA_SPEC_IB_FORCE_DISABLE	6	/* Indirect branch speculation permanently restricted */
#define PFA_SPEC_SSB_NOEXEC		7	/* Speculative Store Bypass clear on execve() */

/*
 * Accessor generators for the PFA_* bits, which are bit numbers within
 * task_struct::atomic_flags:
 *
 *   TASK_PFA_TEST(name, func)  defines bool task_<func>(p)       -> test_bit()
 *   TASK_PFA_SET(name, func)   defines void task_set_<func>(p)   -> set_bit()
 *   TASK_PFA_CLEAR(name, func) defines void task_clear_<func>(p) -> clear_bit()
 */
#define TASK_PFA_TEST(name, func)					\
	static inline bool task_##func(struct task_struct *p)		\
	{ return test_bit(PFA_##name, &p->atomic_flags); }

#define TASK_PFA_SET(name, func)					\
	static inline void task_set_##func(struct task_struct *p)	\
	{ set_bit(PFA_##name, &p->atomic_flags); }

#define TASK_PFA_CLEAR(name, func)					\
	static inline void task_clear_##func(struct task_struct *p)	\
	{ clear_bit(PFA_##name, &p->atomic_flags); }

/*
 * No TASK_PFA_CLEAR() instance exists for NO_NEW_PRIVS,
 * SPEC_SSB_FORCE_DISABLE or SPEC_IB_FORCE_DISABLE, so this header provides
 * no helper to clear those three bits once they are set.
 */
TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)

TASK_PFA_TEST(SPREAD_PAGE, spread_page)
TASK_PFA_SET(SPREAD_PAGE, spread_page)
TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)

TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)

TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)

TASK_PFA_TEST(SPEC_SSB_NOEXEC, spec_ssb_noexec)
TASK_PFA_SET(SPEC_SSB_NOEXEC, spec_ssb_noexec)
TASK_PFA_CLEAR(SPEC_SSB_NOEXEC, spec_ssb_noexec)

TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)

TASK_PFA_TEST(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_SET(SPEC_IB_DISABLE, spec_ib_disable)
TASK_PFA_CLEAR(SPEC_IB_DISABLE, spec_ib_disable)

TASK_PFA_TEST(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)
TASK_PFA_SET(SPEC_IB_FORCE_DISABLE, spec_ib_force_disable)

static inline void
current_restore_flags(unsigned long orig_flags, unsigned long flags)
{
	/*
	 * Put the bits selected by @flags back to the values they have in
	 * @orig_flags; every other bit of current->flags is left as it is.
	 * Done as clear-then-set, i.e. two separate non-atomic updates.
	 */
	current->flags &= ~flags;
	current->flags |= orig_flags & flags;
}

extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern int task_can_attach(struct task_struct *p);
extern int dl_bw_alloc(int cpu, u64 dl_bw);
extern void dl_bw_free(int cpu, u64 dl_bw);

/* do_set_cpus_allowed() - consider using set_cpus_allowed_ptr() instead */
extern void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask);

/**
 * set_cpus_allowed_ptr - set CPU affinity mask of a task
 * @p: the task
 * @new_mask: CPU affinity mask
 *
 * Return: zero if successful, or a negative error code
 */
extern int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask);
extern int dup_user_cpus_ptr(struct task_struct *dst, struct task_struct *src, int node);
extern void release_user_cpus_ptr(struct task_struct *p);
extern int dl_task_check_affinity(struct task_struct *p, const struct cpumask *mask);
extern void force_compatible_cpus_allowed_ptr(struct task_struct *p);
extern void relax_compatible_cpus_allowed_ptr(struct task_struct *p);

extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);

/**
 * task_nice - return the nice value of a given task.
 * @p: the task in question.
 *
 * Return: The nice value [ -20 ... 0 ... 19 ].
*/
static inline int task_nice(const struct task_struct *p)
{
	/* Derived from the static priority only. */
	return PRIO_TO_NICE((p)->static_prio);
}

extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
extern int available_idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
extern void sched_set_fifo(struct task_struct *p);
extern void sched_set_fifo_low(struct task_struct *p);
extern void sched_set_normal(struct task_struct *p, int nice);
extern int sched_setattr(struct task_struct *, const struct sched_attr *);
extern int sched_setattr_nocheck(struct task_struct *, const struct sched_attr *);
extern struct task_struct *idle_task(int cpu);

/**
 * is_idle_task - is the specified task an idle task?
 * @p: the task in question.
 *
 * Return: true if @p has PF_IDLE set in its flags, false otherwise.
 */
static __always_inline bool is_idle_task(const struct task_struct *p)
{
	return !!(p->flags & PF_IDLE);
}

extern struct task_struct *curr_task(int cpu);
extern void ia64_set_curr_task(int cpu, struct task_struct *p);

void yield(void);

/*
 * Overlay of a task_struct, a thread_info (only when it is not embedded in
 * task_struct) and a THREAD_SIZE-byte stack area.
 */
union thread_union {
	struct task_struct task;
#ifndef CONFIG_THREAD_INFO_IN_TASK
	struct thread_info thread_info;
#endif
	unsigned long stack[THREAD_SIZE/sizeof(long)];
};

#ifndef CONFIG_THREAD_INFO_IN_TASK
extern struct thread_info init_thread_info;
#endif

extern unsigned long init_stack[THREAD_SIZE / sizeof(unsigned long)];

/*
 * task_thread_info(): with CONFIG_THREAD_INFO_IN_TASK the thread_info is the
 * member embedded in task_struct; otherwise the task's ->stack pointer is
 * reinterpreted as a pointer to it.
 */
#ifdef CONFIG_THREAD_INFO_IN_TASK
# define task_thread_info(task)	(&(task)->thread_info)
#else
# define task_thread_info(task)	((struct thread_info *)(task)->stack)
#endif

/*
 * find a task by one of its numerical ids
 *
 * find_task_by_pid_ns():
 *      finds a task by its pid in the specified namespace
 * find_task_by_vpid():
 *      finds a task by its virtual pid
 *
 * see also find_vpid() etc in include/linux/pid.h
 */

extern struct task_struct
*find_task_by_vpid(pid_t nr);
extern struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns);

/*
 * find a task by its virtual pid and get the task struct
 */
extern struct task_struct *find_get_task_by_vpid(pid_t nr);

extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
extern void wake_up_new_task(struct task_struct *tsk);

extern void kick_process(struct task_struct *tsk);

extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);

/*
 * set_task_comm(): calls __set_task_comm() with exec == false.
 * The build fails unless sizeof(@from) is exactly TASK_COMM_LEN, so @from
 * has to be an object of that size (a plain pointer generally is not).
 */
#define set_task_comm(tsk, from) ({			\
	BUILD_BUG_ON(sizeof(from) != TASK_COMM_LEN);	\
	__set_task_comm(tsk, from, false);		\
})

/*
 * - Why not use task_lock()?
 *   User space can randomly change their names anyway, so locking for readers
 *   doesn't make sense. For writers, locking is probably necessary, as a race
 *   condition could lead to long-term mixed results.
 *   The strscpy_pad() in __set_task_comm() can ensure that the task comm is
 *   always NUL-terminated and zero-padded. Therefore the race condition between
 *   reader and writer is not an issue.
 *
 * - BUILD_BUG_ON() can help prevent the buf from being truncated.
 *   Since the callers don't perform any return value checks, this safeguard is
 *   necessary.
 *
 * - The expression evaluates to @buf; sizeof(@buf) must be at least
 *   TASK_COMM_LEN or the build fails.
 */
#define get_task_comm(buf, tsk) ({			\
	BUILD_BUG_ON(sizeof(buf) < TASK_COMM_LEN);	\
	strscpy_pad(buf, (tsk)->comm);			\
	buf;						\
})

static __always_inline void scheduler_ipi(void)
{
	/*
	 * Fold TIF_NEED_RESCHED into the preempt_count; anybody setting
	 * TIF_NEED_RESCHED remotely (for the first time) will also send
	 * this IPI.
	 */
	preempt_fold_need_resched();
}

extern unsigned long wait_task_inactive(struct task_struct *, unsigned int match_state);

/*
 * Set thread flags in other task's structures.
* See asm/thread_info.h for TIF_xxxx flags available: */ static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag) { set_ti_thread_flag(task_thread_info(tsk), flag); } static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag) { clear_ti_thread_flag(task_thread_info(tsk), flag); } static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag, bool value) { update_ti_thread_flag(task_thread_info(tsk), flag, value); } static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_and_set_ti_thread_flag(task_thread_info(tsk), flag); } static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag); } static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag) { return test_ti_thread_flag(task_thread_info(tsk), flag); } static inline void set_tsk_need_resched(struct task_struct *tsk) { if (tracepoint_enabled(sched_set_need_resched_tp) && !test_tsk_thread_flag(tsk, TIF_NEED_RESCHED)) __trace_set_need_resched(tsk, TIF_NEED_RESCHED); set_tsk_thread_flag(tsk,TIF_NEED_RESCHED); } static inline void clear_tsk_need_resched(struct task_struct *tsk) { atomic_long_andnot(_TIF_NEED_RESCHED | _TIF_NEED_RESCHED_LAZY, (atomic_long_t *)&task_thread_info(tsk)->flags); } static inline int test_tsk_need_resched(struct task_struct *tsk) { return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED)); } /* * cond_resched() and cond_resched_lock(): latency reduction via * explicit rescheduling in places that are safe. The return * value indicates whether a reschedule was done in fact. 
* cond_resched_lock() will drop the spinlock before scheduling,
 */
#if !defined(CONFIG_PREEMPTION) || defined(CONFIG_PREEMPT_DYNAMIC)
extern int __cond_resched(void);

#if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)

DECLARE_STATIC_CALL(cond_resched, __cond_resched);

/* Dynamic preemption, static-call flavour: dispatch through the static call. */
static __always_inline int _cond_resched(void)
{
	return static_call_mod(cond_resched)();
}

#elif defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)

extern int dynamic_cond_resched(void);

/* Dynamic preemption, key flavour: defer to dynamic_cond_resched(). */
static __always_inline int _cond_resched(void)
{
	return dynamic_cond_resched();
}

#else /* !CONFIG_PREEMPTION */

/* No preemption configured: call __cond_resched() directly. */
static inline int _cond_resched(void)
{
	return __cond_resched();
}

#endif /* PREEMPT_DYNAMIC && CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */

#else /* CONFIG_PREEMPTION && !CONFIG_PREEMPT_DYNAMIC */

/* Preemption without the dynamic variant: this is a stub that returns 0. */
static inline int _cond_resched(void)
{
	return 0;
}

#endif /* !CONFIG_PREEMPTION || CONFIG_PREEMPT_DYNAMIC */

/*
 * cond_resched() runs the __might_resched() check with zero offsets and
 * then evaluates to the result of _cond_resched().
 */
#define cond_resched() ({			\
	__might_resched(__FILE__, __LINE__, 0);	\
	_cond_resched();			\
})

extern int __cond_resched_lock(spinlock_t *lock);
extern int __cond_resched_rwlock_read(rwlock_t *lock);
extern int __cond_resched_rwlock_write(rwlock_t *lock);

/*
 * The low MIGHT_RESCHED_RCU_SHIFT bits of an offsets value are selected by
 * MIGHT_RESCHED_PREEMPT_MASK; the RT variant of
 * PREEMPT_LOCK_RESCHED_OFFSETS adds 1 << MIGHT_RESCHED_RCU_SHIFT on top.
 */
#define MIGHT_RESCHED_RCU_SHIFT		8
#define MIGHT_RESCHED_PREEMPT_MASK	((1U << MIGHT_RESCHED_RCU_SHIFT) - 1)

#ifndef CONFIG_PREEMPT_RT
/*
 * Non RT kernels have an elevated preempt count due to the held lock,
 * but are not allowed to be inside a RCU read side critical section
 */
# define PREEMPT_LOCK_RESCHED_OFFSETS	PREEMPT_LOCK_OFFSET
#else
/*
 * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in
 * cond_resched*lock() has to take that into account because it checks for
 * preempt_count() and rcu_preempt_depth().
*/
# define PREEMPT_LOCK_RESCHED_OFFSETS	\
	(PREEMPT_LOCK_OFFSET + (1U << MIGHT_RESCHED_RCU_SHIFT))
#endif

/*
 * Lock-aware variants of cond_resched(): each runs __might_resched() with
 * PREEMPT_LOCK_RESCHED_OFFSETS and evaluates to the result of the matching
 * __cond_resched_*() helper.
 */
#define cond_resched_lock(lock) ({						\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_lock(lock);						\
})

#define cond_resched_rwlock_read(lock) ({					\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_rwlock_read(lock);					\
})

#define cond_resched_rwlock_write(lock) ({					\
	__might_resched(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSETS);	\
	__cond_resched_rwlock_write(lock);					\
})

#ifndef CONFIG_PREEMPT_RT
/*
 * Return the mutex recorded in p->blocked_on (may be NULL). When it is
 * non-NULL, lockdep asserts that the mutex's wait_lock is held.
 */
static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
{
	struct mutex *m = p->blocked_on;

	if (m)
		lockdep_assert_held_once(&m->wait_lock);
	return m;
}

/*
 * Record that @p is blocked on mutex @m. All checks below only warn (once);
 * the store to p->blocked_on is performed regardless of their outcome.
 */
static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
{
	struct mutex *blocked_on = READ_ONCE(p->blocked_on);

	WARN_ON_ONCE(!m);
	/* The task should only be setting itself as blocked */
	WARN_ON_ONCE(p != current);
	/* Currently we serialize blocked_on under the mutex::wait_lock */
	lockdep_assert_held_once(&m->wait_lock);
	/*
	 * Check to ensure we don't overwrite an existing mutex value
	 * with a different mutex. Note, setting it to the same
	 * lock repeatedly is ok.
	 */
	WARN_ON_ONCE(blocked_on && blocked_on != m);
	WRITE_ONCE(p->blocked_on, m);
}

/*
 * Locked wrapper around __set_task_blocked_on(): a raw_spinlock_irqsave
 * guard is taken on m->wait_lock first (presumably released at scope exit
 * by the guard() machinery - defined outside this view).
 */
static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
{
	guard(raw_spinlock_irqsave)(&m->wait_lock);
	__set_task_blocked_on(p, m);
}

/*
 * Clear p->blocked_on. @m may be NULL, in which case the consistency
 * checks are skipped and the field is simply reset to NULL.
 */
static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
{
	if (m) {
		struct mutex *blocked_on = READ_ONCE(p->blocked_on);

		/* Currently we serialize blocked_on under the mutex::wait_lock */
		lockdep_assert_held_once(&m->wait_lock);
		/*
		 * There may be cases where we re-clear already cleared
		 * blocked_on relationships, but make sure we are not
		 * clearing the relationship with a different lock.
		 */
		WARN_ON_ONCE(blocked_on && blocked_on != m);
	}
	WRITE_ONCE(p->blocked_on, NULL);
}

/*
 * Locked wrapper around __clear_task_blocked_on(). Unlike the helper it
 * dereferences @m unconditionally to take the guard on m->wait_lock.
 */
static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
{
	guard(raw_spinlock_irqsave)(&m->wait_lock);
	__clear_task_blocked_on(p, m);
}
#else
/* PREEMPT_RT: the clear helpers are empty stubs taking a struct rt_mutex. */
static inline void __clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
{
}

static inline void clear_task_blocked_on(struct task_struct *p, struct rt_mutex *m)
{
}
#endif /* !CONFIG_PREEMPT_RT */

/* Wrap tif_need_resched() in an unlikely() branch hint. */
static __always_inline bool need_resched(void)
{
	return unlikely(tif_need_resched());
}

/*
 * Wrappers for p->thread_info->cpu access. No-op on UP.
 */
#ifdef CONFIG_SMP

static inline unsigned int task_cpu(const struct task_struct *p)
{
	/* Single READ_ONCE() load of the cpu field in @p's thread_info. */
	return READ_ONCE(task_thread_info(p)->cpu);
}

extern void set_task_cpu(struct task_struct *p, unsigned int cpu);

#else

static inline unsigned int task_cpu(const struct task_struct *p)
{
	return 0;
}

static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
{
}

#endif /* CONFIG_SMP */

/* True when @p has on_rq set and its sched entity is not marked sched_delayed. */
static inline bool task_is_runnable(struct task_struct *p)
{
	return p->on_rq && !p->se.sched_delayed;
}

extern bool sched_task_on_rq(struct task_struct *p);
extern unsigned long get_wchan(struct task_struct *p);
extern struct task_struct *cpu_curr_snapshot(int cpu);

/*
 * In order to reduce various lock holder preemption latencies provide an
 * interface to see if a vCPU is currently running or not.
 *
 * This allows us to terminate optimistic spin loops and block, analogous to
 * the native optimistic spin heuristic of testing if the lock owner task is
 * running or not.
*/
#ifndef vcpu_is_preempted
/* Fallback used when nothing else defines vcpu_is_preempted: never preempted. */
static inline bool vcpu_is_preempted(int cpu)
{
	return false;
}
#endif

extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);

#ifndef TASK_SIZE_OF
#define TASK_SIZE_OF(tsk)	TASK_SIZE
#endif

/*
 * True when @owner is marked on_cpu and the CPU it is assigned to is not
 * reported as a preempted vCPU.
 */
static inline bool owner_on_cpu(struct task_struct *owner)
{
	/*
	 * Because of the lock holder preemption issue, we skip spinning
	 * both if the task is not on a cpu and if its cpu is preempted.
	 */
	return READ_ONCE(owner->on_cpu) && !vcpu_is_preempted(task_cpu(owner));
}

/* Returns effective CPU energy utilization, as seen by the scheduler */
unsigned long sched_cpu_util(int cpu);

#ifdef CONFIG_SCHED_CORE
extern void sched_core_free(struct task_struct *tsk);
extern void sched_core_fork(struct task_struct *p);
extern int sched_core_share_pid(unsigned int cmd, pid_t pid, enum pid_type type,
				unsigned long uaddr);
extern int sched_core_idle_cpu(int cpu);
#else
/* Without CONFIG_SCHED_CORE: no-op stubs; the idle query maps to idle_cpu(). */
static inline void sched_core_free(struct task_struct *tsk) { }
static inline void sched_core_fork(struct task_struct *p) { }
static inline int sched_core_idle_cpu(int cpu) { return idle_cpu(cpu); }
#endif

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

#ifdef CONFIG_MEM_ALLOC_PROFILING
/*
 * Install @tag as current->alloc_tag and return the tag that was
 * installed before (the two values are exchanged via swap()).
 */
static __always_inline struct alloc_tag *alloc_tag_save(struct alloc_tag *tag)
{
	swap(current->alloc_tag, tag);
	return tag;
}

/*
 * Put @old back into current->alloc_tag. With the DEBUG option enabled,
 * warn if the current tag is no longer @tag.
 */
static __always_inline void alloc_tag_restore(struct alloc_tag *tag, struct alloc_tag *old)
{
#ifdef CONFIG_MEM_ALLOC_PROFILING_DEBUG
	WARN(current->alloc_tag != tag, "current->alloc_tag was changed:\n");
#endif
	current->alloc_tag = old;
}
#else
#define alloc_tag_save(_tag)			NULL
#define alloc_tag_restore(_tag, _old)		do {} while (0)
#endif

#ifndef MODULE
#ifndef COMPILE_OFFSETS

extern void ___migrate_enable(void);

struct rq;
DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);

/*
 * The "struct rq" is not available here, so we can't access the
 * "runqueues" with this_cpu_ptr(), as the compilation will fail in
 *
this_cpu_ptr() -> raw_cpu_ptr() -> __verify_pcpu_ptr():
 *   typeof((ptr) + 0)
 *
 * So use arch_raw_cpu_ptr()/PERCPU_PTR() directly here.
 */
#ifdef CONFIG_SMP
#define this_rq_raw() arch_raw_cpu_ptr(&runqueues)
#else
#define this_rq_raw() PERCPU_PTR(&runqueues)
#endif
/*
 * Lvalue for an unsigned int located RQ_nr_pinned bytes into this CPU's
 * runqueue; the offset constant is defined outside this view.
 */
#define this_rq_pinned() (*(unsigned int *)((void *)this_rq_raw() + RQ_nr_pinned))

/*
 * Drop one level of migrate-disable nesting for current. Only the
 * outermost enable (count going 1 -> 0) restores the cpus mask if needed
 * and decrements the runqueue's pinned counter.
 */
static inline void __migrate_enable(void)
{
	struct task_struct *p = current;

#ifdef CONFIG_DEBUG_PREEMPT
	/*
	 * Check both overflow from migrate_disable() and superfluous
	 * migrate_enable().
	 */
	if (WARN_ON_ONCE((s16)p->migration_disabled <= 0))
		return;
#endif

	/* Nested section: just drop one level. */
	if (p->migration_disabled > 1) {
		p->migration_disabled--;
		return;
	}

	/*
	 * Ensure stop_task runs either before or after this, and that
	 * __set_cpus_allowed_ptr(SCA_MIGRATE_ENABLE) doesn't schedule().
	 */
	guard(preempt)();
	if (unlikely(p->cpus_ptr != &p->cpus_mask))
		___migrate_enable();

	/*
	 * Mustn't clear migration_disabled() until cpus_ptr points back at the
	 * regular cpus_mask, otherwise things that race (eg.
	 * select_fallback_rq) get confused.
	 */
	barrier();
	p->migration_disabled = 0;
	this_rq_pinned()--;
}

/*
 * Add one level of migrate-disable nesting for current. Only the
 * outermost disable (count going 0 -> 1) increments the runqueue's pinned
 * counter, with the preempt guard held.
 */
static inline void __migrate_disable(void)
{
	struct task_struct *p = current;

	if (p->migration_disabled) {
#ifdef CONFIG_DEBUG_PREEMPT
		/*
		 * Warn about overflow half-way through the range.
		 */
		WARN_ON_ONCE((s16)p->migration_disabled < 0);
#endif
		p->migration_disabled++;
		return;
	}

	guard(preempt)();
	this_rq_pinned()++;
	p->migration_disabled = 1;
}

#else /* !COMPILE_OFFSETS */
static inline void __migrate_disable(void) { }
static inline void __migrate_enable(void) { }
#endif /* !COMPILE_OFFSETS */

/*
 * So that it is possible to not export the runqueues variable, define and
 * export migrate_enable/migrate_disable in kernel/sched/core.c too, and use
 * them for the modules. The macro "INSTANTIATE_EXPORTED_MIGRATE_DISABLE" will
 * be defined in kernel/sched/core.c.
*/
#ifndef INSTANTIATE_EXPORTED_MIGRATE_DISABLE
/* Non-module build: inline wrapper around __migrate_disable(). */
static inline void migrate_disable(void)
{
	__migrate_disable();
}

/* Non-module build: inline wrapper around __migrate_enable(). */
static inline void migrate_enable(void)
{
	__migrate_enable();
}
#else /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */
extern void migrate_disable(void);
extern void migrate_enable(void);
#endif /* INSTANTIATE_EXPORTED_MIGRATE_DISABLE */

#else /* MODULE */
/* Module builds only see the out-of-line declarations. */
extern void migrate_disable(void);
extern void migrate_enable(void);
#endif /* MODULE */

/* Lock guard named "migrate" pairing migrate_disable() with migrate_enable(). */
DEFINE_LOCK_GUARD_0(migrate, migrate_disable(), migrate_enable())

#endif
9 19 19 16 19 19 24 35 35 35 35 22 35 35 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
// SPDX-License-Identifier: GPL-2.0
/*
 * security/tomoyo/environ.c
 *
 * Copyright (C) 2005-2011 NTT DATA CORPORATION
 */

#include "common.h"

/**
 * tomoyo_check_env_acl - Check permission for environment variable's name.
 *
 * @r:   Pointer to "struct tomoyo_request_info".
 * @ptr: Pointer to "struct tomoyo_acl_info".
 *
 * Returns true if granted, false otherwise.
 */
static bool tomoyo_check_env_acl(struct tomoyo_request_info *r,
				 const struct tomoyo_acl_info *ptr)
{
	/* @ptr is the "head" member embedded in a struct tomoyo_env_acl. */
	const struct tomoyo_env_acl *acl =
		container_of(ptr, typeof(*acl), head);

	/* Match the requested variable name against this entry's pattern. */
	return tomoyo_path_matches_pattern(r->param.environ.name, acl->env);
}

/**
 * tomoyo_audit_env_log - Audit environment variable name log.
 *
 * @r: Pointer to "struct tomoyo_request_info".
 *
 * Returns 0 on success, negative value otherwise.
 */
static int tomoyo_audit_env_log(struct tomoyo_request_info *r)
{
	/* The supervisor is handed the request as a "misc env <name>" line. */
	return tomoyo_supervisor(r, "misc env %s\n",
				 r->param.environ.name->name);
}

/**
 * tomoyo_env_perm - Check permission for environment variable's name.
 *
 * @r:   Pointer to "struct tomoyo_request_info".
 * @env: The name of environment variable.
 *
 * Returns 0 on success, negative value otherwise.
 *
 * Caller holds tomoyo_read_lock().
*/ int tomoyo_env_perm(struct tomoyo_request_info *r, const char *env) { struct tomoyo_path_info environ; int error; if (!env || !*env) return 0; environ.name = env; tomoyo_fill_path_info(&environ); r->param_type = TOMOYO_TYPE_ENV_ACL; r->param.environ.name = &environ; do { tomoyo_check_acl(r, tomoyo_check_env_acl); error = tomoyo_audit_env_log(r); } while (error == TOMOYO_RETRY_REQUEST); return error; } /** * tomoyo_same_env_acl - Check for duplicated "struct tomoyo_env_acl" entry. * * @a: Pointer to "struct tomoyo_acl_info". * @b: Pointer to "struct tomoyo_acl_info". * * Returns true if @a == @b, false otherwise. */ static bool tomoyo_same_env_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_env_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_env_acl *p2 = container_of(b, typeof(*p2), head); return p1->env == p2->env; } /** * tomoyo_write_env - Write "struct tomoyo_env_acl" list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ static int tomoyo_write_env(struct tomoyo_acl_param *param) { struct tomoyo_env_acl e = { .head.type = TOMOYO_TYPE_ENV_ACL }; int error = -ENOMEM; const char *data = tomoyo_read_token(param); if (!tomoyo_correct_word(data) || strchr(data, '=')) return -EINVAL; e.env = tomoyo_get_name(data); if (!e.env) return error; error = tomoyo_update_domain(&e.head, sizeof(e), param, tomoyo_same_env_acl, NULL); tomoyo_put_name(e.env); return error; } /** * tomoyo_write_misc - Update environment variable list. * * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. */ int tomoyo_write_misc(struct tomoyo_acl_param *param) { if (tomoyo_str_starts(&param->data, "env ")) return tomoyo_write_env(param); return -EINVAL; }
169 168 169 169 236 237 237 236 237 237 237 235 237 236 237 237 187 236 237 237 1 237 237 167 168 169 168 169 169 167 169 168 169 168 170 169 170 169 1 168 168 169 169 237 170 170 170 170 170 170 237 237 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
// SPDX-License-Identifier: GPL-2.0
/*
 * FPU signal frame handling routines.
 */

#include <linux/compat.h>
#include <linux/cpu.h>
#include <linux/pagemap.h>

#include <asm/fpu/signal.h>
#include <asm/fpu/regset.h>
#include <asm/fpu/xstate.h>

#include <asm/sigframe.h>
#include <asm/trapnr.h>
#include <asm/trace/fpu.h>

#include "context.h"
#include "internal.h"
#include "legacy.h"
#include "xstate.h"

/*
 * Check for the presence of extended state information in the
 * user fpstate pointer in the sigcontext.
 *
 * @fxbuf: user pointer to the fx area of the signal frame.
 * @fx_sw: kernel copy of the software-reserved bytes, filled in here.
 *
 * Returns false only when reading from user memory fails. In every other
 * case it returns true; if either magic value does not match, @fx_sw is
 * rewritten to describe an FX-only frame (magic1 == 0, FP/SSE features).
 */
static inline bool check_xstate_in_sigframe(struct fxregs_state __user *fxbuf,
					    struct _fpx_sw_bytes *fx_sw)
{
	void __user *fpstate = fxbuf;
	unsigned int magic2;

	if (__copy_from_user(fx_sw, &fxbuf->sw_reserved[0], sizeof(*fx_sw)))
		return false;

	/* Check for the first magic field */
	if (fx_sw->magic1 != FP_XSTATE_MAGIC1)
		goto setfx;

	/*
	 * Check for the presence of second magic word at the end of memory
	 * layout. This detects the case where the user just copied the legacy
	 * fpstate layout without copying the extended state information
	 * in the memory layout.
	 */
	if (__get_user(magic2, (__u32 __user *)(fpstate + x86_task_fpu(current)->fpstate->user_size)))
		return false;

	if (likely(magic2 == FP_XSTATE_MAGIC2))
		return true;
setfx:
	trace_x86_fpu_xstate_check_failed(x86_task_fpu(current));

	/* Set the parameters for fx only state */
	fx_sw->magic1 = 0;
	fx_sw->xstate_size = sizeof(struct fxregs_state);
	fx_sw->xfeatures = XFEATURE_MASK_FPSSE;
	return true;
}

/*
 * Signal frame handlers.
*/
/*
 * Write the legacy fsave-format header at @buf.
 *
 * With FXSR: save the live registers into @tsk's fpstate when they are
 * still loaded, convert to the i387 layout, and copy that plus the status
 * word and X86_FXSR_MAGIC to user space. Without FXSR: copy the swd field
 * already present in the user frame into its status field.
 *
 * Returns true on success, false if any user access fails.
 */
static inline bool save_fsave_header(struct task_struct *tsk, void __user *buf)
{
	if (use_fxsr()) {
		struct xregs_state *xsave = &x86_task_fpu(tsk)->fpstate->regs.xsave;
		struct user_i387_ia32_struct env;
		struct _fpstate_32 __user *fp = buf;

		fpregs_lock();
		/* Only save from hardware when the registers are not pending a reload. */
		if (!test_thread_flag(TIF_NEED_FPU_LOAD))
			fxsave(&x86_task_fpu(tsk)->fpstate->regs.fxsave);
		fpregs_unlock();

		convert_from_fxsr(&env, tsk);

		if (__copy_to_user(buf, &env, sizeof(env)) ||
		    __put_user(xsave->i387.swd, &fp->status) ||
		    __put_user(X86_FXSR_MAGIC, &fp->magic))
			return false;
	} else {
		struct fregs_state __user *fp = buf;
		u32 swd;

		if (__get_user(swd, &fp->swd) || __put_user(swd, &fp->status))
			return false;
	}

	return true;
}

/*
 * Prepare the SW reserved portion of the fxsave memory layout, indicating
 * the presence of the extended state information in the memory layout
 * pointed to by the fpstate pointer in the sigcontext.
 * This is saved whenever the FP and extended state context is
 * saved on the user stack during the signal handler delivery to the user.
 */
static inline void save_sw_bytes(struct _fpx_sw_bytes *sw_bytes, bool ia32_frame,
				 struct fpstate *fpstate)
{
	sw_bytes->magic1 = FP_XSTATE_MAGIC1;
	sw_bytes->extended_size = fpstate->user_size + FP_XSTATE_MAGIC2_SIZE;
	sw_bytes->xfeatures = fpstate->user_xfeatures;
	sw_bytes->xstate_size = fpstate->user_size;

	/* ia32 frames additionally account for the fsave header. */
	if (ia32_frame)
		sw_bytes->extended_size += sizeof(struct fregs_state);
}

/*
 * Finish a saved frame at @buf: store the SW reserved bytes and, when XSAVE
 * is in use, the trailing FP_XSTATE_MAGIC2 word and the FP/SSE feature bits.
 *
 * Returns true when every user-space write succeeded.
 */
static inline bool save_xstate_epilog(void __user *buf, int ia32_frame,
				      struct fpstate *fpstate)
{
	struct xregs_state __user *x = buf;
	struct _fpx_sw_bytes sw_bytes = {};
	int err;

	/* Setup the bytes not touched by the [f]xsave and reserved for SW. */
	save_sw_bytes(&sw_bytes, ia32_frame, fpstate);
	err = __copy_to_user(&x->i387.sw_reserved, &sw_bytes, sizeof(sw_bytes));

	if (!use_xsave())
		return !err;

	err |= __put_user(FP_XSTATE_MAGIC2,
			  (__u32 __user *)(buf + fpstate->user_size));

	/*
	 * For legacy compatibility, we always set FP/SSE bits in the bit
	 * vector while saving the state to the user context. This will
	 * enable us to capture any changes (during sigreturn) to
	 * the FP/SSE bits by the legacy applications which don't touch
	 * xfeatures in the xsave header.
	 *
	 * xsave aware apps can change the xfeatures in the xsave
	 * header as well as change any contents in the memory layout.
	 * xrestore as part of sigreturn will capture all the changes.
	 */
	err |= set_xfeature_in_sigframe(x, XFEATURE_MASK_FPSSE);

	return !err;
}

/*
 * Save the live registers to the user frame using the widest format in
 * use: XSAVE, else FXSAVE, else FNSAVE. Returns the helper's result.
 */
static inline int copy_fpregs_to_sigframe(struct xregs_state __user *buf, u32 pkru)
{
	if (use_xsave())
		return xsave_to_user_sigframe(buf, pkru);

	if (use_fxsr())
		return fxsave_to_user_sigframe((struct fxregs_state __user *) buf);
	else
		return fnsave_to_user_sigframe((struct fregs_state __user *) buf);
}

/*
 * Save the fpu, extended register state to the user signal frame.
 *
 * 'buf_fx' is the 64-byte aligned pointer at which the [f|fx|x]save
 *  state is copied.
 *  'buf' points to the 'buf_fx' or to the fsave header followed by 'buf_fx'.
 *
 *	buf == buf_fx for 64-bit frames and 32-bit fsave frame.
 *	buf != buf_fx for 32-bit frames with fxstate.
 *
 * Save it directly to the user frame with disabled page fault handler. If
 * that faults, try to clear the frame which handles the page fault.
 *
 * If this is a 32-bit frame with fxstate, put a fsave header before
 * the aligned state at 'buf_fx'.
 *
 * For [f]xsave state, update the SW reserved fields in the [f]xsave frame
 * indicating the absence/presence of the extended state to the user.
*/
bool copy_fpstate_to_sigframe(void __user *buf, void __user *buf_fx, int size, u32 pkru)
{
	struct task_struct *tsk = current;
	struct fpstate *fpstate = x86_task_fpu(tsk)->fpstate;
	bool ia32_fxstate = (buf != buf_fx);
	int ret;

	/* The fsave-header layout only exists on 32-bit capable kernels. */
	ia32_fxstate &= (IS_ENABLED(CONFIG_X86_32) ||
			 IS_ENABLED(CONFIG_IA32_EMULATION));

	/* No FPU feature: copy out the soft-FPU register image instead. */
	if (!static_cpu_has(X86_FEATURE_FPU)) {
		struct user_i387_ia32_struct fp;

		fpregs_soft_get(current, NULL, (struct membuf){.p = &fp,
						.left = sizeof(fp)});
		return !copy_to_user(buf, &fp, sizeof(fp));
	}

	if (!access_ok(buf, size))
		return false;

	if (use_xsave()) {
		struct xregs_state __user *xbuf = buf_fx;

		/*
		 * Clear the xsave header first, so that reserved fields are
		 * initialized to zero.
		 */
		if (__clear_user(&xbuf->header, sizeof(xbuf->header)))
			return false;
	}
retry:
	/*
	 * Load the FPU registers if they are not valid for the current task.
	 * With a valid FPU state we can attempt to save the state directly to
	 * userland's stack frame which will likely succeed. If it does not,
	 * resolve the fault in the user memory and try again.
	 */
	fpregs_lock();
	if (test_thread_flag(TIF_NEED_FPU_LOAD))
		fpregs_restore_userregs();

	pagefault_disable();
	ret = copy_fpregs_to_sigframe(buf_fx, pkru);
	pagefault_enable();
	fpregs_unlock();

	if (ret) {
		/* Retry only if clearing the user buffer itself succeeds. */
		if (!__clear_user(buf_fx, fpstate->user_size))
			goto retry;
		return false;
	}

	/* Save the fsave header for the 32-bit frames. */
	if ((ia32_fxstate || !use_fxsr()) && !save_fsave_header(tsk, buf))
		return false;

	if (use_fxsr() && !save_xstate_epilog(buf_fx, ia32_fxstate, fpstate))
		return false;

	return true;
}

/*
 * Restore registers straight from the user frame at @buf using the format
 * in use. With XSAVE, features in @ufeatures that are not part of
 * @xrestore are afterwards loaded from init_fpstate. Returns the result of
 * the restore helper (0 on success).
 */
static int __restore_fpregs_from_user(void __user *buf, u64 ufeatures,
				      u64 xrestore, bool fx_only)
{
	if (use_xsave()) {
		u64 init_bv = ufeatures & ~xrestore;
		int ret;

		if (likely(!fx_only))
			ret = xrstor_from_user_sigframe(buf, xrestore);
		else
			ret = fxrstor_from_user_sigframe(buf);

		if (!ret && unlikely(init_bv))
			os_xrstor(&init_fpstate, init_bv);
		return ret;
	} else if (use_fxsr()) {
		return fxrstor_from_user_sigframe(buf);
	} else {
		return frstor_from_user_sigframe(buf);
	}
}

/*
 * Attempt to restore the FPU registers directly from user memory.
 * Pagefaults are handled and any errors returned are fatal.
 */
static bool restore_fpregs_from_user(void __user *buf, u64 xrestore, bool fx_only)
{
	struct fpu *fpu = x86_task_fpu(current);
	int ret;

	/* Restore enabled features only. */
	xrestore &= fpu->fpstate->user_xfeatures;
retry:
	fpregs_lock();
	/* Ensure that XFD is up to date */
	xfd_update_state(fpu->fpstate);
	pagefault_disable();
	ret = __restore_fpregs_from_user(buf, fpu->fpstate->user_xfeatures,
					 xrestore, fx_only);
	pagefault_enable();

	if (unlikely(ret)) {
		/*
		 * The above did an FPU restore operation, restricted to
		 * the user portion of the registers, and failed, but the
		 * microcode might have modified the FPU registers
		 * nevertheless.
		 *
		 * If the FPU registers do not belong to current, then
		 * invalidate the FPU register state otherwise the task
		 * might preempt current and return to user space with
		 * corrupted FPU registers.
		 */
		if (test_thread_flag(TIF_NEED_FPU_LOAD))
			__cpu_invalidate_fpregs_state();
		fpregs_unlock();

		/* Try to handle #PF, but anything else is fatal. */
		if (ret != X86_TRAP_PF)
			return false;

		/* Fault the buffer in; retry only when that fully succeeds. */
		if (!fault_in_readable(buf, fpu->fpstate->user_size))
			goto retry;
		return false;
	}

	/*
	 * Restore supervisor states: previous context switch etc has done
	 * XSAVES and saved the supervisor states in the kernel buffer from
	 * which they can be restored now.
	 *
	 * It would be optimal to handle this with a single XRSTORS, but
	 * this does not work because the rest of the FPU registers have
	 * been restored from a user buffer directly.
	 */
	if (test_thread_flag(TIF_NEED_FPU_LOAD) && xfeatures_mask_supervisor())
		os_xrstor_supervisor(fpu->fpstate);

	fpregs_mark_activate();
	fpregs_unlock();
	return true;
}

/*
 * Restore FPU state from a signal frame.
 *
 * @buf:          start of the frame (fsave header for ia32 fx frames).
 * @buf_fx:       start of the [f]xsave area inside the frame.
 * @ia32_fxstate: true when @buf carries a separate fsave header.
 *
 * Without @ia32_fxstate the registers are restored directly from user
 * memory. Otherwise the frame is copied into the task's fpstate buffer,
 * the legacy FP part is folded in, and the result is loaded from there.
 * Returns true on success.
 */
static bool __fpu_restore_sig(void __user *buf, void __user *buf_fx,
			      bool ia32_fxstate)
{
	struct task_struct *tsk = current;
	struct fpu *fpu = x86_task_fpu(tsk);
	struct user_i387_ia32_struct env;
	bool success, fx_only = false;
	union fpregs_state *fpregs;
	u64 user_xfeatures = 0;

	if (use_xsave()) {
		struct _fpx_sw_bytes fx_sw_user;

		if (!check_xstate_in_sigframe(buf_fx, &fx_sw_user))
			return false;

		/* magic1 == 0 is how the check reports an FX-only frame. */
		fx_only = !fx_sw_user.magic1;
		user_xfeatures = fx_sw_user.xfeatures;
	} else {
		user_xfeatures = XFEATURE_MASK_FPSSE;
	}

	if (likely(!ia32_fxstate)) {
		/* Restore the FPU registers directly from user memory. */
		return restore_fpregs_from_user(buf_fx, user_xfeatures, fx_only);
	}

	/*
	 * Copy the legacy state because the FP portion of the FX frame has
	 * to be ignored for histerical raisins. The legacy state is folded
	 * in once the larger state has been copied.
	 */
	if (__copy_from_user(&env, buf, sizeof(env)))
		return false;

	/*
	 * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is
	 * not modified on context switch and that the xstate is considered
	 * to be loaded again on return to userland (overriding last_cpu avoids
	 * the optimisation).
	 */
	fpregs_lock();
	if (!test_thread_flag(TIF_NEED_FPU_LOAD)) {
		/*
		 * If supervisor states are available then save the
		 * hardware state in current's fpstate so that the
		 * supervisor state is preserved. Save the full state for
		 * simplicity. There is no point in optimizing this by only
		 * saving the supervisor states and then shuffle them to
		 * the right place in memory. It's ia32 mode. Shrug.
		 */
		if (xfeatures_mask_supervisor())
			os_xsave(fpu->fpstate);
		set_thread_flag(TIF_NEED_FPU_LOAD);
	}
	__fpu_invalidate_fpregs_state(fpu);
	__cpu_invalidate_fpregs_state();
	fpregs_unlock();

	fpregs = &fpu->fpstate->regs;
	if (use_xsave() && !fx_only) {
		if (copy_sigframe_from_user_to_xstate(tsk, buf_fx))
			return false;
	} else {
		if (__copy_from_user(&fpregs->fxsave, buf_fx,
				     sizeof(fpregs->fxsave)))
			return false;

		if (IS_ENABLED(CONFIG_X86_64)) {
			/* Reject invalid MXCSR values. */
			if (fpregs->fxsave.mxcsr & ~mxcsr_feature_mask)
				return false;
		} else {
			/* Mask invalid bits out for historical reasons (broken hardware). */
			fpregs->fxsave.mxcsr &= mxcsr_feature_mask;
		}

		/* Enforce XFEATURE_MASK_FPSSE when XSAVE is enabled */
		if (use_xsave())
			fpregs->xsave.header.xfeatures |= XFEATURE_MASK_FPSSE;
	}

	/* Fold the legacy FP storage */
	convert_to_fxsr(&fpregs->fxsave, &env);

	fpregs_lock();
	if (use_xsave()) {
		/*
		 * Remove all UABI feature bits not set in user_xfeatures
		 * from the memory xstate header which makes the full
		 * restore below bring them into init state. This works for
		 * fx_only mode as well because that has only FP and SSE
		 * set in user_xfeatures.
		 *
		 * Preserve supervisor states!
		 */
		u64 mask = user_xfeatures | xfeatures_mask_supervisor();

		fpregs->xsave.header.xfeatures &= mask;
		success = !os_xrstor_safe(fpu->fpstate,
					  fpu_kernel_cfg.max_features);
	} else {
		success = !fxrstor_safe(&fpregs->fxsave);
	}

	if (likely(success))
		fpregs_mark_activate();

	fpregs_unlock();
	return success;
}

/*
 * Size of the [f]xsave part of a signal frame for @fpstate: user_size,
 * plus room for the trailing FP_XSTATE_MAGIC2 word when XSAVE is in use.
 */
static inline unsigned int xstate_sigframe_size(struct fpstate *fpstate)
{
	unsigned int size = fpstate->user_size;

	return use_xsave() ? size + FP_XSTATE_MAGIC2_SIZE : size;
}

/*
 * Restore FPU state from a sigframe:
 *
 * A NULL @buf resets the user states and counts as success. On any failure
 * the user states are reset as well before false is returned.
 */
bool fpu__restore_sig(void __user *buf, int ia32_frame)
{
	struct fpu *fpu = x86_task_fpu(current);
	void __user *buf_fx = buf;
	bool ia32_fxstate = false;
	bool success = false;
	unsigned int size;

	if (unlikely(!buf)) {
		fpu__clear_user_states(fpu);
		return true;
	}

	size = xstate_sigframe_size(fpu->fpstate);

	ia32_frame &= (IS_ENABLED(CONFIG_X86_32) ||
		       IS_ENABLED(CONFIG_IA32_EMULATION));

	/*
	 * Only FXSR enabled systems need the FX state quirk.
	 * FRSTOR does not need it and can use the fast path.
	 */
	if (ia32_frame && use_fxsr()) {
		buf_fx = buf + sizeof(struct fregs_state);
		size += sizeof(struct fregs_state);
		ia32_fxstate = true;
	}

	if (!access_ok(buf, size))
		goto out;

	if (!IS_ENABLED(CONFIG_X86_64) && !cpu_feature_enabled(X86_FEATURE_FPU)) {
		success = !fpregs_soft_set(current, NULL, 0,
					   sizeof(struct user_i387_ia32_struct),
					   NULL, buf);
	} else {
		success = __fpu_restore_sig(buf, buf_fx, ia32_fxstate);
	}

out:
	if (unlikely(!success))
		fpu__clear_user_states(fpu);
	return success;
}

/*
 * Carve the FPU frame out of the user stack below @sp.
 *
 * *@buf_fx receives the 64-byte aligned address of the [f]xsave area and
 * *@size the total frame size. For ia32 frames with FXSR, the returned
 * stack pointer and the size additionally cover an fsave header placed
 * directly below *@buf_fx. Returns the new stack pointer.
 */
unsigned long
fpu__alloc_mathframe(unsigned long sp, int ia32_frame,
		     unsigned long *buf_fx, unsigned long *size)
{
	unsigned long frame_size = xstate_sigframe_size(x86_task_fpu(current)->fpstate);

	*buf_fx = sp = round_down(sp - frame_size, 64);
	if (ia32_frame && use_fxsr()) {
		frame_size += sizeof(struct fregs_state);
		sp -= sizeof(struct fregs_state);
	}

	*size = frame_size;

	return sp;
}

/*
 * Worst-case signal-frame FPU state size, starting from
 * fpu_user_cfg.max_size and adding the optional trailer/header parts.
 */
unsigned long __init fpu__get_fpstate_size(void)
{
	unsigned long ret = fpu_user_cfg.max_size;

	if (use_xsave())
		ret += FP_XSTATE_MAGIC2_SIZE;

	/*
	 * This space is needed on (most) 32-bit kernels, or when a 32-bit
	 * app is running on a 64-bit kernel. To keep things simple, just
	 * assume the worst case and always include space for 'fregs_state',
	 * even for 64-bit apps on 64-bit kernels. This wastes a bit of
	 * space, but keeps the code simple.
	 */
	if ((IS_ENABLED(CONFIG_IA32_EMULATION) ||
	     IS_ENABLED(CONFIG_X86_32)) && use_fxsr())
		ret += sizeof(struct fregs_state);

	return ret;
}
16 6 6 6 5 1 1 6 5 5 6 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 23 23 1712 1710 1 1450 1450 18 3 3 3 20 1 2 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 2 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 1 2 2 78 18 20 2 5 5 5 5 5 351 352 3 3 3 3 352 351 351 352 352 352 352 12 12 12 12 2 2 2 2 12 12 12 1 1 1 1 1 1 1 5 4 3 3 3 3 2 2 3 5 1 4 1 1 2 2 1 4 1 1 1 1 1 1 5 5 5 2 4 4 4 2 1 3 3 1 3 3 1 3 5 3 2 3 5 1 64 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 
923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 
1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 
1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 
2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 
2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 
2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 /* CPU control. 
* (C) 2001, 2002, 2003, 2004 Rusty Russell * * This code is licenced under the GPL. */ #include <linux/sched/mm.h> #include <linux/proc_fs.h> #include <linux/smp.h> #include <linux/init.h> #include <linux/notifier.h> #include <linux/sched/signal.h> #include <linux/sched/hotplug.h> #include <linux/sched/isolation.h> #include <linux/sched/task.h> #include <linux/sched/smt.h> #include <linux/unistd.h> #include <linux/cpu.h> #include <linux/oom.h> #include <linux/rcupdate.h> #include <linux/delay.h> #include <linux/export.h> #include <linux/bug.h> #include <linux/kthread.h> #include <linux/stop_machine.h> #include <linux/mutex.h> #include <linux/gfp.h> #include <linux/suspend.h> #include <linux/lockdep.h> #include <linux/tick.h> #include <linux/irq.h> #include <linux/nmi.h> #include <linux/smpboot.h> #include <linux/relay.h> #include <linux/slab.h> #include <linux/scs.h> #include <linux/percpu-rwsem.h> #include <linux/cpuset.h> #include <linux/random.h> #include <linux/cc_platform.h> #include <linux/parser.h> #include <trace/events/power.h> #define CREATE_TRACE_POINTS #include <trace/events/cpuhp.h> #include "smpboot.h" /** * struct cpuhp_cpu_state - Per cpu hotplug state storage * @state: The current cpu state * @target: The target state * @fail: Current CPU hotplug callback state * @thread: Pointer to the hotplug thread * @should_run: Thread should execute * @rollback: Perform a rollback * @single: Single callback invocation * @bringup: Single callback bringup or teardown selector * @node: Remote CPU node; for multi-instance, do a * single entry callback for install/remove * @last: For multi-instance rollback, remember how far we got * @cb_state: The state for a single callback (install/uninstall) * @result: Result of the operation * @ap_sync_state: State for AP synchronization * @done_up: Signal completion to the issuer of the task for cpu-up * @done_down: Signal completion to the issuer of the task for cpu-down */ struct cpuhp_cpu_state { enum cpuhp_state state; 
enum cpuhp_state target; enum cpuhp_state fail; #ifdef CONFIG_SMP struct task_struct *thread; bool should_run; bool rollback; bool single; bool bringup; struct hlist_node *node; struct hlist_node *last; enum cpuhp_state cb_state; int result; atomic_t ap_sync_state; struct completion done_up; struct completion done_down; #endif }; static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state) = { .fail = CPUHP_INVALID, }; #ifdef CONFIG_SMP cpumask_t cpus_booted_once_mask; #endif #if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP) static struct lockdep_map cpuhp_state_up_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-up", &cpuhp_state_up_map); static struct lockdep_map cpuhp_state_down_map = STATIC_LOCKDEP_MAP_INIT("cpuhp_state-down", &cpuhp_state_down_map); static inline void cpuhp_lock_acquire(bool bringup) { lock_map_acquire(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } static inline void cpuhp_lock_release(bool bringup) { lock_map_release(bringup ? &cpuhp_state_up_map : &cpuhp_state_down_map); } #else static inline void cpuhp_lock_acquire(bool bringup) { } static inline void cpuhp_lock_release(bool bringup) { } #endif /** * struct cpuhp_step - Hotplug state machine step * @name: Name of the step * @startup: Startup function of the step * @teardown: Teardown function of the step * @cant_stop: Bringup/teardown can't be stopped at this step * @multi_instance: State has multiple instances which get added afterwards */ struct cpuhp_step { const char *name; union { int (*single)(unsigned int cpu); int (*multi)(unsigned int cpu, struct hlist_node *node); } startup; union { int (*single)(unsigned int cpu); int (*multi)(unsigned int cpu, struct hlist_node *node); } teardown; /* private: */ struct hlist_head list; /* public: */ bool cant_stop; bool multi_instance; }; static DEFINE_MUTEX(cpuhp_state_mutex); static struct cpuhp_step cpuhp_hp_states[]; static struct cpuhp_step *cpuhp_get_step(enum cpuhp_state state) { return cpuhp_hp_states + state; } static bool 
cpuhp_step_empty(bool bringup, struct cpuhp_step *step) { return bringup ? !step->startup.single : !step->teardown.single; } /** * cpuhp_invoke_callback - Invoke the callbacks for a given state * @cpu: The cpu for which the callback should be invoked * @state: The state to do callbacks for * @bringup: True if the bringup callback should be invoked * @node: For multi-instance, do a single entry callback for install/remove * @lastp: For multi-instance rollback, remember how far we got * * Called from cpu hotplug and from the state register machinery. * * Return: %0 on success or a negative errno code */ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node, struct hlist_node **lastp) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); struct cpuhp_step *step = cpuhp_get_step(state); int (*cbm)(unsigned int cpu, struct hlist_node *node); int (*cb)(unsigned int cpu); int ret, cnt; if (st->fail == state) { st->fail = CPUHP_INVALID; return -EAGAIN; } if (cpuhp_step_empty(bringup, step)) { WARN_ON_ONCE(1); return 0; } if (!step->multi_instance) { WARN_ON_ONCE(lastp && *lastp); cb = bringup ? step->startup.single : step->teardown.single; trace_cpuhp_enter(cpu, st->target, state, cb); ret = cb(cpu); trace_cpuhp_exit(cpu, st->state, state, ret); return ret; } cbm = bringup ? step->startup.multi : step->teardown.multi; /* Single invocation for instance add/remove */ if (node) { WARN_ON_ONCE(lastp && *lastp); trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); ret = cbm(cpu, node); trace_cpuhp_exit(cpu, st->state, state, ret); return ret; } /* State transition. 
Invoke on all instances */ cnt = 0; hlist_for_each(node, &step->list) { if (lastp && node == *lastp) break; trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); ret = cbm(cpu, node); trace_cpuhp_exit(cpu, st->state, state, ret); if (ret) { if (!lastp) goto err; *lastp = node; return ret; } cnt++; } if (lastp) *lastp = NULL; return 0; err: /* Rollback the instances if one failed */ cbm = !bringup ? step->startup.multi : step->teardown.multi; if (!cbm) return ret; hlist_for_each(node, &step->list) { if (!cnt--) break; trace_cpuhp_multi_enter(cpu, st->target, state, cbm, node); ret = cbm(cpu, node); trace_cpuhp_exit(cpu, st->state, state, ret); /* * Rollback must not fail, */ WARN_ON_ONCE(ret); } return ret; } #ifdef CONFIG_SMP static bool cpuhp_is_ap_state(enum cpuhp_state state) { /* * The extra check for CPUHP_TEARDOWN_CPU is only for documentation * purposes as that state is handled explicitly in cpu_down. */ return state > CPUHP_BRINGUP_CPU && state != CPUHP_TEARDOWN_CPU; } static inline void wait_for_ap_thread(struct cpuhp_cpu_state *st, bool bringup) { struct completion *done = bringup ? &st->done_up : &st->done_down; wait_for_completion(done); } static inline void complete_ap_thread(struct cpuhp_cpu_state *st, bool bringup) { struct completion *done = bringup ? &st->done_up : &st->done_down; complete(done); } /* * The former STARTING/DYING states, ran with IRQs disabled and must not fail. */ static bool cpuhp_is_atomic_state(enum cpuhp_state state) { return CPUHP_AP_IDLE_DEAD <= state && state < CPUHP_AP_ONLINE; } /* Synchronization state management */ enum cpuhp_sync_state { SYNC_STATE_DEAD, SYNC_STATE_KICKED, SYNC_STATE_SHOULD_DIE, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE, SYNC_STATE_ONLINE, }; #ifdef CONFIG_HOTPLUG_CORE_SYNC /** * cpuhp_ap_update_sync_state - Update synchronization state during bringup/teardown * @state: The synchronization state to set * * No synchronization point. 
Just update of the synchronization state, but implies * a full barrier so that the AP changes are visible before the control CPU proceeds. */ static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); (void)atomic_xchg(st, state); } void __weak arch_cpuhp_sync_state_poll(void) { cpu_relax(); } static bool cpuhp_wait_for_sync_state(unsigned int cpu, enum cpuhp_sync_state state, enum cpuhp_sync_state next_state) { atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); ktime_t now, end, start = ktime_get(); int sync; end = start + 10ULL * NSEC_PER_SEC; sync = atomic_read(st); while (1) { if (sync == state) { if (!atomic_try_cmpxchg(st, &sync, next_state)) continue; return true; } now = ktime_get(); if (now > end) { /* Timeout. Leave the state unchanged */ return false; } else if (now - start < NSEC_PER_MSEC) { /* Poll for one millisecond */ arch_cpuhp_sync_state_poll(); } else { usleep_range(USEC_PER_MSEC, 2 * USEC_PER_MSEC); } sync = atomic_read(st); } return true; } #else /* CONFIG_HOTPLUG_CORE_SYNC */ static inline void cpuhp_ap_update_sync_state(enum cpuhp_sync_state state) { } #endif /* !CONFIG_HOTPLUG_CORE_SYNC */ #ifdef CONFIG_HOTPLUG_CORE_SYNC_DEAD /** * cpuhp_ap_report_dead - Update synchronization state to DEAD * * No synchronization point. Just update of the synchronization state. */ void cpuhp_ap_report_dead(void) { cpuhp_ap_update_sync_state(SYNC_STATE_DEAD); } void __weak arch_cpuhp_cleanup_dead_cpu(unsigned int cpu) { } /* * Late CPU shutdown synchronization point. Cannot use cpuhp_state::done_down * because the AP cannot issue complete() at this stage. */ static void cpuhp_bp_sync_dead(unsigned int cpu) { atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); int sync = atomic_read(st); do { /* CPU can have reported dead already. Don't overwrite that! 
*/ if (sync == SYNC_STATE_DEAD) break; } while (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_SHOULD_DIE)); if (cpuhp_wait_for_sync_state(cpu, SYNC_STATE_DEAD, SYNC_STATE_DEAD)) { /* CPU reached dead state. Invoke the cleanup function */ arch_cpuhp_cleanup_dead_cpu(cpu); return; } /* No further action possible. Emit message and give up. */ pr_err("CPU%u failed to report dead state\n", cpu); } #else /* CONFIG_HOTPLUG_CORE_SYNC_DEAD */ static inline void cpuhp_bp_sync_dead(unsigned int cpu) { } #endif /* !CONFIG_HOTPLUG_CORE_SYNC_DEAD */ #ifdef CONFIG_HOTPLUG_CORE_SYNC_FULL /** * cpuhp_ap_sync_alive - Synchronize AP with the control CPU once it is alive * * Updates the AP synchronization state to SYNC_STATE_ALIVE and waits * for the BP to release it. */ void cpuhp_ap_sync_alive(void) { atomic_t *st = this_cpu_ptr(&cpuhp_state.ap_sync_state); cpuhp_ap_update_sync_state(SYNC_STATE_ALIVE); /* Wait for the control CPU to release it. */ while (atomic_read(st) != SYNC_STATE_SHOULD_ONLINE) cpu_relax(); } static bool cpuhp_can_boot_ap(unsigned int cpu) { atomic_t *st = per_cpu_ptr(&cpuhp_state.ap_sync_state, cpu); int sync = atomic_read(st); again: switch (sync) { case SYNC_STATE_DEAD: /* CPU is properly dead */ break; case SYNC_STATE_KICKED: /* CPU did not come up in previous attempt */ break; case SYNC_STATE_ALIVE: /* CPU is stuck cpuhp_ap_sync_alive(). */ break; default: /* CPU failed to report online or dead and is in limbo state. */ return false; } /* Prepare for booting */ if (!atomic_try_cmpxchg(st, &sync, SYNC_STATE_KICKED)) goto again; return true; } void __weak arch_cpuhp_cleanup_kick_cpu(unsigned int cpu) { } /* * Early CPU bringup synchronization point. Cannot use cpuhp_state::done_up * because the AP cannot issue complete() so early in the bringup. 
*/ static int cpuhp_bp_sync_alive(unsigned int cpu) { int ret = 0; if (!IS_ENABLED(CONFIG_HOTPLUG_CORE_SYNC_FULL)) return 0; if (!cpuhp_wait_for_sync_state(cpu, SYNC_STATE_ALIVE, SYNC_STATE_SHOULD_ONLINE)) { pr_err("CPU%u failed to report alive state\n", cpu); ret = -EIO; } /* Let the architecture cleanup the kick alive mechanics. */ arch_cpuhp_cleanup_kick_cpu(cpu); return ret; } #else /* CONFIG_HOTPLUG_CORE_SYNC_FULL */ static inline int cpuhp_bp_sync_alive(unsigned int cpu) { return 0; } static inline bool cpuhp_can_boot_ap(unsigned int cpu) { return true; } #endif /* !CONFIG_HOTPLUG_CORE_SYNC_FULL */ /* Serializes the updates to cpu_online_mask, cpu_present_mask */ static DEFINE_MUTEX(cpu_add_remove_lock); bool cpuhp_tasks_frozen; EXPORT_SYMBOL_GPL(cpuhp_tasks_frozen); /* * The following two APIs (cpu_maps_update_begin/done) must be used when * attempting to serialize the updates to cpu_online_mask & cpu_present_mask. */ void cpu_maps_update_begin(void) { mutex_lock(&cpu_add_remove_lock); } void cpu_maps_update_done(void) { mutex_unlock(&cpu_add_remove_lock); } /* * If set, cpu_up and cpu_down will return -EBUSY and do nothing. 
* Should always be manipulated under cpu_add_remove_lock */ static int cpu_hotplug_disabled; #ifdef CONFIG_HOTPLUG_CPU DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock); static bool cpu_hotplug_offline_disabled __ro_after_init; void cpus_read_lock(void) { percpu_down_read(&cpu_hotplug_lock); } EXPORT_SYMBOL_GPL(cpus_read_lock); int cpus_read_trylock(void) { return percpu_down_read_trylock(&cpu_hotplug_lock); } EXPORT_SYMBOL_GPL(cpus_read_trylock); void cpus_read_unlock(void) { percpu_up_read(&cpu_hotplug_lock); } EXPORT_SYMBOL_GPL(cpus_read_unlock); void cpus_write_lock(void) { percpu_down_write(&cpu_hotplug_lock); } void cpus_write_unlock(void) { percpu_up_write(&cpu_hotplug_lock); } void lockdep_assert_cpus_held(void) { /* * We can't have hotplug operations before userspace starts running, * and some init codepaths will knowingly not take the hotplug lock. * This is all valid, so mute lockdep until it makes sense to report * unheld locks. */ if (system_state < SYSTEM_RUNNING) return; percpu_rwsem_assert_held(&cpu_hotplug_lock); } EXPORT_SYMBOL_GPL(lockdep_assert_cpus_held); #ifdef CONFIG_LOCKDEP int lockdep_is_cpus_held(void) { return percpu_rwsem_is_held(&cpu_hotplug_lock); } #endif static void lockdep_acquire_cpus_lock(void) { rwsem_acquire(&cpu_hotplug_lock.dep_map, 0, 0, _THIS_IP_); } static void lockdep_release_cpus_lock(void) { rwsem_release(&cpu_hotplug_lock.dep_map, _THIS_IP_); } /* Declare CPU offlining not supported */ void cpu_hotplug_disable_offlining(void) { cpu_maps_update_begin(); cpu_hotplug_offline_disabled = true; cpu_maps_update_done(); } /* * Wait for currently running CPU hotplug operations to complete (if any) and * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the * hotplug path before performing hotplug operations. So acquiring that lock * guarantees mutual exclusion from any currently running hotplug operations. 
*/ void cpu_hotplug_disable(void) { cpu_maps_update_begin(); cpu_hotplug_disabled++; cpu_maps_update_done(); } EXPORT_SYMBOL_GPL(cpu_hotplug_disable); static void __cpu_hotplug_enable(void) { if (WARN_ONCE(!cpu_hotplug_disabled, "Unbalanced cpu hotplug enable\n")) return; cpu_hotplug_disabled--; } void cpu_hotplug_enable(void) { cpu_maps_update_begin(); __cpu_hotplug_enable(); cpu_maps_update_done(); } EXPORT_SYMBOL_GPL(cpu_hotplug_enable); #else static void lockdep_acquire_cpus_lock(void) { } static void lockdep_release_cpus_lock(void) { } #endif /* CONFIG_HOTPLUG_CPU */ /* * Architectures that need SMT-specific errata handling during SMT hotplug * should override this. */ void __weak arch_smt_update(void) { } #ifdef CONFIG_HOTPLUG_SMT enum cpuhp_smt_control cpu_smt_control __read_mostly = CPU_SMT_ENABLED; static unsigned int cpu_smt_max_threads __ro_after_init; unsigned int cpu_smt_num_threads __read_mostly = UINT_MAX; void __init cpu_smt_disable(bool force) { if (!cpu_smt_possible()) return; if (force) { pr_info("SMT: Force disabled\n"); cpu_smt_control = CPU_SMT_FORCE_DISABLED; } else { pr_info("SMT: disabled\n"); cpu_smt_control = CPU_SMT_DISABLED; } cpu_smt_num_threads = 1; } /* * The decision whether SMT is supported can only be done after the full * CPU identification. Called from architecture code. */ void __init cpu_smt_set_num_threads(unsigned int num_threads, unsigned int max_threads) { WARN_ON(!num_threads || (num_threads > max_threads)); if (max_threads == 1) cpu_smt_control = CPU_SMT_NOT_SUPPORTED; cpu_smt_max_threads = max_threads; /* * If SMT has been disabled via the kernel command line or SMT is * not supported, set cpu_smt_num_threads to 1 for consistency. * If enabled, take the architecture requested number of threads * to bring up into account. 
*/ if (cpu_smt_control != CPU_SMT_ENABLED) cpu_smt_num_threads = 1; else if (num_threads < cpu_smt_num_threads) cpu_smt_num_threads = num_threads; } static int __init smt_cmdline_disable(char *str) { cpu_smt_disable(str && !strcmp(str, "force")); return 0; } early_param("nosmt", smt_cmdline_disable); /* * For Archicture supporting partial SMT states check if the thread is allowed. * Otherwise this has already been checked through cpu_smt_max_threads when * setting the SMT level. */ static inline bool cpu_smt_thread_allowed(unsigned int cpu) { #ifdef CONFIG_SMT_NUM_THREADS_DYNAMIC return topology_smt_thread_allowed(cpu); #else return true; #endif } static inline bool cpu_bootable(unsigned int cpu) { if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu)) return true; /* All CPUs are bootable if controls are not configured */ if (cpu_smt_control == CPU_SMT_NOT_IMPLEMENTED) return true; /* All CPUs are bootable if CPU is not SMT capable */ if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) return true; if (topology_is_primary_thread(cpu)) return true; /* * On x86 it's required to boot all logical CPUs at least once so * that the init code can get a chance to set CR4.MCE on each * CPU. Otherwise, a broadcasted MCE observing CR4.MCE=0b on any * core will shutdown the machine. 
*/ return !cpumask_test_cpu(cpu, &cpus_booted_once_mask); } /* Returns true if SMT is supported and not forcefully (irreversibly) disabled */ bool cpu_smt_possible(void) { return cpu_smt_control != CPU_SMT_FORCE_DISABLED && cpu_smt_control != CPU_SMT_NOT_SUPPORTED; } EXPORT_SYMBOL_GPL(cpu_smt_possible); #else static inline bool cpu_bootable(unsigned int cpu) { return true; } #endif static inline enum cpuhp_state cpuhp_set_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { enum cpuhp_state prev_state = st->state; bool bringup = st->state < target; st->rollback = false; st->last = NULL; st->target = target; st->single = false; st->bringup = bringup; if (cpu_dying(cpu) != !bringup) set_cpu_dying(cpu, !bringup); return prev_state; } static inline void cpuhp_reset_state(int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state prev_state) { bool bringup = !st->bringup; st->target = prev_state; /* * Already rolling back. No need invert the bringup value or to change * the current state. */ if (st->rollback) return; st->rollback = true; /* * If we have st->last we need to undo partial multi_instance of this * state first. Otherwise start undo at the previous state. */ if (!st->last) { if (st->bringup) st->state--; else st->state++; } st->bringup = bringup; if (cpu_dying(cpu) != !bringup) set_cpu_dying(cpu, !bringup); } /* Regular hotplug invocation of the AP hotplug thread */ static void __cpuhp_kick_ap(struct cpuhp_cpu_state *st) { if (!st->single && st->state == st->target) return; st->result = 0; /* * Make sure the above stores are visible before should_run becomes * true. 
Paired with the mb() above in cpuhp_thread_fun()
	 */
	smp_mb();
	st->should_run = true;
	wake_up_process(st->thread);
	wait_for_ap_thread(st, st->bringup);
}

/*
 * Set @target on @st, kick the AP hotplug thread and wait for it.
 *
 * When the thread reports a non-zero st->result, the state is reset towards
 * the previous state and the thread is kicked a second time to perform the
 * rollback.
 *
 * Returns st->result as sampled after the first kick.
 */
static int cpuhp_kick_ap(int cpu, struct cpuhp_cpu_state *st,
			 enum cpuhp_state target)
{
	enum cpuhp_state prev_state;
	int ret;

	prev_state = cpuhp_set_state(cpu, st, target);
	__cpuhp_kick_ap(st);
	if ((ret = st->result)) {
		cpuhp_reset_state(cpu, st, prev_state);
		__cpuhp_kick_ap(st);
	}

	return ret;
}

/*
 * Wait for @cpu to complete its bringup and unpark its hotplug thread.
 *
 * Returns 0 on success, -ECANCELED when the CPU is not online after the
 * wait or when cpu_bootable() rejects it.
 */
static int bringup_wait_for_ap_online(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);

	/* Wait for the CPU to reach CPUHP_AP_ONLINE_IDLE */
	wait_for_ap_thread(st, true);
	if (WARN_ON_ONCE((!cpu_online(cpu))))
		return -ECANCELED;

	/* Unpark the hotplug thread of the target cpu */
	kthread_unpark(st->thread);

	/*
	 * SMT soft disabling on X86 requires to bring the CPU out of the
	 * BIOS 'wait for SIPI' state in order to set the CR4.MCE bit. The
	 * CPU marked itself as booted_once in notify_cpu_starting() so the
	 * cpu_bootable() check will now return false if this is not the
	 * primary sibling.
	 */
	if (!cpu_bootable(cpu))
		return -ECANCELED;
	return 0;
}

#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
/*
 * Returns -EAGAIN when cpuhp_can_boot_ap() refuses @cpu, otherwise the
 * result of arch_cpuhp_kick_ap_alive() invoked with the CPU's idle thread.
 */
static int cpuhp_kick_ap_alive(unsigned int cpu)
{
	if (!cpuhp_can_boot_ap(cpu))
		return -EAGAIN;

	return arch_cpuhp_kick_ap_alive(cpu, idle_thread_get(cpu));
}

static int cpuhp_bringup_ap(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int ret;

	/*
	 * Some architectures have to walk the irq descriptors to
	 * setup the vector space for the cpu which comes online.
	 * Prevent irq alloc/free across the bringup.
*/
	irq_lock_sparse();

	ret = cpuhp_bp_sync_alive(cpu);
	if (ret)
		goto out_unlock;

	ret = bringup_wait_for_ap_online(cpu);
	if (ret)
		goto out_unlock;

	irq_unlock_sparse();

	/* No AP thread kick is needed for targets up to CPUHP_AP_ONLINE_IDLE */
	if (st->target <= CPUHP_AP_ONLINE_IDLE)
		return 0;

	return cpuhp_kick_ap(cpu, st, st->target);

out_unlock:
	irq_unlock_sparse();
	return ret;
}
#else
/*
 * All-in-one bringup of @cpu: __cpu_up(), cpuhp_bp_sync_alive() and
 * bringup_wait_for_ap_online() run under the sparse irq lock, followed by a
 * kick of the AP hotplug thread when the target lies beyond
 * CPUHP_AP_ONLINE_IDLE.
 *
 * Returns -EAGAIN when cpuhp_can_boot_ap() refuses the CPU, the first
 * non-zero result of the steps above, or the result of cpuhp_kick_ap().
 */
static int bringup_cpu(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct task_struct *idle = idle_thread_get(cpu);
	int ret;

	if (!cpuhp_can_boot_ap(cpu))
		return -EAGAIN;

	/*
	 * Some architectures have to walk the irq descriptors to
	 * setup the vector space for the cpu which comes online.
	 *
	 * Prevent irq alloc/free across the bringup by acquiring the
	 * sparse irq lock. Hold it until the upcoming CPU completes the
	 * startup in cpuhp_online_idle() which allows to avoid
	 * intermediate synchronization points in the architecture code.
	 */
	irq_lock_sparse();

	ret = __cpu_up(cpu, idle);
	if (ret)
		goto out_unlock;

	ret = cpuhp_bp_sync_alive(cpu);
	if (ret)
		goto out_unlock;

	ret = bringup_wait_for_ap_online(cpu);
	if (ret)
		goto out_unlock;

	irq_unlock_sparse();

	if (st->target <= CPUHP_AP_ONLINE_IDLE)
		return 0;

	return cpuhp_kick_ap(cpu, st, st->target);

out_unlock:
	irq_unlock_sparse();
	return ret;
}
#endif

/*
 * Clear the idle task's active_mm and drop the lazy-TLB reference on it.
 * Warns if that mm is not init_mm. Always returns 0.
 */
static int finish_cpu(unsigned int cpu)
{
	struct task_struct *idle = idle_thread_get(cpu);
	struct mm_struct *mm = idle->active_mm;

	/*
	 * sched_force_init_mm() ensured the use of &init_mm,
	 * drop that refcount now that the CPU has stopped.
	 */
	WARN_ON(mm != &init_mm);
	idle->active_mm = NULL;
	mmdrop_lazy_tlb(mm);

	return 0;
}

/*
 * Hotplug state machine related functions
 */

/*
 * Get the next state to run. Empty ones will be skipped. Returns true if a
 * state must be run.
 *
 * st->state will be modified ahead of time, to match state_to_run, as if it
 * has already run.
*/ static bool cpuhp_next_state(bool bringup, enum cpuhp_state *state_to_run, struct cpuhp_cpu_state *st, enum cpuhp_state target) { do { if (bringup) { if (st->state >= target) return false; *state_to_run = ++st->state; } else { if (st->state <= target) return false; *state_to_run = st->state--; } if (!cpuhp_step_empty(bringup, cpuhp_get_step(*state_to_run))) break; } while (true); return true; } static int __cpuhp_invoke_callback_range(bool bringup, unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target, bool nofail) { enum cpuhp_state state; int ret = 0; while (cpuhp_next_state(bringup, &state, st, target)) { int err; err = cpuhp_invoke_callback(cpu, state, bringup, NULL, NULL); if (!err) continue; if (nofail) { pr_warn("CPU %u %s state %s (%d) failed (%d)\n", cpu, bringup ? "UP" : "DOWN", cpuhp_get_step(st->state)->name, st->state, err); ret = -1; } else { ret = err; break; } } return ret; } static inline int cpuhp_invoke_callback_range(bool bringup, unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { return __cpuhp_invoke_callback_range(bringup, cpu, st, target, false); } static inline void cpuhp_invoke_callback_range_nofail(bool bringup, unsigned int cpu, struct cpuhp_cpu_state *st, enum cpuhp_state target) { __cpuhp_invoke_callback_range(bringup, cpu, st, target, true); } static inline bool can_rollback_cpu(struct cpuhp_cpu_state *st) { if (IS_ENABLED(CONFIG_HOTPLUG_CPU)) return true; /* * When CPU hotplug is disabled, then taking the CPU down is not * possible because takedown_cpu() and the architecture and * subsystem specific mechanisms are not available. So the CPU * which would be completely unplugged again needs to stay around * in the current state. 
*/
	return st->state <= CPUHP_BRINGUP_CPU;
}

/*
 * Run the startup callbacks for @cpu up to @target.
 *
 * On failure the state is reset towards the state recorded on entry and,
 * when can_rollback_cpu() permits, the teardown callbacks are run back down
 * to it. A failure during that rollback triggers a WARN_ON().
 *
 * Returns 0 on success or the error of the failing startup callback.
 */
static int cpuhp_up_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
			      enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	ret = cpuhp_invoke_callback_range(true, cpu, st, target);
	if (ret) {
		pr_debug("CPU UP failed (%d) CPU %u state %s (%d)\n",
			 ret, cpu, cpuhp_get_step(st->state)->name,
			 st->state);

		cpuhp_reset_state(cpu, st, prev_state);
		if (can_rollback_cpu(st))
			WARN_ON(cpuhp_invoke_callback_range(false, cpu, st,
							    prev_state));
	}
	return ret;
}

/*
 * The cpu hotplug threads manage the bringup and teardown of the cpus
 */

/* Returns the should_run flag of the current CPU's hotplug state. */
static int cpuhp_should_run(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	return st->should_run;
}

/*
 * Execute teardown/startup callbacks on the plugged cpu. Also used to invoke
 * callbacks when a state gets [un]installed at runtime.
 *
 * Each invocation of this function by the smpboot thread does a single AP
 * state callback.
 *
 * It has 3 modes of operation:
 *  - single: runs st->cb_state
 *  - up:     runs ++st->state, while st->state < st->target
 *  - down:   runs st->state--, while st->state > st->target
 *
 * When complete or on error, should_run is cleared and the completion is fired.
 */
static void cpuhp_thread_fun(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	bool bringup = st->bringup;
	enum cpuhp_state state;

	if (WARN_ON_ONCE(!st->should_run))
		return;

	/*
	 * ACQUIRE for the cpuhp_should_run() load of ->should_run. Ensures
	 * that if we see ->should_run we also see the rest of the state.
	 */
	smp_mb();

	/*
	 * The BP holds the hotplug lock, but we're now running on the AP,
	 * ensure that anybody asserting the lock is held, will actually find
	 * it so.
*/
	lockdep_acquire_cpus_lock();
	cpuhp_lock_acquire(bringup);

	if (st->single) {
		/* Single mode: run exactly st->cb_state, then stop */
		state = st->cb_state;
		st->should_run = false;
	} else {
		st->should_run = cpuhp_next_state(bringup, &state, st, st->target);
		if (!st->should_run)
			goto end;
	}

	WARN_ON_ONCE(!cpuhp_is_ap_state(state));

	if (cpuhp_is_atomic_state(state)) {
		/* Atomic states run their callback with interrupts disabled */
		local_irq_disable();
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
		local_irq_enable();

		/*
		 * STARTING/DYING must not fail!
		 */
		WARN_ON_ONCE(st->result);
	} else {
		st->result = cpuhp_invoke_callback(cpu, state, bringup, st->node, &st->last);
	}

	if (st->result) {
		/*
		 * If we fail on a rollback, we're up a creek without a
		 * paddle, no way forward, no way back. We lose, thanks for
		 * playing.
		 */
		WARN_ON_ONCE(st->rollback);
		st->should_run = false;
	}

end:
	cpuhp_lock_release(bringup);
	lockdep_release_cpus_lock();

	/* Signal the waiter once there is nothing left to run */
	if (!st->should_run)
		complete_ap_thread(st, bringup);
}

/* Invoke a single callback on a remote cpu */
static int
cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
			 struct hlist_node *node)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int ret;

	if (!cpu_online(cpu))
		return 0;

	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	/*
	 * If we are up and running, use the hotplug thread. For early calls
	 * we invoke the thread function directly.
	 */
	if (!st->thread)
		return cpuhp_invoke_callback(cpu, state, bringup, node, NULL);

	st->rollback = false;
	st->last = NULL;

	st->node = node;
	st->bringup = bringup;
	st->cb_state = state;
	st->single = true;

	__cpuhp_kick_ap(st);

	/*
	 * If we failed and did a partial, do a rollback.
	 */
	if ((ret = st->result) && st->last) {
		st->rollback = true;
		st->bringup = !bringup;

		__cpuhp_kick_ap(st);
	}

	/*
	 * Clean up the leftovers so the next hotplug operation won't use stale
	 * data.
*/
	st->node = st->last = NULL;
	return ret;
}

/*
 * Kick the AP hotplug thread of @cpu towards the target already stored in
 * its state, bracketed by the cpuhp enter/exit tracepoints.
 *
 * Returns the result of cpuhp_kick_ap().
 */
static int cpuhp_kick_ap_work(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	enum cpuhp_state prev_state = st->state;
	int ret;

	cpuhp_lock_acquire(false);
	cpuhp_lock_release(false);

	cpuhp_lock_acquire(true);
	cpuhp_lock_release(true);

	trace_cpuhp_enter(cpu, st->target, prev_state, cpuhp_kick_ap_work);
	ret = cpuhp_kick_ap(cpu, st, st->target);
	trace_cpuhp_exit(cpu, st->state, prev_state, ret);

	return ret;
}

/* Descriptor of the per-CPU "cpuhp/%u" hotplug threads */
static struct smp_hotplug_thread cpuhp_threads = {
	.store			= &cpuhp_state.thread,
	.thread_should_run	= cpuhp_should_run,
	.thread_fn		= cpuhp_thread_fun,
	.thread_comm		= "cpuhp/%u",
	.selfparking		= true,
};

/* Initialize the up and down completions of every possible CPU. */
static __init void cpuhp_init_state(void)
{
	struct cpuhp_cpu_state *st;
	int cpu;

	for_each_possible_cpu(cpu) {
		st = per_cpu_ptr(&cpuhp_state, cpu);
		init_completion(&st->done_up);
		init_completion(&st->done_down);
	}
}

/*
 * Initialize the per-CPU hotplug state, register the hotplug threads
 * (BUG on failure) and unpark the thread of the current CPU.
 */
void __init cpuhp_threads_init(void)
{
	cpuhp_init_state();
	BUG_ON(smpboot_register_percpu_thread(&cpuhp_threads));
	kthread_unpark(this_cpu_read(cpuhp_state.thread));
}

#ifdef CONFIG_HOTPLUG_CPU
#ifndef arch_clear_mm_cpumask_cpu
/* Default used when no other definition of this macro is in effect */
#define arch_clear_mm_cpumask_cpu(cpu, mm) cpumask_clear_cpu(cpu, mm_cpumask(mm))
#endif

/**
 * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU
 * @cpu: a CPU id
 *
 * This function walks all processes, finds a valid mm struct for each one and
 * then clears a corresponding bit in mm's cpumask.  While this all sounds
 * trivial, there are various non-obvious corner cases, which this function
 * tries to solve in a safe manner.
 *
 * Also note that the function uses a somewhat relaxed locking scheme, so it may
 * be called only for an already offlined CPU.
 */
void clear_tasks_mm_cpumask(int cpu)
{
	struct task_struct *p;

	/*
	 * This function is called after the cpu is taken down and marked
	 * offline, so its not like new tasks will ever get this cpu set in
	 * their mm mask. 
-- Peter Zijlstra
	 * Thus, we may use rcu_read_lock() here, instead of grabbing
	 * full-fledged tasklist_lock.
	 */
	WARN_ON(cpu_online(cpu));
	rcu_read_lock();
	for_each_process(p) {
		struct task_struct *t;

		/*
		 * Main thread might exit, but other threads may still have
		 * a valid mm. Find one.
		 */
		t = find_lock_task_mm(p);
		if (!t)
			continue;
		arch_clear_mm_cpumask_cpu(cpu, t->mm);
		/* Release the task lock taken by find_lock_task_mm() */
		task_unlock(t);
	}
	rcu_read_unlock();
}

/*
 * Take this CPU down.
 *
 * Passed to stop_machine_cpuslocked() by takedown_cpu(); @_param is unused.
 * Returns the negative error of __cpu_disable() when that fails, otherwise 0.
 */
static int take_cpu_down(void *_param)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);
	/* Never go below CPUHP_AP_OFFLINE from here */
	enum cpuhp_state target = max((int)st->target, CPUHP_AP_OFFLINE);
	int err, cpu = smp_processor_id();

	/* Ensure this CPU doesn't handle any more interrupts. */
	err = __cpu_disable();
	if (err < 0)
		return err;

	/*
	 * Must be called from CPUHP_TEARDOWN_CPU, which means, as we are going
	 * down, that the current state is CPUHP_TEARDOWN_CPU - 1.
	 */
	WARN_ON(st->state != (CPUHP_TEARDOWN_CPU - 1));

	/*
	 * Invoke the former CPU_DYING callbacks. DYING must not fail!
	 */
	cpuhp_invoke_callback_range_nofail(false, cpu, st, target);

	/* Park the stopper thread */
	stop_machine_park(cpu);
	return 0;
}

static int takedown_cpu(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int err;

	/* Park the smpboot threads */
	kthread_park(st->thread);

	/*
	 * Prevent irq alloc/free while the dying cpu reorganizes the
	 * interrupt affinities.
	 */
	irq_lock_sparse();

	err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
	if (err) {
		/* CPU refused to die */
		irq_unlock_sparse();
		/* Unpark the hotplug thread so we can rollback there */
		kthread_unpark(st->thread);
		return err;
	}
	BUG_ON(cpu_online(cpu));

	/*
	 * The teardown callback for CPUHP_AP_SCHED_STARTING will have removed
	 * all runnable tasks from the CPU, there's only the idle task left now
	 * that the migration thread is done doing the stop_machine thing.
	 *
	 * Wait for the stop thread to go away.
*/
	wait_for_ap_thread(st, false);
	BUG_ON(st->state != CPUHP_AP_IDLE_DEAD);

	/* Interrupts are moved away from the dying cpu, reenable alloc/free */
	irq_unlock_sparse();

	hotplug_cpu__broadcast_tick_pull(cpu);
	/* This actually kills the CPU. */
	__cpu_die(cpu);

	cpuhp_bp_sync_dead(cpu);

	lockdep_cleanup_dead_cpu(cpu, idle_thread_get(cpu));

	/*
	 * Callbacks must be re-integrated right away to the RCU state machine.
	 * Otherwise an RCU callback could block a further teardown function
	 * waiting for its completion.
	 */
	rcutree_migrate_callbacks(cpu);

	return 0;
}

/*
 * Cross-call target used by cpuhp_report_idle_dead(): fires the teardown
 * completion of the state passed in @arg.
 */
static void cpuhp_complete_idle_dead(void *arg)
{
	struct cpuhp_cpu_state *st = arg;

	complete_ap_thread(st, false);
}

/*
 * Move the current CPU from CPUHP_AP_OFFLINE (BUG otherwise) to
 * CPUHP_AP_IDLE_DEAD and have the first CPU in cpu_online_mask signal the
 * teardown completion.
 */
void cpuhp_report_idle_dead(void)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	BUG_ON(st->state != CPUHP_AP_OFFLINE);
	tick_assert_timekeeping_handover();
	rcutree_report_cpu_dead();
	st->state = CPUHP_AP_IDLE_DEAD;
	/*
	 * We cannot call complete after rcutree_report_cpu_dead() so we delegate it
	 * to an online cpu.
	 */
	smp_call_function_single(cpumask_first(cpu_online_mask),
				 cpuhp_complete_idle_dead, st, 0);
}

/*
 * Run the teardown callbacks for @cpu down to @target.
 *
 * On failure the state is reset towards the state recorded on entry and, if
 * the current state is below that state, the startup callbacks are run back
 * up to it. A failure during that rollback triggers a WARN_ON().
 *
 * Returns 0 on success or the error of the failing teardown callback.
 */
static int cpuhp_down_callbacks(unsigned int cpu, struct cpuhp_cpu_state *st,
				enum cpuhp_state target)
{
	enum cpuhp_state prev_state = st->state;
	int ret = 0;

	ret = cpuhp_invoke_callback_range(false, cpu, st, target);
	if (ret) {
		pr_debug("CPU DOWN failed (%d) CPU %u state %s (%d)\n",
			 ret, cpu, cpuhp_get_step(st->state)->name,
			 st->state);

		cpuhp_reset_state(cpu, st, prev_state);

		if (st->state < prev_state)
			WARN_ON(cpuhp_invoke_callback_range(true, cpu, st,
							    prev_state));
	}

	return ret;
}

/* Requires cpu_add_remove_lock to be held */
static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
			   enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	int prev_state, ret = 0;

	/* Refuse to take down the only online CPU */
	if (num_online_cpus() == 1)
		return -EBUSY;

	if (!cpu_present(cpu))
		return -EINVAL;

	cpus_write_lock();

	cpuhp_tasks_frozen = tasks_frozen;

	prev_state = cpuhp_set_state(cpu, st, target);
	/*
	 * If the 
current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread.
	 */
	if (st->state > CPUHP_TEARDOWN_CPU) {
		/* The AP thread only handles states down to CPUHP_TEARDOWN_CPU */
		st->target = max((int)target, CPUHP_TEARDOWN_CPU);
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;

		/*
		 * We might have stopped still in the range of the AP hotplug
		 * thread. Nothing to do anymore.
		 */
		if (st->state > CPUHP_TEARDOWN_CPU)
			goto out;

		st->target = target;
	}
	/*
	 * The AP brought itself down to CPUHP_TEARDOWN_CPU. So we need
	 * to do the further cleanups.
	 */
	ret = cpuhp_down_callbacks(cpu, st, target);
	if (ret && st->state < prev_state) {
		if (st->state == CPUHP_TEARDOWN_CPU) {
			/* Hand the rollback to the AP hotplug thread */
			cpuhp_reset_state(cpu, st, prev_state);
			__cpuhp_kick_ap(st);
		} else {
			WARN(1, "DEAD callback error for CPU%d", cpu);
		}
	}

out:
	cpus_write_unlock();
	arch_smt_update();
	return ret;
}

/* Argument bundle handed to __cpu_down_maps_locked() via work_on_cpu() */
struct cpu_down_work {
	unsigned int		cpu;
	enum cpuhp_state	target;
};

/* work_on_cpu() callback: runs _cpu_down() with tasks_frozen == 0. */
static long __cpu_down_maps_locked(void *arg)
{
	struct cpu_down_work *work = arg;

	return _cpu_down(work->cpu, 0, work->target);
}

/*
 * Take @cpu down to @target by running _cpu_down() on a different CPU.
 *
 * Returns -EOPNOTSUPP when offlining is disabled for the platform, -EBUSY
 * when hotplug is currently disabled or no other suitable CPU is found,
 * otherwise the result of work_on_cpu().
 */
static int cpu_down_maps_locked(unsigned int cpu, enum cpuhp_state target)
{
	struct cpu_down_work work = { .cpu = cpu, .target = target, };

	/*
	 * If the platform does not support hotplug, report it explicitly to
	 * differentiate it from a transient offlining failure.
	 */
	if (cpu_hotplug_offline_disabled)
		return -EOPNOTSUPP;
	if (cpu_hotplug_disabled)
		return -EBUSY;

	/*
	 * Ensure that the control task does not run on the to be offlined
	 * CPU to prevent a deadlock against cfs_b->period_timer.
	 * Also keep at least one housekeeping cpu onlined to avoid generating
	 * an empty sched_domain span.
*/ for_each_cpu_and(cpu, cpu_online_mask, housekeeping_cpumask(HK_TYPE_DOMAIN)) { if (cpu != work.cpu) return work_on_cpu(cpu, __cpu_down_maps_locked, &work); } return -EBUSY; } static int cpu_down(unsigned int cpu, enum cpuhp_state target) { int err; cpu_maps_update_begin(); err = cpu_down_maps_locked(cpu, target); cpu_maps_update_done(); return err; } /** * cpu_device_down - Bring down a cpu device * @dev: Pointer to the cpu device to offline * * This function is meant to be used by device core cpu subsystem only. * * Other subsystems should use remove_cpu() instead. * * Return: %0 on success or a negative errno code */ int cpu_device_down(struct device *dev) { return cpu_down(dev->id, CPUHP_OFFLINE); } int remove_cpu(unsigned int cpu) { int ret; lock_device_hotplug(); ret = device_offline(get_cpu_device(cpu)); unlock_device_hotplug(); return ret; } EXPORT_SYMBOL_GPL(remove_cpu); void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { unsigned int cpu; int error; cpu_maps_update_begin(); /* * Make certain the cpu I'm about to reboot on is online. * * This is inline to what migrate_to_reboot_cpu() already do. */ if (!cpu_online(primary_cpu)) primary_cpu = cpumask_first(cpu_online_mask); for_each_online_cpu(cpu) { if (cpu == primary_cpu) continue; error = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); if (error) { pr_err("Failed to offline CPU%d - error=%d", cpu, error); break; } } /* * Ensure all but the reboot CPU are offline. */ BUG_ON(num_online_cpus() > 1); /* * Make sure the CPUs won't be enabled by someone else after this * point. Kexec will reboot to a new kernel shortly resetting * everything along the way. 
*/
	cpu_hotplug_disabled++;

	cpu_maps_update_done();
}

#else
/* No teardown handler for CPUHP_TEARDOWN_CPU in this configuration */
#define takedown_cpu		NULL
#endif /*CONFIG_HOTPLUG_CPU*/

/**
 * notify_cpu_starting(cpu) - Invoke the callbacks on the starting CPU
 * @cpu: cpu that just started
 *
 * It must be called by the arch code on the new cpu, before the new cpu
 * enables interrupts and before the "boot" cpu returns from __cpu_up().
 */
void notify_cpu_starting(unsigned int cpu)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	/* Do not run past CPUHP_AP_ONLINE from here */
	enum cpuhp_state target = min((int)st->target, CPUHP_AP_ONLINE);

	rcutree_report_cpu_starting(cpu);	/* Enables RCU usage on this CPU. */
	/* Record that this CPU has been started at least once */
	cpumask_set_cpu(cpu, &cpus_booted_once_mask);

	/*
	 * STARTING must not fail!
	 */
	cpuhp_invoke_callback_range_nofail(true, cpu, st, target);
}

/*
 * Called from the idle task. Wake up the controlling task which brings the
 * hotplug thread of the upcoming CPU up and then delegates the rest of the
 * online bringup to the hotplug thread.
 */
void cpuhp_online_idle(enum cpuhp_state state)
{
	struct cpuhp_cpu_state *st = this_cpu_ptr(&cpuhp_state);

	/* Happens for the boot cpu */
	if (state != CPUHP_AP_ONLINE_IDLE)
		return;

	cpuhp_ap_update_sync_state(SYNC_STATE_ONLINE);

	/*
	 * Unpark the stopper thread before we start the idle loop (and start
	 * scheduling); this ensures the stopper task is always available.
	 */
	stop_machine_unpark(smp_processor_id());

	st->state = CPUHP_AP_ONLINE_IDLE;
	complete_ap_thread(st, true);
}

/* Requires cpu_add_remove_lock to be held */
static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
{
	struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
	struct task_struct *idle;
	int ret = 0;

	cpus_write_lock();

	if (!cpu_present(cpu)) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * The caller of cpu_up() might have raced with another
	 * caller. Nothing to do.
*/
	if (st->state >= target)
		goto out;

	if (st->state == CPUHP_OFFLINE) {
		/* Let it fail before we try to bring the cpu up */
		idle = idle_thread_get(cpu);
		if (IS_ERR(idle)) {
			ret = PTR_ERR(idle);
			goto out;
		}

		/*
		 * Reset stale stack state from the last time this CPU was online.
		 */
		scs_task_reset(idle);
		kasan_unpoison_task_stack(idle);
	}

	cpuhp_tasks_frozen = tasks_frozen;

	cpuhp_set_state(cpu, st, target);
	/*
	 * If the current CPU state is in the range of the AP hotplug thread,
	 * then we need to kick the thread once more.
	 */
	if (st->state > CPUHP_BRINGUP_CPU) {
		ret = cpuhp_kick_ap_work(cpu);
		/*
		 * The AP side has done the error rollback already. Just
		 * return the error code..
		 */
		if (ret)
			goto out;
	}

	/*
	 * Try to reach the target state. We max out on the BP at
	 * CPUHP_BRINGUP_CPU. After that the AP hotplug thread is
	 * responsible for bringing it up to the target state.
	 */
	target = min((int)target, CPUHP_BRINGUP_CPU);
	ret = cpuhp_up_callbacks(cpu, st, target);
out:
	cpus_write_unlock();
	arch_smt_update();
	return ret;
}

/*
 * Bring @cpu up to @target.
 *
 * Returns -EINVAL if the CPU is not possible, the error of
 * try_online_node() if that fails, -EBUSY while hotplug is disabled,
 * -EPERM if cpu_bootable() rejects the CPU, otherwise the result of
 * _cpu_up().
 */
static int cpu_up(unsigned int cpu, enum cpuhp_state target)
{
	int err = 0;

	if (!cpu_possible(cpu)) {
		pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n",
		       cpu);
		return -EINVAL;
	}

	err = try_online_node(cpu_to_node(cpu));
	if (err)
		return err;

	cpu_maps_update_begin();

	if (cpu_hotplug_disabled) {
		err = -EBUSY;
		goto out;
	}
	if (!cpu_bootable(cpu)) {
		err = -EPERM;
		goto out;
	}

	err = _cpu_up(cpu, 0, target);
out:
	cpu_maps_update_done();
	return err;
}

/**
 * cpu_device_up - Bring up a cpu device
 * @dev: Pointer to the cpu device to online
 *
 * This function is meant to be used by device core cpu subsystem only.
 *
 * Other subsystems should use add_cpu() instead.
* * Return: %0 on success or a negative errno code */ int cpu_device_up(struct device *dev) { return cpu_up(dev->id, CPUHP_ONLINE); } int add_cpu(unsigned int cpu) { int ret; lock_device_hotplug(); ret = device_online(get_cpu_device(cpu)); unlock_device_hotplug(); return ret; } EXPORT_SYMBOL_GPL(add_cpu); /** * bringup_hibernate_cpu - Bring up the CPU that we hibernated on * @sleep_cpu: The cpu we hibernated on and should be brought up. * * On some architectures like arm64, we can hibernate on any CPU, but on * wake up the CPU we hibernated on might be offline as a side effect of * using maxcpus= for example. * * Return: %0 on success or a negative errno code */ int bringup_hibernate_cpu(unsigned int sleep_cpu) { int ret; if (!cpu_online(sleep_cpu)) { pr_info("Hibernated on a CPU that is offline! Bringing CPU up.\n"); ret = cpu_up(sleep_cpu, CPUHP_ONLINE); if (ret) { pr_err("Failed to bring hibernate-CPU up!\n"); return ret; } } return 0; } static void __init cpuhp_bringup_mask(const struct cpumask *mask, unsigned int ncpus, enum cpuhp_state target) { unsigned int cpu; for_each_cpu(cpu, mask) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); if (cpu_up(cpu, target) && can_rollback_cpu(st)) { /* * If this failed then cpu_up() might have only * rolled back to CPUHP_BP_KICK_AP for the final * online. Clean it up. NOOP if already rolled back. 
*/
			WARN_ON(cpuhp_invoke_callback_range(false, cpu, st, CPUHP_OFFLINE));
		}

		/* Stop once the budget of @ncpus CPUs is used up */
		if (!--ncpus)
			break;
	}
}

#ifdef CONFIG_HOTPLUG_PARALLEL
/* Parallel bringup defaults to enabled; "cpuhp.parallel=" overrides it */
static bool __cpuhp_parallel_bringup __ro_after_init = true;

/* Parse the boolean value of the "cpuhp.parallel" early parameter. */
static int __init parallel_bringup_parse_param(char *arg)
{
	return kstrtobool(arg, &__cpuhp_parallel_bringup);
}
early_param("cpuhp.parallel", parallel_bringup_parse_param);

#ifdef CONFIG_HOTPLUG_SMT
/* True when more than one SMT thread is reported by cpu_smt_max_threads. */
static inline bool cpuhp_smt_aware(void)
{
	return cpu_smt_max_threads > 1;
}

static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
{
	return cpu_primary_thread_mask;
}
#else
/* Without CONFIG_HOTPLUG_SMT: never SMT aware, empty primary thread mask */
static inline bool cpuhp_smt_aware(void)
{
	return false;
}
static inline const struct cpumask *cpuhp_get_primary_thread_mask(void)
{
	return cpu_none_mask;
}
#endif

/* Weak default: parallel bringup initialization succeeds. */
bool __weak arch_cpuhp_init_parallel_bringup(void)
{
	return true;
}

/*
 * On architectures which have enabled parallel bringup this invokes all BP
 * prepare states for each of the to be onlined APs first. The last state
 * sends the startup IPI to the APs. The APs proceed through the low level
 * bringup code in parallel and then wait for the control CPU to release
 * them one by one for the final onlining procedure.
 *
 * This avoids waiting for each AP to respond to the startup IPI in
 * CPUHP_BRINGUP_CPU.
 */
static bool __init cpuhp_bringup_cpus_parallel(unsigned int ncpus)
{
	const struct cpumask *mask = cpu_present_mask;

	if (__cpuhp_parallel_bringup)
		__cpuhp_parallel_bringup = arch_cpuhp_init_parallel_bringup();
	if (!__cpuhp_parallel_bringup)
		return false;

	if (cpuhp_smt_aware()) {
		const struct cpumask *pmask = cpuhp_get_primary_thread_mask();
		static struct cpumask tmp_mask __initdata;

		/*
		 * X86 requires to prevent that SMT siblings stopped while
		 * the primary thread does a microcode update for various
		 * reasons. Bring the primary threads up first.
*/
		cpumask_and(&tmp_mask, mask, pmask);
		cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_BP_KICK_AP);
		cpuhp_bringup_mask(&tmp_mask, ncpus, CPUHP_ONLINE);
		/* Account for the online CPUs */
		ncpus -= num_online_cpus();
		if (!ncpus)
			return true;
		/* Create the mask for secondary CPUs */
		cpumask_andnot(&tmp_mask, mask, pmask);
		mask = &tmp_mask;
	}

	/* Bring the not-yet started CPUs up */
	cpuhp_bringup_mask(mask, ncpus, CPUHP_BP_KICK_AP);
	cpuhp_bringup_mask(mask, ncpus, CPUHP_ONLINE);
	return true;
}
#else
/* Parallel bringup is not available: always fall back to serial bringup */
static inline bool cpuhp_bringup_cpus_parallel(unsigned int ncpus) { return false; }
#endif /* CONFIG_HOTPLUG_PARALLEL */

/*
 * Bring up the present CPUs, limited by @max_cpus. Does nothing when
 * @max_cpus is 0. The parallel path is tried first; when it declines, the
 * CPUs are brought up one at a time to CPUHP_ONLINE.
 */
void __init bringup_nonboot_cpus(unsigned int max_cpus)
{
	if (!max_cpus)
		return;

	/* Try parallel bringup optimization if enabled */
	if (cpuhp_bringup_cpus_parallel(max_cpus))
		return;

	/* Full per CPU serialized bringup */
	cpuhp_bringup_mask(cpu_present_mask, max_cpus, CPUHP_ONLINE);
}

#ifdef CONFIG_PM_SLEEP_SMP
/* CPUs taken down by freeze_secondary_cpus(), for thaw_secondary_cpus() */
static cpumask_var_t frozen_cpus;

int freeze_secondary_cpus(int primary)
{
	int cpu, error = 0;

	cpu_maps_update_begin();
	if (primary == -1) {
		/* No CPU requested: start from the first online CPU */
		primary = cpumask_first(cpu_online_mask);
		if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
			primary = housekeeping_any_cpu(HK_TYPE_TIMER);
	} else {
		if (!cpu_online(primary))
			primary = cpumask_first(cpu_online_mask);
	}

	/*
	 * We take down all of the non-boot CPUs in one shot to avoid races
	 * with the userspace trying to use the CPU hotplug at the same time
	 */
	cpumask_clear(frozen_cpus);

	pr_info("Disabling non-boot CPUs ...\n");
	/* Walk the CPU ids from the highest to the lowest */
	for (cpu = nr_cpu_ids - 1; cpu >= 0; cpu--) {
		if (!cpu_online(cpu) || cpu == primary)
			continue;

		if (pm_wakeup_pending()) {
			pr_info("Wakeup pending. 
Abort CPU freeze\n");
			error = -EBUSY;
			break;
		}

		trace_suspend_resume(TPS("CPU_OFF"), cpu, true);
		error = _cpu_down(cpu, 1, CPUHP_OFFLINE);
		trace_suspend_resume(TPS("CPU_OFF"), cpu, false);
		if (!error)
			cpumask_set_cpu(cpu, frozen_cpus);
		else {
			pr_err("Error taking CPU%d down: %d\n", cpu, error);
			break;
		}
	}

	if (!error)
		BUG_ON(num_online_cpus() > 1);
	else
		pr_err("Non-boot CPUs are not disabled\n");

	/*
	 * Make sure the CPUs won't be enabled by someone else. We need to do
	 * this even in case of failure as all freeze_secondary_cpus() users are
	 * supposed to do thaw_secondary_cpus() on the failure path.
	 */
	cpu_hotplug_disabled++;

	cpu_maps_update_done();
	return error;
}

/* Weak no-op hook invoked before the frozen CPUs are brought back up */
void __weak arch_thaw_secondary_cpus_begin(void)
{
}

/* Weak no-op hook invoked after the frozen CPUs have been processed */
void __weak arch_thaw_secondary_cpus_end(void)
{
}

/*
 * Re-enable CPU hotplug and bring every CPU recorded in frozen_cpus back to
 * CPUHP_ONLINE. A CPU that fails to come up is reported with pr_warn() and
 * skipped. frozen_cpus is cleared afterwards.
 */
void thaw_secondary_cpus(void)
{
	int cpu, error;

	/* Allow everyone to use the CPU hotplug again */
	cpu_maps_update_begin();
	__cpu_hotplug_enable();
	if (cpumask_empty(frozen_cpus))
		goto out;

	pr_info("Enabling non-boot CPUs ...\n");

	arch_thaw_secondary_cpus_begin();

	for_each_cpu(cpu, frozen_cpus) {
		trace_suspend_resume(TPS("CPU_ON"), cpu, true);
		error = _cpu_up(cpu, 1, CPUHP_ONLINE);
		trace_suspend_resume(TPS("CPU_ON"), cpu, false);
		if (!error) {
			pr_info("CPU%d is up\n", cpu);
			continue;
		}
		pr_warn("Error taking CPU%d up: %d\n", cpu, error);
	}

	arch_thaw_secondary_cpus_end();

	cpumask_clear(frozen_cpus);
out:
	cpu_maps_update_done();
}

/* Allocate the zeroed frozen_cpus mask. Returns -ENOMEM on failure, else 0. */
static int __init alloc_frozen_cpus(void)
{
	if (!alloc_cpumask_var(&frozen_cpus, GFP_KERNEL|__GFP_ZERO))
		return -ENOMEM;
	return 0;
}
core_initcall(alloc_frozen_cpus);

/*
 * When callbacks for CPU hotplug notifications are being executed, we must
 * ensure that the state of the system with respect to the tasks being frozen
 * or not, as reported by the notification, remains unchanged *throughout the
 * duration* of the execution of the callbacks.
 * Hence we need to prevent the freezer from racing with regular CPU hotplug.
*
 * This synchronization is implemented by mutually excluding regular CPU
 * hotplug and Suspend/Hibernate call paths by hooking onto the Suspend/
 * Hibernate notifications.
 */
static int
cpu_hotplug_pm_callback(struct notifier_block *nb,
			unsigned long action, void *ptr)
{
	switch (action) {

	case PM_SUSPEND_PREPARE:
	case PM_HIBERNATION_PREPARE:
		cpu_hotplug_disable();
		break;

	case PM_POST_SUSPEND:
	case PM_POST_HIBERNATION:
		cpu_hotplug_enable();
		break;

	default:
		/* Any other action is not handled here */
		return NOTIFY_DONE;
	}

	return NOTIFY_OK;
}

/* Register cpu_hotplug_pm_callback() as a PM notifier. Always returns 0. */
static int __init cpu_hotplug_pm_sync_init(void)
{
	/*
	 * cpu_hotplug_pm_callback has higher priority than x86
	 * bsp_pm_callback which depends on cpu_hotplug_pm_callback
	 * to disable cpu hotplug to avoid cpu hotplug race.
	 */
	pm_notifier(cpu_hotplug_pm_callback, 0);
	return 0;
}
core_initcall(cpu_hotplug_pm_sync_init);

#endif /* CONFIG_PM_SLEEP_SMP */

int __boot_cpu_id;

#endif /* CONFIG_SMP */

/*
 * Boot processor state steps
 *
 * The table is indexed by enum cpuhp_state. Each entry carries the name of
 * the step and, where set, its startup and teardown callbacks.
 */
static struct cpuhp_step cpuhp_hp_states[] = {
	[CPUHP_OFFLINE] = {
		.name			= "offline",
		.startup.single		= NULL,
		.teardown.single	= NULL,
	},
#ifdef CONFIG_SMP
	[CPUHP_CREATE_THREADS]= {
		.name			= "threads:prepare",
		.startup.single		= smpboot_create_threads,
		.teardown.single	= NULL,
		.cant_stop		= true,
	},
	[CPUHP_RANDOM_PREPARE] = {
		.name			= "random:prepare",
		.startup.single		= random_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_WORKQUEUE_PREP] = {
		.name			= "workqueue:prepare",
		.startup.single		= workqueue_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_HRTIMERS_PREPARE] = {
		.name			= "hrtimers:prepare",
		.startup.single		= hrtimers_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_SMPCFD_PREPARE] = {
		.name			= "smpcfd:prepare",
		.startup.single		= smpcfd_prepare_cpu,
		.teardown.single	= smpcfd_dead_cpu,
	},
	[CPUHP_RELAY_PREPARE] = {
		.name			= "relay:prepare",
		.startup.single		= relay_prepare_cpu,
		.teardown.single	= NULL,
	},
	[CPUHP_RCUTREE_PREP] = {
		.name			= "RCU/tree:prepare",
		.startup.single		= rcutree_prepare_cpu,
		.teardown.single	= rcutree_dead_cpu,
	},
	/*
	 * On the tear-down 
path, timers_dead_cpu() must be invoked
	 * before blk_mq_queue_reinit_notify() from notify_dead(),
	 * otherwise a RCU stall occurs.
	 */
	[CPUHP_TIMERS_PREPARE] = {
		.name			= "timers:prepare",
		.startup.single		= timers_prepare_cpu,
		.teardown.single	= timers_dead_cpu,
	},

#ifdef CONFIG_HOTPLUG_SPLIT_STARTUP
	/*
	 * Kicks the AP alive. AP will wait in cpuhp_ap_sync_alive() until
	 * the next step will release it.
	 *
	 * No teardown callback is registered for this step.
	 */
	[CPUHP_BP_KICK_AP] = {
		.name			= "cpu:kick_ap",
		.startup.single		= cpuhp_kick_ap_alive,
	},

	/*
	 * Waits for the AP to reach cpuhp_ap_sync_alive() and then
	 * releases it for the complete bringup.
	 */
	[CPUHP_BRINGUP_CPU] = {
		.name			= "cpu:bringup",
		.startup.single		= cpuhp_bringup_ap,
		.teardown.single	= finish_cpu,
		.cant_stop		= true,
	},
#else
	/*
	 * All-in-one CPU bringup state which includes the kick alive.
	 */
	[CPUHP_BRINGUP_CPU] = {
		.name			= "cpu:bringup",
		.startup.single		= bringup_cpu,
		.teardown.single	= finish_cpu,
		.cant_stop		= true,
	},
#endif
	/* Final state before CPU kills itself */
	[CPUHP_AP_IDLE_DEAD] = {
		.name			= "idle:dead",
	},
	/*
	 * Last state before CPU enters the idle loop to die. Transient state
	 * for synchronization.
	 */
	[CPUHP_AP_OFFLINE] = {
		.name			= "ap:offline",
		.cant_stop		= true,
	},
	/* First state is scheduler control. Interrupts are disabled */
	[CPUHP_AP_SCHED_STARTING] = {
		.name			= "sched:starting",
		.startup.single		= sched_cpu_starting,
		.teardown.single	= sched_cpu_dying,
	},
	/* The following "dying" steps only register a teardown callback,
	 * except hrtimers which also has a startup callback. */
	[CPUHP_AP_RCUTREE_DYING] = {
		.name			= "RCU/tree:dying",
		.startup.single		= NULL,
		.teardown.single	= rcutree_dying_cpu,
	},
	[CPUHP_AP_SMPCFD_DYING] = {
		.name			= "smpcfd:dying",
		.startup.single		= NULL,
		.teardown.single	= smpcfd_dying_cpu,
	},
	[CPUHP_AP_HRTIMERS_DYING] = {
		.name			= "hrtimers:dying",
		.startup.single		= hrtimers_cpu_starting,
		.teardown.single	= hrtimers_cpu_dying,
	},
	[CPUHP_AP_TICK_DYING] = {
		.name			= "tick:dying",
		.startup.single		= NULL,
		.teardown.single	= tick_cpu_dying,
	},
	/* Entry state on starting. Interrupts enabled from here on. 
Transient * state for synchronsization */ [CPUHP_AP_ONLINE] = { .name = "ap:online", }, /* * Handled on control processor until the plugged processor manages * this itself. */ [CPUHP_TEARDOWN_CPU] = { .name = "cpu:teardown", .startup.single = NULL, .teardown.single = takedown_cpu, .cant_stop = true, }, [CPUHP_AP_SCHED_WAIT_EMPTY] = { .name = "sched:waitempty", .startup.single = NULL, .teardown.single = sched_cpu_wait_empty, }, /* Handle smpboot threads park/unpark */ [CPUHP_AP_SMPBOOT_THREADS] = { .name = "smpboot/threads:online", .startup.single = smpboot_unpark_threads, .teardown.single = smpboot_park_threads, }, [CPUHP_AP_IRQ_AFFINITY_ONLINE] = { .name = "irq/affinity:online", .startup.single = irq_affinity_online_cpu, .teardown.single = NULL, }, [CPUHP_AP_PERF_ONLINE] = { .name = "perf:online", .startup.single = perf_event_init_cpu, .teardown.single = perf_event_exit_cpu, }, [CPUHP_AP_WATCHDOG_ONLINE] = { .name = "lockup_detector:online", .startup.single = lockup_detector_online_cpu, .teardown.single = lockup_detector_offline_cpu, }, [CPUHP_AP_WORKQUEUE_ONLINE] = { .name = "workqueue:online", .startup.single = workqueue_online_cpu, .teardown.single = workqueue_offline_cpu, }, [CPUHP_AP_RANDOM_ONLINE] = { .name = "random:online", .startup.single = random_online_cpu, .teardown.single = NULL, }, [CPUHP_AP_RCUTREE_ONLINE] = { .name = "RCU/tree:online", .startup.single = rcutree_online_cpu, .teardown.single = rcutree_offline_cpu, }, #endif /* * The dynamically registered state space is here */ #ifdef CONFIG_SMP /* Last state is scheduler control setting the cpu active */ [CPUHP_AP_ACTIVE] = { .name = "sched:active", .startup.single = sched_cpu_activate, .teardown.single = sched_cpu_deactivate, }, #endif /* CPU is fully up and running. 
*/ [CPUHP_ONLINE] = { .name = "online", .startup.single = NULL, .teardown.single = NULL, }, }; /* Sanity check for callbacks */ static int cpuhp_cb_check(enum cpuhp_state state) { if (state <= CPUHP_OFFLINE || state >= CPUHP_ONLINE) return -EINVAL; return 0; } /* * Returns a free for dynamic slot assignment of the Online state. The states * are protected by the cpuhp_slot_states mutex and an empty slot is identified * by having no name assigned. */ static int cpuhp_reserve_state(enum cpuhp_state state) { enum cpuhp_state i, end; struct cpuhp_step *step; switch (state) { case CPUHP_AP_ONLINE_DYN: step = cpuhp_hp_states + CPUHP_AP_ONLINE_DYN; end = CPUHP_AP_ONLINE_DYN_END; break; case CPUHP_BP_PREPARE_DYN: step = cpuhp_hp_states + CPUHP_BP_PREPARE_DYN; end = CPUHP_BP_PREPARE_DYN_END; break; default: return -EINVAL; } for (i = state; i <= end; i++, step++) { if (!step->name) return i; } WARN(1, "No more dynamic states available for CPU hotplug\n"); return -ENOSPC; } static int cpuhp_store_callbacks(enum cpuhp_state state, const char *name, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu), bool multi_instance) { /* (Un)Install the callbacks for further cpu hotplug operations */ struct cpuhp_step *sp; int ret = 0; /* * If name is NULL, then the state gets removed. * * CPUHP_AP_ONLINE_DYN and CPUHP_BP_PREPARE_DYN are handed out on * the first allocation from these dynamic ranges, so the removal * would trigger a new allocation and clear the wrong (already * empty) state, leaving the callbacks of the to be cleared state * dangling, which causes wreckage on the next hotplug operation. 
*/ if (name && (state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN)) { ret = cpuhp_reserve_state(state); if (ret < 0) return ret; state = ret; } sp = cpuhp_get_step(state); if (name && sp->name) return -EBUSY; sp->startup.single = startup; sp->teardown.single = teardown; sp->name = name; sp->multi_instance = multi_instance; INIT_HLIST_HEAD(&sp->list); return ret; } static void *cpuhp_get_teardown_cb(enum cpuhp_state state) { return cpuhp_get_step(state)->teardown.single; } /* * Call the startup/teardown function for a step either on the AP or * on the current CPU. */ static int cpuhp_issue_call(int cpu, enum cpuhp_state state, bool bringup, struct hlist_node *node) { struct cpuhp_step *sp = cpuhp_get_step(state); int ret; /* * If there's nothing to do, we done. * Relies on the union for multi_instance. */ if (cpuhp_step_empty(bringup, sp)) return 0; /* * The non AP bound callbacks can fail on bringup. On teardown * e.g. module removal we crash for now. */ #ifdef CONFIG_SMP if (cpuhp_is_ap_state(state)) ret = cpuhp_invoke_ap_callback(cpu, state, bringup, node); else ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL); #else ret = cpuhp_invoke_callback(cpu, state, bringup, node, NULL); #endif BUG_ON(ret && !bringup); return ret; } /* * Called from __cpuhp_setup_state on a recoverable failure. * * Note: The teardown callbacks for rollback are not allowed to fail! */ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state, struct hlist_node *node) { int cpu; /* Roll back the already executed steps on the other cpus */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpu >= failedcpu) break; /* Did we invoke the startup call on that cpu ? 
*/ if (cpustate >= state) cpuhp_issue_call(cpu, state, false, node); } } int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state, struct hlist_node *node, bool invoke) { struct cpuhp_step *sp; int cpu; int ret; lockdep_assert_cpus_held(); sp = cpuhp_get_step(state); if (sp->multi_instance == false) return -EINVAL; mutex_lock(&cpuhp_state_mutex); if (!invoke || !sp->startup.multi) goto add_node; /* * Try to call the startup callback for each present cpu * depending on the hotplug state of the cpu. */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate < state) continue; ret = cpuhp_issue_call(cpu, state, true, node); if (ret) { if (sp->teardown.multi) cpuhp_rollback_install(cpu, state, node); goto unlock; } } add_node: ret = 0; hlist_add_head(node, &sp->list); unlock: mutex_unlock(&cpuhp_state_mutex); return ret; } int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke) { int ret; cpus_read_lock(); ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke); cpus_read_unlock(); return ret; } EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance); /** * __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state * @state: The state to setup * @name: Name of the step * @invoke: If true, the startup function is invoked for cpus where * cpu state >= @state * @startup: startup callback function * @teardown: teardown callback function * @multi_instance: State is set up for multiple instances which get * added afterwards. * * The caller needs to hold cpus read locked while calling this function. 
* Return: * On success: * Positive state number if @state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN; * 0 for all other states * On failure: proper (negative) error code */ int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name, bool invoke, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu), bool multi_instance) { int cpu, ret = 0; bool dynstate; lockdep_assert_cpus_held(); if (cpuhp_cb_check(state) || !name) return -EINVAL; mutex_lock(&cpuhp_state_mutex); ret = cpuhp_store_callbacks(state, name, startup, teardown, multi_instance); dynstate = state == CPUHP_AP_ONLINE_DYN || state == CPUHP_BP_PREPARE_DYN; if (ret > 0 && dynstate) { state = ret; ret = 0; } if (ret || !invoke || !startup) goto out; /* * Try to call the startup callback for each present cpu * depending on the hotplug state of the cpu. */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate < state) continue; ret = cpuhp_issue_call(cpu, state, true, NULL); if (ret) { if (teardown) cpuhp_rollback_install(cpu, state, NULL); cpuhp_store_callbacks(state, NULL, NULL, NULL, false); goto out; } } out: mutex_unlock(&cpuhp_state_mutex); /* * If the requested state is CPUHP_AP_ONLINE_DYN or CPUHP_BP_PREPARE_DYN, * return the dynamically allocated state in case of success. 
*/ if (!ret && dynstate) return state; return ret; } EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked); int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke, int (*startup)(unsigned int cpu), int (*teardown)(unsigned int cpu), bool multi_instance) { int ret; cpus_read_lock(); ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup, teardown, multi_instance); cpus_read_unlock(); return ret; } EXPORT_SYMBOL(__cpuhp_setup_state); int __cpuhp_state_remove_instance(enum cpuhp_state state, struct hlist_node *node, bool invoke) { struct cpuhp_step *sp = cpuhp_get_step(state); int cpu; BUG_ON(cpuhp_cb_check(state)); if (!sp->multi_instance) return -EINVAL; cpus_read_lock(); mutex_lock(&cpuhp_state_mutex); if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* * Call the teardown callback for each present cpu depending * on the hotplug state of the cpu. This function is not * allowed to fail currently! */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate >= state) cpuhp_issue_call(cpu, state, false, node); } remove: hlist_del(node); mutex_unlock(&cpuhp_state_mutex); cpus_read_unlock(); return 0; } EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance); /** * __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state * @state: The state to remove * @invoke: If true, the teardown function is invoked for cpus where * cpu state >= @state * * The caller needs to hold cpus read locked while calling this function. * The teardown callback is currently not allowed to fail. Think * about module removal! 
*/ void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke) { struct cpuhp_step *sp = cpuhp_get_step(state); int cpu; BUG_ON(cpuhp_cb_check(state)); lockdep_assert_cpus_held(); mutex_lock(&cpuhp_state_mutex); if (sp->multi_instance) { WARN(!hlist_empty(&sp->list), "Error: Removing state %d which has instances left.\n", state); goto remove; } if (!invoke || !cpuhp_get_teardown_cb(state)) goto remove; /* * Call the teardown callback for each present cpu depending * on the hotplug state of the cpu. This function is not * allowed to fail currently! */ for_each_present_cpu(cpu) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu); int cpustate = st->state; if (cpustate >= state) cpuhp_issue_call(cpu, state, false, NULL); } remove: cpuhp_store_callbacks(state, NULL, NULL, NULL, false); mutex_unlock(&cpuhp_state_mutex); } EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked); void __cpuhp_remove_state(enum cpuhp_state state, bool invoke) { cpus_read_lock(); __cpuhp_remove_state_cpuslocked(state, invoke); cpus_read_unlock(); } EXPORT_SYMBOL(__cpuhp_remove_state); #ifdef CONFIG_HOTPLUG_SMT static void cpuhp_offline_cpu_device(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); dev->offline = true; /* Tell user space about the state change */ kobject_uevent(&dev->kobj, KOBJ_OFFLINE); } static void cpuhp_online_cpu_device(unsigned int cpu) { struct device *dev = get_cpu_device(cpu); dev->offline = false; /* Tell user space about the state change */ kobject_uevent(&dev->kobj, KOBJ_ONLINE); } int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { int cpu, ret = 0; cpu_maps_update_begin(); for_each_online_cpu(cpu) { if (topology_is_primary_thread(cpu)) continue; /* * Disable can be called with CPU_SMT_ENABLED when changing * from a higher to lower number of SMT threads per core. 
*/ if (ctrlval == CPU_SMT_ENABLED && cpu_smt_thread_allowed(cpu)) continue; ret = cpu_down_maps_locked(cpu, CPUHP_OFFLINE); if (ret) break; /* * As this needs to hold the cpu maps lock it's impossible * to call device_offline() because that ends up calling * cpu_down() which takes cpu maps lock. cpu maps lock * needs to be held as this might race against in kernel * abusers of the hotplug machinery (thermal management). * * So nothing would update device:offline state. That would * leave the sysfs entry stale and prevent onlining after * smt control has been changed to 'off' again. This is * called under the sysfs hotplug lock, so it is properly * serialized against the regular offline usage. */ cpuhp_offline_cpu_device(cpu); } if (!ret) cpu_smt_control = ctrlval; cpu_maps_update_done(); return ret; } /* Check if the core a CPU belongs to is online */ #if !defined(topology_is_core_online) static inline bool topology_is_core_online(unsigned int cpu) { return true; } #endif int cpuhp_smt_enable(void) { int cpu, ret = 0; cpu_maps_update_begin(); cpu_smt_control = CPU_SMT_ENABLED; for_each_present_cpu(cpu) { /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu)) continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) break; /* See comment in cpuhp_smt_disable() */ cpuhp_online_cpu_device(cpu); } cpu_maps_update_done(); return ret; } #endif #if defined(CONFIG_SYSFS) && defined(CONFIG_HOTPLUG_CPU) static ssize_t state_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->state); } static DEVICE_ATTR_RO(state); static ssize_t target_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); struct cpuhp_step *sp; int target, ret; ret = 
kstrtoint(buf, 10, &target); if (ret) return ret; #ifdef CONFIG_CPU_HOTPLUG_STATE_CONTROL if (target < CPUHP_OFFLINE || target > CPUHP_ONLINE) return -EINVAL; #else if (target != CPUHP_OFFLINE && target != CPUHP_ONLINE) return -EINVAL; #endif ret = lock_device_hotplug_sysfs(); if (ret) return ret; mutex_lock(&cpuhp_state_mutex); sp = cpuhp_get_step(target); ret = !sp->name || sp->cant_stop ? -EINVAL : 0; mutex_unlock(&cpuhp_state_mutex); if (ret) goto out; if (st->state < target) ret = cpu_up(dev->id, target); else if (st->state > target) ret = cpu_down(dev->id, target); else if (WARN_ON(st->target != target)) st->target = target; out: unlock_device_hotplug(); return ret ? ret : count; } static ssize_t target_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->target); } static DEVICE_ATTR_RW(target); static ssize_t fail_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); struct cpuhp_step *sp; int fail, ret; ret = kstrtoint(buf, 10, &fail); if (ret) return ret; if (fail == CPUHP_INVALID) { st->fail = fail; return count; } if (fail < CPUHP_OFFLINE || fail > CPUHP_ONLINE) return -EINVAL; /* * Cannot fail STARTING/DYING callbacks. */ if (cpuhp_is_atomic_state(fail)) return -EINVAL; /* * DEAD callbacks cannot fail... * ... neither can CPUHP_BRINGUP_CPU during hotunplug. The latter * triggering STARTING callbacks, a failure in this state would * hinder rollback. */ if (fail <= CPUHP_BRINGUP_CPU && st->state > CPUHP_BRINGUP_CPU) return -EINVAL; /* * Cannot fail anything that doesn't have callbacks. 
*/ mutex_lock(&cpuhp_state_mutex); sp = cpuhp_get_step(fail); if (!sp->startup.single && !sp->teardown.single) ret = -EINVAL; mutex_unlock(&cpuhp_state_mutex); if (ret) return ret; st->fail = fail; return count; } static ssize_t fail_show(struct device *dev, struct device_attribute *attr, char *buf) { struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, dev->id); return sprintf(buf, "%d\n", st->fail); } static DEVICE_ATTR_RW(fail); static struct attribute *cpuhp_cpu_attrs[] = { &dev_attr_state.attr, &dev_attr_target.attr, &dev_attr_fail.attr, NULL }; static const struct attribute_group cpuhp_cpu_attr_group = { .attrs = cpuhp_cpu_attrs, .name = "hotplug", }; static ssize_t states_show(struct device *dev, struct device_attribute *attr, char *buf) { ssize_t cur, res = 0; int i; mutex_lock(&cpuhp_state_mutex); for (i = CPUHP_OFFLINE; i <= CPUHP_ONLINE; i++) { struct cpuhp_step *sp = cpuhp_get_step(i); if (sp->name) { cur = sprintf(buf, "%3d: %s\n", i, sp->name); buf += cur; res += cur; } } mutex_unlock(&cpuhp_state_mutex); return res; } static DEVICE_ATTR_RO(states); static struct attribute *cpuhp_cpu_root_attrs[] = { &dev_attr_states.attr, NULL }; static const struct attribute_group cpuhp_cpu_root_attr_group = { .attrs = cpuhp_cpu_root_attrs, .name = "hotplug", }; #ifdef CONFIG_HOTPLUG_SMT static bool cpu_smt_num_threads_valid(unsigned int threads) { if (IS_ENABLED(CONFIG_SMT_NUM_THREADS_DYNAMIC)) return threads >= 1 && threads <= cpu_smt_max_threads; return threads == 1 || threads == cpu_smt_max_threads; } static ssize_t __store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { int ctrlval, ret, num_threads, orig_threads; bool force_off; if (cpu_smt_control == CPU_SMT_FORCE_DISABLED) return -EPERM; if (cpu_smt_control == CPU_SMT_NOT_SUPPORTED) return -ENODEV; if (sysfs_streq(buf, "on")) { ctrlval = CPU_SMT_ENABLED; num_threads = cpu_smt_max_threads; } else if (sysfs_streq(buf, "off")) { ctrlval = CPU_SMT_DISABLED; 
num_threads = 1; } else if (sysfs_streq(buf, "forceoff")) { ctrlval = CPU_SMT_FORCE_DISABLED; num_threads = 1; } else if (kstrtoint(buf, 10, &num_threads) == 0) { if (num_threads == 1) ctrlval = CPU_SMT_DISABLED; else if (cpu_smt_num_threads_valid(num_threads)) ctrlval = CPU_SMT_ENABLED; else return -EINVAL; } else { return -EINVAL; } ret = lock_device_hotplug_sysfs(); if (ret) return ret; orig_threads = cpu_smt_num_threads; cpu_smt_num_threads = num_threads; force_off = ctrlval != cpu_smt_control && ctrlval == CPU_SMT_FORCE_DISABLED; if (num_threads > orig_threads) ret = cpuhp_smt_enable(); else if (num_threads < orig_threads || force_off) ret = cpuhp_smt_disable(ctrlval); unlock_device_hotplug(); return ret ? ret : count; } #else /* !CONFIG_HOTPLUG_SMT */ static ssize_t __store_smt_control(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return -ENODEV; } #endif /* CONFIG_HOTPLUG_SMT */ static const char *smt_states[] = { [CPU_SMT_ENABLED] = "on", [CPU_SMT_DISABLED] = "off", [CPU_SMT_FORCE_DISABLED] = "forceoff", [CPU_SMT_NOT_SUPPORTED] = "notsupported", [CPU_SMT_NOT_IMPLEMENTED] = "notimplemented", }; static ssize_t control_show(struct device *dev, struct device_attribute *attr, char *buf) { const char *state = smt_states[cpu_smt_control]; #ifdef CONFIG_HOTPLUG_SMT /* * If SMT is enabled but not all threads are enabled then show the * number of threads. If all threads are enabled show "on". Otherwise * show the state name. 
*/ if (cpu_smt_control == CPU_SMT_ENABLED && cpu_smt_num_threads != cpu_smt_max_threads) return sysfs_emit(buf, "%d\n", cpu_smt_num_threads); #endif return sysfs_emit(buf, "%s\n", state); } static ssize_t control_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { return __store_smt_control(dev, attr, buf, count); } static DEVICE_ATTR_RW(control); static ssize_t active_show(struct device *dev, struct device_attribute *attr, char *buf) { return sysfs_emit(buf, "%d\n", sched_smt_active()); } static DEVICE_ATTR_RO(active); static struct attribute *cpuhp_smt_attrs[] = { &dev_attr_control.attr, &dev_attr_active.attr, NULL }; static const struct attribute_group cpuhp_smt_attr_group = { .attrs = cpuhp_smt_attrs, .name = "smt", }; static int __init cpu_smt_sysfs_init(void) { struct device *dev_root; int ret = -ENODEV; dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { ret = sysfs_create_group(&dev_root->kobj, &cpuhp_smt_attr_group); put_device(dev_root); } return ret; } static int __init cpuhp_sysfs_init(void) { struct device *dev_root; int cpu, ret; ret = cpu_smt_sysfs_init(); if (ret) return ret; dev_root = bus_get_dev_root(&cpu_subsys); if (dev_root) { ret = sysfs_create_group(&dev_root->kobj, &cpuhp_cpu_root_attr_group); put_device(dev_root); if (ret) return ret; } for_each_possible_cpu(cpu) { struct device *dev = get_cpu_device(cpu); if (!dev) continue; ret = sysfs_create_group(&dev->kobj, &cpuhp_cpu_attr_group); if (ret) return ret; } return 0; } device_initcall(cpuhp_sysfs_init); #endif /* CONFIG_SYSFS && CONFIG_HOTPLUG_CPU */ /* * cpu_bit_bitmap[] is a special, "compressed" data structure that * represents all NR_CPUS bits binary values of 1<<nr. * * It is used by cpumask_of() to get a constant address to a CPU * mask value that has a single bit set only. 
*/ /* cpu_bit_bitmap[0] is empty - so we can back into it */ #define MASK_DECLARE_1(x) [x+1][0] = (1UL << (x)) #define MASK_DECLARE_2(x) MASK_DECLARE_1(x), MASK_DECLARE_1(x+1) #define MASK_DECLARE_4(x) MASK_DECLARE_2(x), MASK_DECLARE_2(x+2) #define MASK_DECLARE_8(x) MASK_DECLARE_4(x), MASK_DECLARE_4(x+4) const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { MASK_DECLARE_8(0), MASK_DECLARE_8(8), MASK_DECLARE_8(16), MASK_DECLARE_8(24), #if BITS_PER_LONG > 32 MASK_DECLARE_8(32), MASK_DECLARE_8(40), MASK_DECLARE_8(48), MASK_DECLARE_8(56), #endif }; EXPORT_SYMBOL_GPL(cpu_bit_bitmap); const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; EXPORT_SYMBOL(cpu_all_bits); #ifdef CONFIG_INIT_ALL_POSSIBLE struct cpumask __cpu_possible_mask __ro_after_init = {CPU_BITS_ALL}; #else struct cpumask __cpu_possible_mask __ro_after_init; #endif EXPORT_SYMBOL(__cpu_possible_mask); struct cpumask __cpu_online_mask __read_mostly; EXPORT_SYMBOL(__cpu_online_mask); struct cpumask __cpu_enabled_mask __read_mostly; EXPORT_SYMBOL(__cpu_enabled_mask); struct cpumask __cpu_present_mask __read_mostly; EXPORT_SYMBOL(__cpu_present_mask); struct cpumask __cpu_active_mask __read_mostly; EXPORT_SYMBOL(__cpu_active_mask); struct cpumask __cpu_dying_mask __read_mostly; EXPORT_SYMBOL(__cpu_dying_mask); atomic_t __num_online_cpus __read_mostly; EXPORT_SYMBOL(__num_online_cpus); void init_cpu_present(const struct cpumask *src) { cpumask_copy(&__cpu_present_mask, src); } void init_cpu_possible(const struct cpumask *src) { cpumask_copy(&__cpu_possible_mask, src); } void set_cpu_online(unsigned int cpu, bool online) { /* * atomic_inc/dec() is required to handle the horrid abuse of this * function by the reboot and kexec code which invoke it from * IPI/NMI broadcasts when shutting down CPUs. Invocation from * regular CPU hotplug is properly serialized. 
* * Note, that the fact that __num_online_cpus is of type atomic_t * does not protect readers which are not serialized against * concurrent hotplug operations. */ if (online) { if (!cpumask_test_and_set_cpu(cpu, &__cpu_online_mask)) atomic_inc(&__num_online_cpus); } else { if (cpumask_test_and_clear_cpu(cpu, &__cpu_online_mask)) atomic_dec(&__num_online_cpus); } } /* * Activate the first processor. */ void __init boot_cpu_init(void) { int cpu = smp_processor_id(); /* Mark the boot cpu "present", "online" etc for SMP and UP case */ set_cpu_online(cpu, true); set_cpu_active(cpu, true); set_cpu_present(cpu, true); set_cpu_possible(cpu, true); #ifdef CONFIG_SMP __boot_cpu_id = cpu; #endif } /* * Must be called _AFTER_ setting up the per_cpu areas */ void __init boot_cpu_hotplug_init(void) { #ifdef CONFIG_SMP cpumask_set_cpu(smp_processor_id(), &cpus_booted_once_mask); atomic_set(this_cpu_ptr(&cpuhp_state.ap_sync_state), SYNC_STATE_ONLINE); #endif this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); this_cpu_write(cpuhp_state.target, CPUHP_ONLINE); } #ifdef CONFIG_CPU_MITIGATIONS /* * All except the cross-thread attack vector are mitigated by default. * Cross-thread mitigation often requires disabling SMT which is expensive * so cross-thread mitigations are only partially enabled by default. * * Guest-to-Host and Guest-to-Guest vectors are only needed if KVM support is * present. */ static bool attack_vectors[NR_CPU_ATTACK_VECTORS] __ro_after_init = { [CPU_MITIGATE_USER_KERNEL] = true, [CPU_MITIGATE_USER_USER] = true, [CPU_MITIGATE_GUEST_HOST] = IS_ENABLED(CONFIG_KVM), [CPU_MITIGATE_GUEST_GUEST] = IS_ENABLED(CONFIG_KVM), }; bool cpu_attack_vector_mitigated(enum cpu_attack_vectors v) { if (v < NR_CPU_ATTACK_VECTORS) return attack_vectors[v]; WARN_ONCE(1, "Invalid attack vector %d\n", v); return false; } /* * There are 3 global options, 'off', 'auto', 'auto,nosmt'. These may optionally * be combined with attack-vector disables which follow them. 
* * Examples: * mitigations=auto,no_user_kernel,no_user_user,no_cross_thread * mitigations=auto,nosmt,no_guest_host,no_guest_guest * * mitigations=off is equivalent to disabling all attack vectors. */ enum cpu_mitigations { CPU_MITIGATIONS_OFF, CPU_MITIGATIONS_AUTO, CPU_MITIGATIONS_AUTO_NOSMT, }; enum { NO_USER_KERNEL, NO_USER_USER, NO_GUEST_HOST, NO_GUEST_GUEST, NO_CROSS_THREAD, NR_VECTOR_PARAMS, }; enum smt_mitigations smt_mitigations __ro_after_init = SMT_MITIGATIONS_AUTO; static enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; static const match_table_t global_mitigations = { { CPU_MITIGATIONS_AUTO_NOSMT, "auto,nosmt"}, { CPU_MITIGATIONS_AUTO, "auto"}, { CPU_MITIGATIONS_OFF, "off"}, }; static const match_table_t vector_mitigations = { { NO_USER_KERNEL, "no_user_kernel"}, { NO_USER_USER, "no_user_user"}, { NO_GUEST_HOST, "no_guest_host"}, { NO_GUEST_GUEST, "no_guest_guest"}, { NO_CROSS_THREAD, "no_cross_thread"}, { NR_VECTOR_PARAMS, NULL}, }; static int __init mitigations_parse_global_opt(char *arg) { int i; for (i = 0; i < ARRAY_SIZE(global_mitigations); i++) { const char *pattern = global_mitigations[i].pattern; if (!strncmp(arg, pattern, strlen(pattern))) { cpu_mitigations = global_mitigations[i].token; return strlen(pattern); } } return 0; } static int __init mitigations_parse_cmdline(char *arg) { char *s, *p; int len; len = mitigations_parse_global_opt(arg); if (cpu_mitigations_off()) { memset(attack_vectors, 0, sizeof(attack_vectors)); smt_mitigations = SMT_MITIGATIONS_OFF; } else if (cpu_mitigations_auto_nosmt()) { smt_mitigations = SMT_MITIGATIONS_ON; } p = arg + len; if (!*p) return 0; /* Attack vector controls may come after the ',' */ if (*p++ != ',' || !IS_ENABLED(CONFIG_ARCH_HAS_CPU_ATTACK_VECTORS)) { pr_crit("Unsupported mitigations=%s, system may still be vulnerable\n", arg); return 0; } while ((s = strsep(&p, ",")) != NULL) { switch (match_token(s, vector_mitigations, NULL)) { case NO_USER_KERNEL: 
attack_vectors[CPU_MITIGATE_USER_KERNEL] = false; break; case NO_USER_USER: attack_vectors[CPU_MITIGATE_USER_USER] = false; break; case NO_GUEST_HOST: attack_vectors[CPU_MITIGATE_GUEST_HOST] = false; break; case NO_GUEST_GUEST: attack_vectors[CPU_MITIGATE_GUEST_GUEST] = false; break; case NO_CROSS_THREAD: smt_mitigations = SMT_MITIGATIONS_OFF; break; default: pr_crit("Unsupported mitigations options %s\n", s); return 0; } } return 0; } /* mitigations=off */ bool cpu_mitigations_off(void) { return cpu_mitigations == CPU_MITIGATIONS_OFF; } EXPORT_SYMBOL_GPL(cpu_mitigations_off); /* mitigations=auto,nosmt */ bool cpu_mitigations_auto_nosmt(void) { return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; } EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); #else static int __init mitigations_parse_cmdline(char *arg) { pr_crit("Kernel compiled without mitigations, ignoring 'mitigations'; system may still be vulnerable\n"); return 0; } #endif early_param("mitigations", mitigations_parse_cmdline);
800 800 797 84 86 84 83 83 84 83 84 84 84 84 87 99 2 2 24 2 24 86 24 24 86 23 23 23 84 84 84 84 84 84 84 84 22 22 21 21 21 21 21 21 17 21 16 21 21 22 21 84 84 84 84 84 84 83 84 84 5 100 99 72 31 99 99 88 88 88 5 5 5 82 81 85 85 85 85 84 84 85 79 85 85 85 85 100 87 87 31 87 31 28 3 31 31 1 31 99 2 82 82 1 81 82 82 82 82 82 2 80 82 68 68 68 65 64 65 65 65 65 68 68 68 68 68 82 82 82 82 68 79 79 3 78 79 104 4 100 1 100 21 86 86 84 84 84 82 82 76 76 11 82 84 97 100 91 92 72 36 2 2 1 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 
906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 
1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 
1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/exec.c * * Copyright (C) 1991, 1992 Linus Torvalds */ /* * #!-checking implemented by tytso. 
*/ /* * Demand-loading implemented 01.12.91 - no need to read anything but * the header into memory. The inode of the executable is put into * "current->executable", and page faults do the actual loading. Clean. * * Once more I can proudly say that linux stood up to being changed: it * was less than 2 hours work to get demand-loading completely implemented. * * Demand loading changed July 1993 by Eric Youngdale. Use mmap instead, * current->executable is only used by the procfs. This allows a dispatch * table to check for several different types of binary formats. We keep * trying until we recognize the file or we run out of supported binary * formats. */ #include <linux/kernel_read_file.h> #include <linux/slab.h> #include <linux/file.h> #include <linux/fdtable.h> #include <linux/mm.h> #include <linux/stat.h> #include <linux/fcntl.h> #include <linux/swap.h> #include <linux/string.h> #include <linux/init.h> #include <linux/sched/mm.h> #include <linux/sched/coredump.h> #include <linux/sched/signal.h> #include <linux/sched/numa_balancing.h> #include <linux/sched/task.h> #include <linux/pagemap.h> #include <linux/perf_event.h> #include <linux/highmem.h> #include <linux/spinlock.h> #include <linux/key.h> #include <linux/personality.h> #include <linux/binfmts.h> #include <linux/utsname.h> #include <linux/pid_namespace.h> #include <linux/module.h> #include <linux/namei.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/syscalls.h> #include <linux/tsacct_kern.h> #include <linux/cn_proc.h> #include <linux/audit.h> #include <linux/kmod.h> #include <linux/fsnotify.h> #include <linux/fs_struct.h> #include <linux/oom.h> #include <linux/compat.h> #include <linux/vmalloc.h> #include <linux/io_uring.h> #include <linux/syscall_user_dispatch.h> #include <linux/coredump.h> #include <linux/time_namespace.h> #include <linux/user_events.h> #include <linux/rseq.h> #include <linux/ksm.h> #include <linux/uaccess.h> #include <asm/mmu_context.h> #include <asm/tlb.h> 
#include <trace/events/task.h>
#include "internal.h"

#include <trace/events/sched.h>

/* For vma exec functions. */
#include "../mm/internal.h"

static int bprm_creds_from_file(struct linux_binprm *bprm);

int suid_dumpable = 0;

static LIST_HEAD(formats);
static DEFINE_RWLOCK(binfmt_lock);

/*
 * Link @fmt into the list of binary formats under binfmt_lock: at the
 * head of the list when @insert is non-zero, at the tail otherwise.
 */
void __register_binfmt(struct linux_binfmt *fmt, int insert)
{
	write_lock(&binfmt_lock);
	if (insert)
		list_add(&fmt->lh, &formats);
	else
		list_add_tail(&fmt->lh, &formats);
	write_unlock(&binfmt_lock);
}
EXPORT_SYMBOL(__register_binfmt);

/* Unlink @fmt from the list of binary formats under binfmt_lock. */
void unregister_binfmt(struct linux_binfmt *fmt)
{
	write_lock(&binfmt_lock);
	list_del(&fmt->lh);
	write_unlock(&binfmt_lock);
}
EXPORT_SYMBOL(unregister_binfmt);

/* Drop the module reference associated with @fmt. */
static inline void put_binfmt(struct linux_binfmt *fmt)
{
	module_put(fmt->module);
}

/*
 * Report whether execution is forbidden for @path, either through the
 * MNT_NOEXEC mount flag or through SB_I_NOEXEC on the superblock.
 */
bool path_noexec(const struct path *path)
{
	/* If it's an anonymous inode make sure that we catch any shenanigans. */
	VFS_WARN_ON_ONCE(IS_ANON_FILE(d_inode(path->dentry)) &&
			 !(path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC));

	if (path->mnt->mnt_flags & MNT_NOEXEC)
		return true;

	return (path->mnt->mnt_sb->s_iflags & SB_I_NOEXEC) != 0;
}

#ifdef CONFIG_MMU
/*
 * The nascent bprm->mm is not visible until exec_mmap() but it can
 * use a lot of memory, account these pages in current->mm temporarily
 * for oom_badness()->get_mm_rss(). Once exec succeeds or fails, we
 * change the counter back via acct_arg_size(0).
 */
static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
	struct mm_struct *cur_mm = current->mm;
	long delta = (long)(pages - bprm->vma_pages);

	if (cur_mm && delta) {
		bprm->vma_pages = pages;
		add_mm_counter(cur_mm, MM_ANONPAGES, delta);
	}
}

/*
 * Pin and return the page of the new mm that backs stack position @pos,
 * or NULL on failure.  For a write access the argument accounting is
 * refreshed from the current size of the stack vma.
 */
static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
		int write)
{
	struct vm_area_struct *stack_vma = bprm->vma;
	struct mm_struct *new_mm = bprm->mm;
	unsigned int gup_flags = write ? FOLL_WRITE : 0;
	struct page *page;
	int ret;

	/*
	 * Avoid relying on expanding the stack down in GUP (which
	 * does not work for STACK_GROWSUP anyway), and just do it
	 * ahead of time.
	 */
	if (!mmap_read_lock_maybe_expand(new_mm, stack_vma, pos, write))
		return NULL;

	/*
	 * We are doing an exec().  'current' is the process
	 * doing the exec and 'mm' is the new process's mm.
	 */
	ret = get_user_pages_remote(new_mm, pos, 1, gup_flags, &page, NULL);
	mmap_read_unlock(new_mm);
	if (ret <= 0)
		return NULL;

	if (write)
		acct_arg_size(bprm, vma_pages(stack_vma));

	return page;
}

/* Release a page obtained from get_arg_page(). */
static void put_arg_page(struct page *page)
{
	put_page(page);
}

/* Nothing to free here in the CONFIG_MMU build. */
static void free_arg_pages(struct linux_binprm *bprm)
{
}

static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
		struct page *page)
{
	flush_cache_page(bprm->vma, pos, page_to_pfn(page));
}

/* A single argument string may not exceed MAX_ARG_STRLEN. */
static bool valid_arg_len(struct linux_binprm *bprm, long len)
{
	return len <= MAX_ARG_STRLEN;
}

#else

static inline void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
{
}

/*
 * Return the holding page that covers @pos.  For a write access a missing
 * page is allocated (zeroed) and remembered in bprm->page[]; NULL is
 * returned when that allocation fails or, for reads, when no page exists.
 */
static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
		int write)
{
	struct page **slot = &bprm->page[pos / PAGE_SIZE];

	if (*slot || !write)
		return *slot;

	*slot = alloc_page(GFP_HIGHUSER|__GFP_ZERO);
	return *slot;
}

static void put_arg_page(struct page *page)
{
}

/* Free holding page @i, if present, and clear its slot. */
static void free_arg_page(struct linux_binprm *bprm, int i)
{
	if (!bprm->page[i])
		return;

	__free_page(bprm->page[i]);
	bprm->page[i] = NULL;
}

/* Free every holding page of @bprm. */
static void free_arg_pages(struct linux_binprm *bprm)
{
	int idx = 0;

	while (idx < MAX_ARG_PAGES)
		free_arg_page(bprm, idx++);
}

static void flush_arg_page(struct linux_binprm *bprm, unsigned long pos,
		struct page *page)
{
}

/* A string must fit into what is left below bprm->p. */
static bool valid_arg_len(struct linux_binprm *bprm, long len)
{
	return len <= bprm->p;
}

#endif /* CONFIG_MMU */

/*
 * Create a new mm_struct and populate it with a temporary stack
 * vm_area_struct.  We don't have enough context at this point to set the stack
 * flags, permissions, and offset, so we use temporary values.  We'll update
 * them later in setup_arg_pages().
*/ static int bprm_mm_init(struct linux_binprm *bprm) { int err; struct mm_struct *mm = NULL; bprm->mm = mm = mm_alloc(); err = -ENOMEM; if (!mm) goto err; /* Save current stack limit for all calculations made during exec. */ task_lock(current->group_leader); bprm->rlim_stack = current->signal->rlim[RLIMIT_STACK]; task_unlock(current->group_leader); #ifndef CONFIG_MMU bprm->p = PAGE_SIZE * MAX_ARG_PAGES - sizeof(void *); #else err = create_init_stack_vma(bprm->mm, &bprm->vma, &bprm->p); if (err) goto err; #endif return 0; err: if (mm) { bprm->mm = NULL; mmdrop(mm); } return err; } struct user_arg_ptr { #ifdef CONFIG_COMPAT bool is_compat; #endif union { const char __user *const __user *native; #ifdef CONFIG_COMPAT const compat_uptr_t __user *compat; #endif } ptr; }; static const char __user *get_user_arg_ptr(struct user_arg_ptr argv, int nr) { const char __user *native; #ifdef CONFIG_COMPAT if (unlikely(argv.is_compat)) { compat_uptr_t compat; if (get_user(compat, argv.ptr.compat + nr)) return ERR_PTR(-EFAULT); return compat_ptr(compat); } #endif if (get_user(native, argv.ptr.native + nr)) return ERR_PTR(-EFAULT); return native; } /* * count() counts the number of strings in array ARGV. */ static int count(struct user_arg_ptr argv, int max) { int i = 0; if (argv.ptr.native != NULL) { for (;;) { const char __user *p = get_user_arg_ptr(argv, i); if (!p) break; if (IS_ERR(p)) return -EFAULT; if (i >= max) return -E2BIG; ++i; if (fatal_signal_pending(current)) return -ERESTARTNOHAND; cond_resched(); } } return i; } static int count_strings_kernel(const char *const *argv) { int i; if (!argv) return 0; for (i = 0; argv[i]; ++i) { if (i >= MAX_ARG_STRINGS) return -E2BIG; if (fatal_signal_pending(current)) return -ERESTARTNOHAND; cond_resched(); } return i; } static inline int bprm_set_stack_limit(struct linux_binprm *bprm, unsigned long limit) { #ifdef CONFIG_MMU /* Avoid a pathological bprm->p. 
*/ if (bprm->p < limit) return -E2BIG; bprm->argmin = bprm->p - limit; #endif return 0; } static inline bool bprm_hit_stack_limit(struct linux_binprm *bprm) { #ifdef CONFIG_MMU return bprm->p < bprm->argmin; #else return false; #endif } /* * Calculate bprm->argmin from: * - _STK_LIM * - ARG_MAX * - bprm->rlim_stack.rlim_cur * - bprm->argc * - bprm->envc * - bprm->p */ static int bprm_stack_limits(struct linux_binprm *bprm) { unsigned long limit, ptr_size; /* * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM * (whichever is smaller) for the argv+env strings. * This ensures that: * - the remaining binfmt code will not run out of stack space, * - the program will have a reasonable amount of stack left * to work from. */ limit = _STK_LIM / 4 * 3; limit = min(limit, bprm->rlim_stack.rlim_cur / 4); /* * We've historically supported up to 32 pages (ARG_MAX) * of argument strings even with small stacks */ limit = max_t(unsigned long, limit, ARG_MAX); /* Reject totally pathological counts. */ if (bprm->argc < 0 || bprm->envc < 0) return -E2BIG; /* * We must account for the size of all the argv and envp pointers to * the argv and envp strings, since they will also take up space in * the stack. They aren't stored until much later when we can't * signal to the parent that the child has run out of stack space. * Instead, calculate it here so it's possible to fail gracefully. * * In the case of argc = 0, make sure there is space for adding a * empty string (which will bump argc to 1), to ensure confused * userspace programs don't start processing from argv[1], thinking * argc can never be 0, to keep them from walking envp by accident. * See do_execveat_common(). 
*/ if (check_add_overflow(max(bprm->argc, 1), bprm->envc, &ptr_size) || check_mul_overflow(ptr_size, sizeof(void *), &ptr_size)) return -E2BIG; if (limit <= ptr_size) return -E2BIG; limit -= ptr_size; return bprm_set_stack_limit(bprm, limit); } /* * 'copy_strings()' copies argument/environment strings from the old * processes's memory to the new process's stack. The call to get_user_pages() * ensures the destination page is created and not swapped out. */ static int copy_strings(int argc, struct user_arg_ptr argv, struct linux_binprm *bprm) { struct page *kmapped_page = NULL; char *kaddr = NULL; unsigned long kpos = 0; int ret; while (argc-- > 0) { const char __user *str; int len; unsigned long pos; ret = -EFAULT; str = get_user_arg_ptr(argv, argc); if (IS_ERR(str)) goto out; len = strnlen_user(str, MAX_ARG_STRLEN); if (!len) goto out; ret = -E2BIG; if (!valid_arg_len(bprm, len)) goto out; /* We're going to work our way backwards. */ pos = bprm->p; str += len; bprm->p -= len; if (bprm_hit_stack_limit(bprm)) goto out; while (len > 0) { int offset, bytes_to_copy; if (fatal_signal_pending(current)) { ret = -ERESTARTNOHAND; goto out; } cond_resched(); offset = pos % PAGE_SIZE; if (offset == 0) offset = PAGE_SIZE; bytes_to_copy = offset; if (bytes_to_copy > len) bytes_to_copy = len; offset -= bytes_to_copy; pos -= bytes_to_copy; str -= bytes_to_copy; len -= bytes_to_copy; if (!kmapped_page || kpos != (pos & PAGE_MASK)) { struct page *page; page = get_arg_page(bprm, pos, 1); if (!page) { ret = -E2BIG; goto out; } if (kmapped_page) { flush_dcache_page(kmapped_page); kunmap_local(kaddr); put_arg_page(kmapped_page); } kmapped_page = page; kaddr = kmap_local_page(kmapped_page); kpos = pos & PAGE_MASK; flush_arg_page(bprm, kpos, kmapped_page); } if (copy_from_user(kaddr+offset, str, bytes_to_copy)) { ret = -EFAULT; goto out; } } } ret = 0; out: if (kmapped_page) { flush_dcache_page(kmapped_page); kunmap_local(kaddr); put_arg_page(kmapped_page); } return ret; } /* * Copy and 
argument/environment string from the kernel to the processes stack. */ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) { int len = strnlen(arg, MAX_ARG_STRLEN) + 1 /* terminating NUL */; unsigned long pos = bprm->p; if (len == 0) return -EFAULT; if (!valid_arg_len(bprm, len)) return -E2BIG; /* We're going to work our way backwards. */ arg += len; bprm->p -= len; if (bprm_hit_stack_limit(bprm)) return -E2BIG; while (len > 0) { unsigned int bytes_to_copy = min_t(unsigned int, len, min_not_zero(offset_in_page(pos), PAGE_SIZE)); struct page *page; pos -= bytes_to_copy; arg -= bytes_to_copy; len -= bytes_to_copy; page = get_arg_page(bprm, pos, 1); if (!page) return -E2BIG; flush_arg_page(bprm, pos & PAGE_MASK, page); memcpy_to_page(page, offset_in_page(pos), arg, bytes_to_copy); put_arg_page(page); } return 0; } EXPORT_SYMBOL(copy_string_kernel); static int copy_strings_kernel(int argc, const char *const *argv, struct linux_binprm *bprm) { while (argc-- > 0) { int ret = copy_string_kernel(argv[argc], bprm); if (ret < 0) return ret; if (fatal_signal_pending(current)) return -ERESTARTNOHAND; cond_resched(); } return 0; } #ifdef CONFIG_MMU /* * Finalizes the stack vm_area_struct. The flags and permissions are updated, * the stack is optionally relocated, and some extra space is added. */ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack) { int ret; unsigned long stack_shift; struct mm_struct *mm = current->mm; struct vm_area_struct *vma = bprm->vma; struct vm_area_struct *prev = NULL; vm_flags_t vm_flags; unsigned long stack_base; unsigned long stack_size; unsigned long stack_expand; unsigned long rlim_stack; struct mmu_gather tlb; struct vma_iterator vmi; #ifdef CONFIG_STACK_GROWSUP /* Limit stack size */ stack_base = bprm->rlim_stack.rlim_max; stack_base = calc_max_stack_size(stack_base); /* Add space for stack randomization. 
*/ if (current->flags & PF_RANDOMIZE) stack_base += (STACK_RND_MASK << PAGE_SHIFT); /* Make sure we didn't let the argument array grow too large. */ if (vma->vm_end - vma->vm_start > stack_base) return -ENOMEM; stack_base = PAGE_ALIGN(stack_top - stack_base); stack_shift = vma->vm_start - stack_base; mm->arg_start = bprm->p - stack_shift; bprm->p = vma->vm_end - stack_shift; #else stack_top = arch_align_stack(stack_top); stack_top = PAGE_ALIGN(stack_top); if (unlikely(stack_top < mmap_min_addr) || unlikely(vma->vm_end - vma->vm_start >= stack_top - mmap_min_addr)) return -ENOMEM; stack_shift = vma->vm_end - stack_top; bprm->p -= stack_shift; mm->arg_start = bprm->p; #endif bprm->exec -= stack_shift; if (mmap_write_lock_killable(mm)) return -EINTR; vm_flags = VM_STACK_FLAGS; /* * Adjust stack execute permissions; explicitly enable for * EXSTACK_ENABLE_X, disable for EXSTACK_DISABLE_X and leave alone * (arch default) otherwise. */ if (unlikely(executable_stack == EXSTACK_ENABLE_X)) vm_flags |= VM_EXEC; else if (executable_stack == EXSTACK_DISABLE_X) vm_flags &= ~VM_EXEC; vm_flags |= mm->def_flags; vm_flags |= VM_STACK_INCOMPLETE_SETUP; vma_iter_init(&vmi, mm, vma->vm_start); tlb_gather_mmu(&tlb, mm); ret = mprotect_fixup(&vmi, &tlb, vma, &prev, vma->vm_start, vma->vm_end, vm_flags); tlb_finish_mmu(&tlb); if (ret) goto out_unlock; BUG_ON(prev != vma); if (unlikely(vm_flags & VM_EXEC)) { pr_warn_once("process '%pD4' started with executable stack\n", bprm->file); } /* Move stack pages down in memory. */ if (stack_shift) { /* * During bprm_mm_init(), we create a temporary stack at STACK_TOP_MAX. Once * the binfmt code determines where the new stack should reside, we shift it to * its final location. 
*/ ret = relocate_vma_down(vma, stack_shift); if (ret) goto out_unlock; } /* mprotect_fixup is overkill to remove the temporary stack flags */ vm_flags_clear(vma, VM_STACK_INCOMPLETE_SETUP); stack_expand = 131072UL; /* randomly 32*4k (or 2*64k) pages */ stack_size = vma->vm_end - vma->vm_start; /* * Align this down to a page boundary as expand_stack * will align it up. */ rlim_stack = bprm->rlim_stack.rlim_cur & PAGE_MASK; stack_expand = min(rlim_stack, stack_size + stack_expand); #ifdef CONFIG_STACK_GROWSUP stack_base = vma->vm_start + stack_expand; #else stack_base = vma->vm_end - stack_expand; #endif current->mm->start_stack = bprm->p; ret = expand_stack_locked(vma, stack_base); if (ret) ret = -EFAULT; out_unlock: mmap_write_unlock(mm); return ret; } EXPORT_SYMBOL(setup_arg_pages); #else /* * Transfer the program arguments and environment from the holding pages * onto the stack. The provided stack pointer is adjusted accordingly. */ int transfer_args_to_stack(struct linux_binprm *bprm, unsigned long *sp_location) { unsigned long index, stop, sp; int ret = 0; stop = bprm->p >> PAGE_SHIFT; sp = *sp_location; for (index = MAX_ARG_PAGES - 1; index >= stop; index--) { unsigned int offset = index == stop ? bprm->p & ~PAGE_MASK : 0; char *src = kmap_local_page(bprm->page[index]) + offset; sp -= PAGE_SIZE - offset; if (copy_to_user((void *) sp, src, PAGE_SIZE - offset) != 0) ret = -EFAULT; kunmap_local(src); if (ret) goto out; } bprm->exec += *sp_location - MAX_ARG_PAGES * PAGE_SIZE; *sp_location = sp; out: return ret; } EXPORT_SYMBOL(transfer_args_to_stack); #endif /* CONFIG_MMU */ /* * On success, caller must call do_close_execat() on the returned * struct file to close it. 
*/
static struct file *do_open_execat(int fd, struct filename *name, int flags)
{
	const int allowed_flags = AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH |
				  AT_EXECVE_CHECK;
	/* Dropped automatically via fput() on every early return below. */
	struct file *file __free(fput) = NULL;
	struct open_flags how = {
		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
		.acc_mode = MAY_EXEC,
		.intent = LOOKUP_OPEN,
		.lookup_flags = LOOKUP_FOLLOW,
	};
	int err;

	if (flags & ~allowed_flags)
		return ERR_PTR(-EINVAL);

	if (flags & AT_SYMLINK_NOFOLLOW)
		how.lookup_flags &= ~LOOKUP_FOLLOW;
	if (flags & AT_EMPTY_PATH)
		how.lookup_flags |= LOOKUP_EMPTY;

	file = do_filp_open(fd, name, &how);
	if (IS_ERR(file))
		return file;

	/* Refuse anything that lives on a no-exec mount or superblock. */
	if (path_noexec(&file->f_path))
		return ERR_PTR(-EACCES);

	/*
	 * In the past the regular type check was here. It moved to may_open() in
	 * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
	 * an invariant that all non-regular files error out before we get here.
	 */
	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)))
		return ERR_PTR(-EACCES);

	err = exe_file_deny_write_access(file);
	if (err)
		return ERR_PTR(err);

	/* Success: hand the reference over to the caller. */
	return no_free_ptr(file);
}

/**
 * open_exec - Open a path name for execution
 *
 * @name: path name to open with the intent of executing it.
 *
 * Returns ERR_PTR on failure or allocated struct file on success.
 *
 * As this is a wrapper for the internal do_open_execat(), callers
 * must call exe_file_allow_write_access() before fput() on release. Also see
 * do_close_execat().
*/
struct file *open_exec(const char *name)
{
	struct filename *filename = getname_kernel(name);
	/* If getname_kernel() failed, its error is what gets returned. */
	struct file *f = ERR_CAST(filename);

	if (!IS_ERR(filename)) {
		f = do_open_execat(AT_FDCWD, filename, 0);
		putname(filename);
	}
	return f;
}
EXPORT_SYMBOL(open_exec);

#if defined(CONFIG_BINFMT_FLAT) || defined(CONFIG_BINFMT_ELF_FDPIC)
/*
 * Read @len bytes of @file, starting at offset @pos, to the address @addr
 * via vfs_read().  If anything was read, the instruction cache is flushed
 * for the range [addr, addr + len).  Returns the vfs_read() result.
 */
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
{
	ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
	if (res > 0)
		flush_icache_user_range(addr, addr + len);
	return res;
}
EXPORT_SYMBOL(read_code);
#endif

/*
 * Maps the mm_struct mm into the current task struct.
 * On success, this function returns with exec_update_lock
 * held for writing.
 *
 * On failure the error from down_write_killable() or
 * mmap_read_lock_killable() is returned and exec_update_lock is not held.
 */
static int exec_mmap(struct mm_struct *mm)
{
	struct task_struct *tsk;
	struct mm_struct *old_mm, *active_mm;
	int ret;

	/* Notify parent that we're no longer interested in the old VM */
	tsk = current;
	old_mm = current->mm;
	exec_mm_release(tsk, old_mm);

	ret = down_write_killable(&tsk->signal->exec_update_lock);
	if (ret)
		return ret;

	if (old_mm) {
		/*
		 * If there is a pending fatal signal perhaps a signal
		 * whose default action is to create a coredump get
		 * out and die instead of going through with the exec.
		 */
		ret = mmap_read_lock_killable(old_mm);
		if (ret) {
			up_write(&tsk->signal->exec_update_lock);
			return ret;
		}
	}

	task_lock(tsk);
	membarrier_exec_mmap(mm);

	local_irq_disable();
	active_mm = tsk->active_mm;
	tsk->active_mm = mm;
	tsk->mm = mm;
	mm_init_cid(mm, tsk);
	/*
	 * This prevents preemption while active_mm is being loaded and
	 * it and mm are being updated, which could cause problems for
	 * lazy tlb mm refcounting when these are updated by context
	 * switches. Not all architectures can handle irqs off over
	 * activate_mm yet.
	 */
	if (!IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
		local_irq_enable();
	activate_mm(active_mm, mm);
	if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
		local_irq_enable();
	lru_gen_add_mm(mm);
	task_unlock(tsk);
	lru_gen_use_mm(mm);
	if (old_mm) {
		/* The task had its own mm: release it. */
		mmap_read_unlock(old_mm);
		BUG_ON(active_mm != old_mm);
		setmax_mm_hiwater_rss(&tsk->signal->maxrss, old_mm);
		mm_update_next_owner(old_mm);
		mmput(old_mm);
		return 0;
	}
	/* No previous mm: only the lazy-tlb reference on active_mm is dropped. */
	mmdrop_lazy_tlb(active_mm);
	return 0;
}

/*
 * Make @tsk the only thread, and the leader, of its thread group.
 *
 * Returns 0 on success.  Returns -EAGAIN if a group exit or another exec
 * is already in progress, or if a fatal signal becomes pending while
 * waiting for the other threads.
 */
static int de_thread(struct task_struct *tsk)
{
	struct signal_struct *sig = tsk->signal;
	struct sighand_struct *oldsighand = tsk->sighand;
	spinlock_t *lock = &oldsighand->siglock;

	if (thread_group_empty(tsk))
		goto no_thread_group;

	/*
	 * Kill all other threads in the thread group.
	 */
	spin_lock_irq(lock);
	if ((sig->flags & SIGNAL_GROUP_EXIT) || sig->group_exec_task) {
		/*
		 * Another group action in progress, just
		 * return so that the signal is processed.
		 */
		spin_unlock_irq(lock);
		return -EAGAIN;
	}

	sig->group_exec_task = tsk;
	sig->notify_count = zap_other_threads(tsk);
	/* A non-leader does not wait for the leader in the loop below. */
	if (!thread_group_leader(tsk))
		sig->notify_count--;

	while (sig->notify_count) {
		__set_current_state(TASK_KILLABLE);
		spin_unlock_irq(lock);
		schedule();
		if (__fatal_signal_pending(tsk))
			goto killed;
		spin_lock_irq(lock);
	}
	spin_unlock_irq(lock);

	/*
	 * At this point all other threads have exited, all we have to
	 * do is to wait for the thread group leader to become inactive,
	 * and to assume its PID:
	 */
	if (!thread_group_leader(tsk)) {
		struct task_struct *leader = tsk->group_leader;

		for (;;) {
			cgroup_threadgroup_change_begin(tsk);
			write_lock_irq(&tasklist_lock);
			/*
			 * Do this under tasklist_lock to ensure that
			 * exit_notify() can't miss ->group_exec_task
			 */
			sig->notify_count = -1;
			/* Leave the loop with tasklist_lock still write-held. */
			if (likely(leader->exit_state))
				break;
			__set_current_state(TASK_KILLABLE);
			write_unlock_irq(&tasklist_lock);
			cgroup_threadgroup_change_end(tsk);
			schedule();
			if (__fatal_signal_pending(tsk))
				goto killed;
		}

		/*
		 * The only record we have of the real-time age of a
		 * process, regardless of execs it's done, is start_time.
		 * All the past CPU time is accumulated in signal_struct
		 * from sister threads now dead.  But in this non-leader
		 * exec, nothing survives from the original leader thread,
		 * whose birth marks the true age of this process now.
		 * When we take on its identity by switching to its PID, we
		 * also take its birthdate (always earlier than our own).
		 */
		tsk->start_time = leader->start_time;
		tsk->start_boottime = leader->start_boottime;

		BUG_ON(!same_thread_group(leader, tsk));
		/*
		 * An exec() starts a new thread group with the
		 * TGID of the previous thread group. Rehash the
		 * two threads with a switched PID, and release
		 * the former thread group leader:
		 */

		/* Become a process group leader with the old leader's pid.
		 * The old leader becomes a thread of this thread group.
		 */
		exchange_tids(tsk, leader);
		transfer_pid(leader, tsk, PIDTYPE_TGID);
		transfer_pid(leader, tsk, PIDTYPE_PGID);
		transfer_pid(leader, tsk, PIDTYPE_SID);

		list_replace_rcu(&leader->tasks, &tsk->tasks);
		list_replace_init(&leader->sibling, &tsk->sibling);

		tsk->group_leader = tsk;
		leader->group_leader = tsk;

		tsk->exit_signal = SIGCHLD;
		leader->exit_signal = -1;

		BUG_ON(leader->exit_state != EXIT_ZOMBIE);
		leader->exit_state = EXIT_DEAD;
		/*
		 * We are going to release_task()->ptrace_unlink() silently,
		 * the tracer can sleep in do_wait(). EXIT_DEAD guarantees
		 * the tracer won't block again waiting for this thread.
		 */
		if (unlikely(leader->ptrace))
			__wake_up_parent(leader, leader->parent);
		write_unlock_irq(&tasklist_lock);
		cgroup_threadgroup_change_end(tsk);

		release_task(leader);
	}

	sig->group_exec_task = NULL;
	sig->notify_count = 0;

no_thread_group:
	/* we have changed execution domain */
	tsk->exit_signal = SIGCHLD;

	BUG_ON(!thread_group_leader(tsk));
	return 0;

killed:
	/* protects against exit_notify() and __exit_signal() */
	read_lock(&tasklist_lock);
	sig->group_exec_task = NULL;
	sig->notify_count = 0;
	read_unlock(&tasklist_lock);
	return -EAGAIN;
}


/*
 * This function makes sure the current process has its own signal table,
 * so that flush_signal_handlers can later reset the handlers without
 * disturbing other processes.  (Other processes might share the signal
 * table via the CLONE_SIGHAND option to clone().)
 *
 * Returns 0 on success or -ENOMEM if a new table cannot be allocated.
 */
static int unshare_sighand(struct task_struct *me)
{
	struct sighand_struct *oldsighand = me->sighand;

	if (refcount_read(&oldsighand->count) != 1) {
		struct sighand_struct *newsighand;
		/*
		 * This ->sighand is shared with the CLONE_SIGHAND
		 * but not CLONE_THREAD task, switch to the new one.
		 */
		newsighand = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
		if (!newsighand)
			return -ENOMEM;

		refcount_set(&newsighand->count, 1);

		/* Copy the handlers and publish the new table under both locks. */
		write_lock_irq(&tasklist_lock);
		spin_lock(&oldsighand->siglock);
		memcpy(newsighand->action, oldsighand->action,
		       sizeof(newsighand->action));
		rcu_assign_pointer(me->sighand, newsighand);
		spin_unlock(&oldsighand->siglock);
		write_unlock_irq(&tasklist_lock);

		__cleanup_sighand(oldsighand);
	}
	return 0;
}

/*
 * This is unlocked -- the string will always be NUL-terminated, but
 * may show overlapping contents if racing concurrent reads.
 *
 * At most sizeof(tsk->comm) - 1 bytes of @buf are copied; the remainder
 * of tsk->comm is zero-filled.
 */
void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec)
{
	size_t len = min(strlen(buf), sizeof(tsk->comm) - 1);

	trace_task_rename(tsk, buf);
	memcpy(tsk->comm, buf, len);
	memset(&tsk->comm[len], 0, sizeof(tsk->comm) - len);
	perf_event_comm(tsk, exec);
}

/*
 * Calling this is the point of no return.
None of the failures will be
 * seen by userspace since either the process is already taking a fatal
 * signal (via de_thread() or coredump), or will have SEGV raised
 * (after exec_mmap()) by search_binary_handler (see below).
 *
 * Returns 0 on success or a negative error from one of the steps below.
 * On success exec_update_lock is still held for writing (taken in
 * exec_mmap()); the error paths after exec_mmap() release it again.
 */
int begin_new_exec(struct linux_binprm * bprm)
{
	struct task_struct *me = current;
	int retval;

	/* Once we are committed compute the creds */
	retval = bprm_creds_from_file(bprm);
	if (retval)
		return retval;

	/*
	 * This tracepoint marks the point before flushing the old exec where
	 * the current task is still unchanged, but errors are fatal (point of
	 * no return). The later "sched_process_exec" tracepoint is called after
	 * the current task has successfully switched to the new exec.
	 */
	trace_sched_prepare_exec(current, bprm);

	/*
	 * Ensure all future errors are fatal.
	 */
	bprm->point_of_no_return = true;

	/* Make this the only thread in the thread group */
	retval = de_thread(me);
	if (retval)
		goto out;
	/* see the comment in check_unsafe_exec() */
	current->fs->in_exec = 0;
	/*
	 * Cancel any io_uring activity across execve
	 */
	io_uring_task_cancel();

	/* Ensure the files table is not shared. */
	retval = unshare_files();
	if (retval)
		goto out;

	/*
	 * Must be called _before_ exec_mmap() as bprm->mm is
	 * not visible until then. Doing it here also ensures
	 * we don't race against replace_mm_exe_file().
	 */
	retval = set_mm_exe_file(bprm->mm, bprm->file);
	if (retval)
		goto out;

	/* If the binary is not readable then enforce mm->dumpable=0 */
	would_dump(bprm, bprm->file);
	if (bprm->have_execfd)
		would_dump(bprm, bprm->executable);

	/*
	 * Release all of the old mmap stuff
	 */
	acct_arg_size(bprm, 0);
	retval = exec_mmap(bprm->mm);
	if (retval)
		goto out;

	/* The mm now belongs to the task, no longer to bprm. */
	bprm->mm = NULL;

	retval = exec_task_namespaces();
	if (retval)
		goto out_unlock;

#ifdef CONFIG_POSIX_TIMERS
	spin_lock_irq(&me->sighand->siglock);
	posix_cpu_timers_exit(me);
	spin_unlock_irq(&me->sighand->siglock);
	exit_itimers(me);
	flush_itimer_signals();
#endif

	/*
	 * Make the signal table private.
	 */
	retval = unshare_sighand(me);
	if (retval)
		goto out_unlock;

	me->flags &= ~(PF_RANDOMIZE | PF_FORKNOEXEC |
					PF_NOFREEZE | PF_NO_SETAFFINITY);
	flush_thread();
	me->personality &= ~bprm->per_clear;

	clear_syscall_work_syscall_user_dispatch(me);

	/*
	 * We have to apply CLOEXEC before we change whether the process is
	 * dumpable (in setup_new_exec) to avoid a race with a process in userspace
	 * trying to access the should-be-closed file descriptors of a process
	 * undergoing exec(2).
	 */
	do_close_on_exec(me->files);

	if (bprm->secureexec) {
		/* Make sure parent cannot signal privileged process. */
		me->pdeath_signal = 0;

		/*
		 * For secureexec, reset the stack limit to sane default to
		 * avoid bad behavior from the prior rlimits. This has to
		 * happen before arch_pick_mmap_layout(), which examines
		 * RLIMIT_STACK, but after the point of no return to avoid
		 * needing to clean up the change on failure.
		 */
		if (bprm->rlim_stack.rlim_cur > _STK_LIM)
			bprm->rlim_stack.rlim_cur = _STK_LIM;
	}

	me->sas_ss_sp = me->sas_ss_size = 0;

	/*
	 * Figure out dumpability. Note that this checking only of current
	 * is wrong, but userspace depends on it. This should be testing
	 * bprm->secureexec instead.
	 */
	if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
	    !(uid_eq(current_euid(), current_uid()) &&
	      gid_eq(current_egid(), current_gid())))
		set_dumpable(current->mm, suid_dumpable);
	else
		set_dumpable(current->mm, SUID_DUMP_USER);

	perf_event_exec();

	/*
	 * If the original filename was empty, alloc_bprm() made up a path
	 * that will probably not be useful to admins running ps or similar.
	 * Let's fix it up to be something reasonable.
	 */
	if (bprm->comm_from_dentry) {
		/*
		 * Hold RCU lock to keep the name from being freed behind our back.
		 * Use acquire semantics to make sure the terminating NUL from
		 * __d_alloc() is seen.
		 *
		 * Note, we're deliberately sloppy here. We don't need to care about
		 * detecting a concurrent rename and just want a terminated name.
		 */
		rcu_read_lock();
		__set_task_comm(me, smp_load_acquire(&bprm->file->f_path.dentry->d_name.name),
				true);
		rcu_read_unlock();
	} else {
		__set_task_comm(me, kbasename(bprm->filename), true);
	}

	/* An exec changes our domain. We are no longer part of the thread
	   group */
	WRITE_ONCE(me->self_exec_id, me->self_exec_id + 1);
	flush_signal_handlers(me, 0);

	retval = set_cred_ucounts(bprm->cred);
	if (retval < 0)
		goto out_unlock;

	/*
	 * install the new credentials for this executable
	 */
	security_bprm_committing_creds(bprm);

	commit_creds(bprm->cred);
	/* bprm->cred == NULL now marks "credentials already committed". */
	bprm->cred = NULL;

	/*
	 * Disable monitoring for regular users
	 * when executing setuid binaries. Must
	 * wait until new credentials are committed
	 * by commit_creds() above
	 */
	if (get_dumpable(me->mm) != SUID_DUMP_USER)
		perf_event_exit_task(me);
	/*
	 * cred_guard_mutex must be held at least to this point to prevent
	 * ptrace_attach() from altering our determination of the task's
	 * credentials; any time after this it may be unlocked.
	 */
	security_bprm_committed_creds(bprm);

	/* Pass the opened binary to the interpreter. */
	if (bprm->have_execfd) {
		retval = get_unused_fd_flags(0);
		if (retval < 0)
			goto out_unlock;
		/* The fd table now owns the file reference. */
		fd_install(retval, bprm->executable);
		bprm->executable = NULL;
		bprm->execfd = retval;
	}
	return 0;

out_unlock:
	up_write(&me->signal->exec_update_lock);
	/* cred_guard_mutex is only dropped here once the creds were committed. */
	if (!bprm->cred)
		mutex_unlock(&me->signal->cred_guard_mutex);

out:
	return retval;
}
EXPORT_SYMBOL(begin_new_exec);

/*
 * If @file cannot be read (inode_permission() with MAY_READ fails), set
 * BINPRM_FLAGS_ENFORCE_NONDUMP in bprm->interp_flags and, if needed, move
 * bprm->mm->user_ns up the parent chain until it is privileged with
 * respect to the inode's uid/gid (or init_user_ns is reached).
 */
void would_dump(struct linux_binprm *bprm, struct file *file)
{
	struct inode *inode = file_inode(file);
	struct mnt_idmap *idmap = file_mnt_idmap(file);
	if (inode_permission(idmap, inode, MAY_READ) < 0) {
		struct user_namespace *old, *user_ns;
		bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;

		/* Ensure mm->user_ns contains the executable */
		user_ns = old = bprm->mm->user_ns;
		while ((user_ns != &init_user_ns) &&
		       !privileged_wrt_inode_uidgid(user_ns, idmap, inode))
			user_ns = user_ns->parent;

		/* Swap references only if the namespace actually changed. */
		if (old != user_ns) {
			bprm->mm->user_ns = get_user_ns(user_ns);
			put_user_ns(old);
		}
	}
}
EXPORT_SYMBOL(would_dump);

/*
 * Pick the mmap layout and task size for the new image, then release
 * exec_update_lock and cred_guard_mutex.
 */
void setup_new_exec(struct linux_binprm * bprm)
{
	/* Setup things that can depend upon the personality */
	struct task_struct *me = current;

	arch_pick_mmap_layout(me->mm, &bprm->rlim_stack);

	arch_setup_new_exec();

	/* Set the new mm task size. We have to do that late because it may
	 * depend on TIF_32BIT which is only updated in flush_thread() on
	 * some architectures like powerpc
	 */
	me->mm->task_size = TASK_SIZE;
	up_write(&me->signal->exec_update_lock);
	mutex_unlock(&me->signal->cred_guard_mutex);
}
EXPORT_SYMBOL(setup_new_exec);

/* Runs immediately before start_thread() takes over. */
void finalize_exec(struct linux_binprm *bprm)
{
	/* Store any stack rlimit changes before starting thread. */
	task_lock(current->group_leader);
	current->signal->rlim[RLIMIT_STACK] = bprm->rlim_stack;
	task_unlock(current->group_leader);
}
EXPORT_SYMBOL(finalize_exec);

/*
 * Prepare credentials and lock ->cred_guard_mutex.
 * setup_new_exec() commits the new creds and drops the lock.
* Or, if exec fails before, free_bprm() should release ->cred * and unlock. */ static int prepare_bprm_creds(struct linux_binprm *bprm) { if (mutex_lock_interruptible(&current->signal->cred_guard_mutex)) return -ERESTARTNOINTR; bprm->cred = prepare_exec_creds(); if (likely(bprm->cred)) return 0; mutex_unlock(&current->signal->cred_guard_mutex); return -ENOMEM; } /* Matches do_open_execat() */ static void do_close_execat(struct file *file) { if (!file) return; exe_file_allow_write_access(file); fput(file); } static void free_bprm(struct linux_binprm *bprm) { if (bprm->mm) { acct_arg_size(bprm, 0); mmput(bprm->mm); } free_arg_pages(bprm); if (bprm->cred) { /* in case exec fails before de_thread() succeeds */ current->fs->in_exec = 0; mutex_unlock(&current->signal->cred_guard_mutex); abort_creds(bprm->cred); } do_close_execat(bprm->file); if (bprm->executable) fput(bprm->executable); /* If a binfmt changed the interp, free it. */ if (bprm->interp != bprm->filename) kfree(bprm->interp); kfree(bprm->fdpath); kfree(bprm); } static struct linux_binprm *alloc_bprm(int fd, struct filename *filename, int flags) { struct linux_binprm *bprm; struct file *file; int retval = -ENOMEM; file = do_open_execat(fd, filename, flags); if (IS_ERR(file)) return ERR_CAST(file); bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); if (!bprm) { do_close_execat(file); return ERR_PTR(-ENOMEM); } bprm->file = file; if (fd == AT_FDCWD || filename->name[0] == '/') { bprm->filename = filename->name; } else { if (filename->name[0] == '\0') { bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); bprm->comm_from_dentry = 1; } else { bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", fd, filename->name); } if (!bprm->fdpath) goto out_free; /* * Record that a name derived from an O_CLOEXEC fd will be * inaccessible after exec. This allows the code in exec to * choose to fail when the executable is not mmaped into the * interpreter and an open file descriptor is not passed to * the interpreter. 
This makes for a better user experience * than having the interpreter start and then immediately fail * when it finds the executable is inaccessible. */ if (get_close_on_exec(fd)) bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; bprm->filename = bprm->fdpath; } bprm->interp = bprm->filename; /* * At this point, security_file_open() has already been called (with * __FMODE_EXEC) and access control checks for AT_EXECVE_CHECK will * stop just after the security_bprm_creds_for_exec() call in * bprm_execve(). Indeed, the kernel should not try to parse the * content of the file with exec_binprm() nor change the calling * thread, which means that the following security functions will not * be called: * - security_bprm_check() * - security_bprm_creds_from_file() * - security_bprm_committing_creds() * - security_bprm_committed_creds() */ bprm->is_check = !!(flags & AT_EXECVE_CHECK); retval = bprm_mm_init(bprm); if (!retval) return bprm; out_free: free_bprm(bprm); return ERR_PTR(retval); } int bprm_change_interp(const char *interp, struct linux_binprm *bprm) { /* If a binfmt changed the interp, free it first. */ if (bprm->interp != bprm->filename) kfree(bprm->interp); bprm->interp = kstrdup(interp, GFP_KERNEL); if (!bprm->interp) return -ENOMEM; return 0; } EXPORT_SYMBOL(bprm_change_interp); /* * determine how safe it is to execute the proposed program * - the caller must hold ->cred_guard_mutex to protect against * PTRACE_ATTACH or seccomp thread-sync */ static void check_unsafe_exec(struct linux_binprm *bprm) { struct task_struct *p = current, *t; unsigned n_fs; if (p->ptrace) bprm->unsafe |= LSM_UNSAFE_PTRACE; /* * This isn't strictly necessary, but it makes it harder for LSMs to * mess up. */ if (task_no_new_privs(current)) bprm->unsafe |= LSM_UNSAFE_NO_NEW_PRIVS; /* * If another task is sharing our fs, we cannot safely * suid exec because the differently privileged task * will be able to manipulate the current directory, etc. 
* It would be nice to force an unshare instead... * * Otherwise we set fs->in_exec = 1 to deny clone(CLONE_FS) * from another sub-thread until de_thread() succeeds, this * state is protected by cred_guard_mutex we hold. */ n_fs = 1; read_seqlock_excl(&p->fs->seq); rcu_read_lock(); for_other_threads(p, t) { if (t->fs == p->fs) n_fs++; } rcu_read_unlock(); /* "users" and "in_exec" locked for copy_fs() */ if (p->fs->users > n_fs) bprm->unsafe |= LSM_UNSAFE_SHARE; else p->fs->in_exec = 1; read_sequnlock_excl(&p->fs->seq); } static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) { /* Handle suid and sgid on files */ struct mnt_idmap *idmap; struct inode *inode = file_inode(file); unsigned int mode; vfsuid_t vfsuid; vfsgid_t vfsgid; int err; if (!mnt_may_suid(file->f_path.mnt)) return; if (task_no_new_privs(current)) return; mode = READ_ONCE(inode->i_mode); if (!(mode & (S_ISUID|S_ISGID))) return; idmap = file_mnt_idmap(file); /* Be careful if suid/sgid is set */ inode_lock(inode); /* Atomically reload and check mode/uid/gid now that lock held. */ mode = inode->i_mode; vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid = i_gid_into_vfsgid(idmap, inode); err = inode_permission(idmap, inode, MAY_EXEC); inode_unlock(inode); /* Did the exec bit vanish out from under us? Give up. */ if (err) return; /* We ignore suid/sgid if there are no mappings for them in the ns */ if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) || !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid)) return; if (mode & S_ISUID) { bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->euid = vfsuid_into_kuid(vfsuid); } if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->egid = vfsgid_into_kgid(vfsgid); } } /* * Compute brpm->cred based upon the final binary. */ static int bprm_creds_from_file(struct linux_binprm *bprm) { /* Compute creds based on which file? */ struct file *file = bprm->execfd_creds ? 
bprm->executable : bprm->file; bprm_fill_uid(bprm, file); return security_bprm_creds_from_file(bprm, file); } /* * Fill the binprm structure from the inode. * Read the first BINPRM_BUF_SIZE bytes * * This may be called multiple times for binary chains (scripts for example). */ static int prepare_binprm(struct linux_binprm *bprm) { loff_t pos = 0; memset(bprm->buf, 0, BINPRM_BUF_SIZE); return kernel_read(bprm->file, bprm->buf, BINPRM_BUF_SIZE, &pos); } /* * Arguments are '\0' separated strings found at the location bprm->p * points to; chop off the first by relocating brpm->p to right after * the first '\0' encountered. */ int remove_arg_zero(struct linux_binprm *bprm) { unsigned long offset; char *kaddr; struct page *page; if (!bprm->argc) return 0; do { offset = bprm->p & ~PAGE_MASK; page = get_arg_page(bprm, bprm->p, 0); if (!page) return -EFAULT; kaddr = kmap_local_page(page); for (; offset < PAGE_SIZE && kaddr[offset]; offset++, bprm->p++) ; kunmap_local(kaddr); put_arg_page(page); } while (offset == PAGE_SIZE); bprm->p++; bprm->argc--; return 0; } EXPORT_SYMBOL(remove_arg_zero); /* * cycle the list of binary formats handler, until one recognizes the image */ static int search_binary_handler(struct linux_binprm *bprm) { struct linux_binfmt *fmt; int retval; retval = prepare_binprm(bprm); if (retval < 0) return retval; retval = security_bprm_check(bprm); if (retval) return retval; read_lock(&binfmt_lock); list_for_each_entry(fmt, &formats, lh) { if (!try_module_get(fmt->module)) continue; read_unlock(&binfmt_lock); retval = fmt->load_binary(bprm); read_lock(&binfmt_lock); put_binfmt(fmt); if (bprm->point_of_no_return || (retval != -ENOEXEC)) { read_unlock(&binfmt_lock); return retval; } } read_unlock(&binfmt_lock); return -ENOEXEC; } /* binfmt handlers will call back into begin_new_exec() on success. 
*/ static int exec_binprm(struct linux_binprm *bprm) { pid_t old_pid, old_vpid; int ret, depth; /* Need to fetch pid before load_binary changes it */ old_pid = current->pid; rcu_read_lock(); old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent)); rcu_read_unlock(); /* This allows 4 levels of binfmt rewrites before failing hard. */ for (depth = 0;; depth++) { struct file *exec; if (depth > 5) return -ELOOP; ret = search_binary_handler(bprm); if (ret < 0) return ret; if (!bprm->interpreter) break; exec = bprm->file; bprm->file = bprm->interpreter; bprm->interpreter = NULL; exe_file_allow_write_access(exec); if (unlikely(bprm->have_execfd)) { if (bprm->executable) { fput(exec); return -ENOEXEC; } bprm->executable = exec; } else fput(exec); } audit_bprm(bprm); trace_sched_process_exec(current, old_pid, bprm); ptrace_event(PTRACE_EVENT_EXEC, old_vpid); proc_exec_connector(current); return 0; } static int bprm_execve(struct linux_binprm *bprm) { int retval; retval = prepare_bprm_creds(bprm); if (retval) return retval; /* * Check for unsafe execution states before exec_binprm(), which * will call back into begin_new_exec(), into bprm_creds_from_file(), * where setuid-ness is evaluated. */ check_unsafe_exec(bprm); current->in_execve = 1; sched_mm_cid_before_execve(current); sched_exec(); /* Set the unchanging part of bprm->cred */ retval = security_bprm_creds_for_exec(bprm); if (retval || bprm->is_check) goto out; retval = exec_binprm(bprm); if (retval < 0) goto out; sched_mm_cid_after_execve(current); rseq_execve(current); /* execve succeeded */ current->in_execve = 0; user_events_execve(current); acct_update_integrals(current); task_numa_free(current, false); return retval; out: /* * If past the point of no return ensure the code never * returns to the userspace process. Use an existing fatal * signal if present otherwise terminate the process with * SIGSEGV. 
*/ if (bprm->point_of_no_return && !fatal_signal_pending(current)) force_fatal_sig(SIGSEGV); sched_mm_cid_after_execve(current); rseq_set_notify_resume(current); current->in_execve = 0; return retval; } static int do_execveat_common(int fd, struct filename *filename, struct user_arg_ptr argv, struct user_arg_ptr envp, int flags) { struct linux_binprm *bprm; int retval; if (IS_ERR(filename)) return PTR_ERR(filename); /* * We move the actual failure in case of RLIMIT_NPROC excess from * set*uid() to execve() because too many poorly written programs * don't check setuid() return code. Here we additionally recheck * whether NPROC limit is still exceeded. */ if ((current->flags & PF_NPROC_EXCEEDED) && is_rlimit_overlimit(current_ucounts(), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) { retval = -EAGAIN; goto out_ret; } /* We're below the limit (still or again), so we don't want to make * further execve() calls fail. */ current->flags &= ~PF_NPROC_EXCEEDED; bprm = alloc_bprm(fd, filename, flags); if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); goto out_ret; } retval = count(argv, MAX_ARG_STRINGS); if (retval < 0) goto out_free; bprm->argc = retval; retval = count(envp, MAX_ARG_STRINGS); if (retval < 0) goto out_free; bprm->envc = retval; retval = bprm_stack_limits(bprm); if (retval < 0) goto out_free; retval = copy_string_kernel(bprm->filename, bprm); if (retval < 0) goto out_free; bprm->exec = bprm->p; retval = copy_strings(bprm->envc, envp, bprm); if (retval < 0) goto out_free; retval = copy_strings(bprm->argc, argv, bprm); if (retval < 0) goto out_free; /* * When argv is empty, add an empty string ("") as argv[0] to * ensure confused userspace programs that start processing * from argv[1] won't end up walking envp. See also * bprm_stack_limits(). 
*/ if (bprm->argc == 0) { retval = copy_string_kernel("", bprm); if (retval < 0) goto out_free; bprm->argc = 1; pr_warn_once("process '%s' launched '%s' with NULL argv: empty string added\n", current->comm, bprm->filename); } retval = bprm_execve(bprm); out_free: free_bprm(bprm); out_ret: putname(filename); return retval; } int kernel_execve(const char *kernel_filename, const char *const *argv, const char *const *envp) { struct filename *filename; struct linux_binprm *bprm; int fd = AT_FDCWD; int retval; /* It is non-sense for kernel threads to call execve */ if (WARN_ON_ONCE(current->flags & PF_KTHREAD)) return -EINVAL; filename = getname_kernel(kernel_filename); if (IS_ERR(filename)) return PTR_ERR(filename); bprm = alloc_bprm(fd, filename, 0); if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); goto out_ret; } retval = count_strings_kernel(argv); if (WARN_ON_ONCE(retval == 0)) retval = -EINVAL; if (retval < 0) goto out_free; bprm->argc = retval; retval = count_strings_kernel(envp); if (retval < 0) goto out_free; bprm->envc = retval; retval = bprm_stack_limits(bprm); if (retval < 0) goto out_free; retval = copy_string_kernel(bprm->filename, bprm); if (retval < 0) goto out_free; bprm->exec = bprm->p; retval = copy_strings_kernel(bprm->envc, envp, bprm); if (retval < 0) goto out_free; retval = copy_strings_kernel(bprm->argc, argv, bprm); if (retval < 0) goto out_free; retval = bprm_execve(bprm); out_free: free_bprm(bprm); out_ret: putname(filename); return retval; } static int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp) { struct user_arg_ptr argv = { .ptr.native = __argv }; struct user_arg_ptr envp = { .ptr.native = __envp }; return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); } static int do_execveat(int fd, struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp, int flags) { struct user_arg_ptr argv = { .ptr.native = __argv }; struct 
user_arg_ptr envp = { .ptr.native = __envp }; return do_execveat_common(fd, filename, argv, envp, flags); } #ifdef CONFIG_COMPAT static int compat_do_execve(struct filename *filename, const compat_uptr_t __user *__argv, const compat_uptr_t __user *__envp) { struct user_arg_ptr argv = { .is_compat = true, .ptr.compat = __argv, }; struct user_arg_ptr envp = { .is_compat = true, .ptr.compat = __envp, }; return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); } static int compat_do_execveat(int fd, struct filename *filename, const compat_uptr_t __user *__argv, const compat_uptr_t __user *__envp, int flags) { struct user_arg_ptr argv = { .is_compat = true, .ptr.compat = __argv, }; struct user_arg_ptr envp = { .is_compat = true, .ptr.compat = __envp, }; return do_execveat_common(fd, filename, argv, envp, flags); } #endif void set_binfmt(struct linux_binfmt *new) { struct mm_struct *mm = current->mm; if (mm->binfmt) module_put(mm->binfmt->module); mm->binfmt = new; if (new) __module_get(new->module); } EXPORT_SYMBOL(set_binfmt); /* * set_dumpable stores three-value SUID_DUMP_* into mm->flags. 
*/ void set_dumpable(struct mm_struct *mm, int value) { if (WARN_ON((unsigned)value > SUID_DUMP_ROOT)) return; __mm_flags_set_mask_dumpable(mm, value); } SYSCALL_DEFINE3(execve, const char __user *, filename, const char __user *const __user *, argv, const char __user *const __user *, envp) { return do_execve(getname(filename), argv, envp); } SYSCALL_DEFINE5(execveat, int, fd, const char __user *, filename, const char __user *const __user *, argv, const char __user *const __user *, envp, int, flags) { return do_execveat(fd, getname_uflags(filename, flags), argv, envp, flags); } #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, const compat_uptr_t __user *, argv, const compat_uptr_t __user *, envp) { return compat_do_execve(getname(filename), argv, envp); } COMPAT_SYSCALL_DEFINE5(execveat, int, fd, const char __user *, filename, const compat_uptr_t __user *, argv, const compat_uptr_t __user *, envp, int, flags) { return compat_do_execveat(fd, getname_uflags(filename, flags), argv, envp, flags); } #endif #ifdef CONFIG_SYSCTL static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!error && write) validate_coredump_safety(); return error; } static const struct ctl_table fs_exec_sysctls[] = { { .procname = "suid_dumpable", .data = &suid_dumpable, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec_minmax_coredump, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, }; static int __init init_fs_exec_sysctls(void) { register_sysctl_init("fs", fs_exec_sysctls); return 0; } fs_initcall(init_fs_exec_sysctls); #endif /* CONFIG_SYSCTL */ #ifdef CONFIG_EXEC_KUNIT_TEST #include "tests/exec_kunit.c" #endif
13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * RT Mutexes: blocking mutual exclusion locks with PI support
 *
 * started by Ingo Molnar and Thomas Gleixner:
 *
 *  Copyright (C) 2004-2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *  Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com>
 *
 * This file contains the public data structure and API definitions.
 */

#ifndef __LINUX_RT_MUTEX_H
#define __LINUX_RT_MUTEX_H

#include <linux/compiler.h>
#include <linux/linkage.h>
#include <linux/rbtree_types.h>
#include <linux/spinlock_types_raw.h>

extern int max_lock_depth;

/*
 * The core rtmutex state, embedded in struct rt_mutex below.
 *
 * wait_lock:	spinlock to protect the structure
 * waiters:	rbtree root to enqueue waiters in priority order;
 *		caches top-waiter (leftmost node).
 * owner:	the mutex owner; NULL when unlocked. rt_mutex_owner() masks
 *		RT_MUTEX_HAS_WAITERS (bit 0) out of the stored value, so the
 *		raw field may carry that flag in addition to the task pointer.
 */
struct rt_mutex_base {
	raw_spinlock_t		wait_lock;
	struct rb_root_cached   waiters;
	struct task_struct	*owner;
};

/* Static initializer for an unlocked struct rt_mutex_base without waiters. */
#define __RT_MUTEX_BASE_INITIALIZER(rtbasename)				\
{									\
	.wait_lock = __RAW_SPIN_LOCK_UNLOCKED(rtbasename.wait_lock),	\
	.waiters = RB_ROOT_CACHED,					\
	.owner = NULL							\
}

/**
 * rt_mutex_base_is_locked - is the rtmutex locked
 * @lock: the mutex to be queried
 *
 * Returns true if the mutex is locked, false if unlocked.
 */
static inline bool rt_mutex_base_is_locked(struct rt_mutex_base *lock)
{
	return READ_ONCE(lock->owner) != NULL;
}

#ifdef CONFIG_RT_MUTEXES
/* Flag kept in bit 0 of rt_mutex_base::owner. */
#define RT_MUTEX_HAS_WAITERS	1UL

/* Return the owner task with the RT_MUTEX_HAS_WAITERS bit stripped. */
static inline struct task_struct *rt_mutex_owner(struct rt_mutex_base *lock)
{
	unsigned long owner = (unsigned long) READ_ONCE(lock->owner);

	return (struct task_struct *) (owner & ~RT_MUTEX_HAS_WAITERS);
}
#endif
extern void rt_mutex_base_init(struct rt_mutex_base *rtb);

/**
 * struct rt_mutex - the rt_mutex structure
 *
 * @rtmutex:	the embedded struct rt_mutex_base holding the wait_lock,
 *		the waiters tree and the owner
 * @dep_map:	lockdep map, only present with CONFIG_DEBUG_LOCK_ALLOC
 */
struct rt_mutex {
	struct rt_mutex_base	rtmutex;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map	dep_map;
#endif
};

struct rt_mutex_waiter;
struct hrtimer_sleeper;

#ifdef CONFIG_DEBUG_RT_MUTEXES
extern void rt_mutex_debug_task_free(struct task_struct *tsk);
#else
/* No-op without CONFIG_DEBUG_RT_MUTEXES. */
static inline void rt_mutex_debug_task_free(struct task_struct *tsk) { }
#endif

/*
 * Runtime initialization. Every expansion declares its own static
 * lock_class_key and passes the name of the calling function as the
 * lock name.
 */
#define rt_mutex_init(mutex) \
do { \
	static struct lock_class_key __key; \
	__rt_mutex_init(mutex, __func__, &__key); \
} while (0)

#ifdef CONFIG_DEBUG_LOCK_ALLOC
#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)	\
	.dep_map = {					\
		.name = #mutexname,			\
		.wait_type_inner = LD_WAIT_SLEEP,	\
	}
#else
#define __DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)
#endif

/* Static initializer for a struct rt_mutex named @mutexname. */
#define __RT_MUTEX_INITIALIZER(mutexname)				\
{									\
	.rtmutex = __RT_MUTEX_BASE_INITIALIZER(mutexname.rtmutex),	\
	__DEP_MAP_RT_MUTEX_INITIALIZER(mutexname)			\
}

/* Define and statically initialize a struct rt_mutex. */
#define DEFINE_RT_MUTEX(mutexname) \
	struct rt_mutex mutexname = __RT_MUTEX_INITIALIZER(mutexname)

extern void __rt_mutex_init(struct rt_mutex *lock, const char *name, struct lock_class_key *key);

/*
 * With CONFIG_DEBUG_LOCK_ALLOC, rt_mutex_lock() is rt_mutex_lock_nested()
 * with subclass 0. Without it, the nested and nest_lock variants reduce to
 * plain rt_mutex_lock() and their extra argument is dropped.
 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
extern void rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass);
extern void _rt_mutex_lock_nest_lock(struct rt_mutex *lock, struct lockdep_map *nest_lock);
#define rt_mutex_lock(lock) rt_mutex_lock_nested(lock, 0)
#define rt_mutex_lock_nest_lock(lock, nest_lock)			\
	do {								\
		typecheck(struct lockdep_map *, &(nest_lock)->dep_map);	\
		_rt_mutex_lock_nest_lock(lock, &(nest_lock)->dep_map);	\
	} while (0)

#else
extern void rt_mutex_lock(struct rt_mutex *lock);
#define rt_mutex_lock_nested(lock, subclass) rt_mutex_lock(lock)
#define rt_mutex_lock_nest_lock(lock, nest_lock) rt_mutex_lock(lock)
#endif

extern int rt_mutex_lock_interruptible(struct rt_mutex *lock);
extern int rt_mutex_lock_killable(struct rt_mutex *lock);
extern int rt_mutex_trylock(struct rt_mutex *lock);

extern void rt_mutex_unlock(struct rt_mutex *lock);

#endif
7 7 3 3 3 2 2 7 10 10 10 9 9 7 6 4 1 4 4 1 4 3 3 1 7 7 7 6 3 2 2 2 4 3 2 1 1 4 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 
507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 // SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/errno.h> #include <linux/fs.h> #include <linux/file.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/namei.h> #include <linux/poll.h> #include <linux/vmalloc.h> #include <linux/io_uring.h> #include <uapi/linux/io_uring.h> #include "io_uring.h" #include "opdef.h" #include "kbuf.h" #include "memmap.h" /* BIDs are addressed by a 16-bit field in a CQE */ #define MAX_BIDS_PER_BGID (1 << 16) /* Mapped buffer ring, return io_uring_buf from head */ #define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)] struct io_provide_buf { struct file *file; __u64 addr; __u32 len; __u32 bgid; __u32 nbufs; __u16 bid; }; static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len) { while (len) { struct io_uring_buf *buf; u32 buf_len, this_len; buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask); buf_len = READ_ONCE(buf->len); this_len = min_t(u32, len, buf_len); buf_len -= this_len; /* Stop looping for invalid buffer length of 0 */ if (buf_len || !this_len) { buf->addr += this_len; buf->len = 
buf_len; return false; } buf->len = 0; bl->head++; len -= this_len; } return true; } bool io_kbuf_commit(struct io_kiocb *req, struct io_buffer_list *bl, int len, int nr) { if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT))) return true; req->flags &= ~REQ_F_BUFFERS_COMMIT; if (unlikely(len < 0)) return true; if (bl->flags & IOBL_INC) return io_kbuf_inc_commit(bl, len); bl->head += nr; return true; } static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx, unsigned int bgid) { lockdep_assert_held(&ctx->uring_lock); return xa_load(&ctx->io_bl_xa, bgid); } static int io_buffer_add_list(struct io_ring_ctx *ctx, struct io_buffer_list *bl, unsigned int bgid) { /* * Store buffer group ID and finally mark the list as visible. * The normal lookup doesn't care about the visibility as we're * always under the ->uring_lock, but lookups from mmap do. */ bl->bgid = bgid; guard(mutex)(&ctx->mmap_lock); return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL)); } void io_kbuf_drop_legacy(struct io_kiocb *req) { if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED))) return; req->flags &= ~REQ_F_BUFFER_SELECTED; kfree(req->kbuf); req->kbuf = NULL; } bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags) { struct io_ring_ctx *ctx = req->ctx; struct io_buffer_list *bl; struct io_buffer *buf; io_ring_submit_lock(ctx, issue_flags); buf = req->kbuf; bl = io_buffer_get_list(ctx, buf->bgid); list_add(&buf->list, &bl->buf_list); bl->nbufs++; req->flags &= ~REQ_F_BUFFER_SELECTED; io_ring_submit_unlock(ctx, issue_flags); return true; } static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len, struct io_buffer_list *bl) { if (!list_empty(&bl->buf_list)) { struct io_buffer *kbuf; kbuf = list_first_entry(&bl->buf_list, struct io_buffer, list); list_del(&kbuf->list); bl->nbufs--; if (*len == 0 || *len > kbuf->len) *len = kbuf->len; if (list_empty(&bl->buf_list)) req->flags |= REQ_F_BL_EMPTY; req->flags |= 
REQ_F_BUFFER_SELECTED; req->kbuf = kbuf; req->buf_index = kbuf->bid; return u64_to_user_ptr(kbuf->addr); } return NULL; } static int io_provided_buffers_select(struct io_kiocb *req, size_t *len, struct io_buffer_list *bl, struct iovec *iov) { void __user *buf; buf = io_provided_buffer_select(req, len, bl); if (unlikely(!buf)) return -ENOBUFS; iov[0].iov_base = buf; iov[0].iov_len = *len; return 1; } static struct io_br_sel io_ring_buffer_select(struct io_kiocb *req, size_t *len, struct io_buffer_list *bl, unsigned int issue_flags) { struct io_uring_buf_ring *br = bl->buf_ring; __u16 tail, head = bl->head; struct io_br_sel sel = { }; struct io_uring_buf *buf; u32 buf_len; tail = smp_load_acquire(&br->tail); if (unlikely(tail == head)) return sel; if (head + 1 == tail) req->flags |= REQ_F_BL_EMPTY; buf = io_ring_head_to_buf(br, head, bl->mask); buf_len = READ_ONCE(buf->len); if (*len == 0 || *len > buf_len) *len = buf_len; req->flags |= REQ_F_BUFFER_RING | REQ_F_BUFFERS_COMMIT; req->buf_index = buf->bid; sel.buf_list = bl; sel.addr = u64_to_user_ptr(buf->addr); if (issue_flags & IO_URING_F_UNLOCKED || !io_file_can_poll(req)) { /* * If we came in unlocked, we have no choice but to consume the * buffer here, otherwise nothing ensures that the buffer won't * get used by others. This does mean it'll be pinned until the * IO completes, coming in unlocked means we're being called from * io-wq context and there may be further retries in async hybrid * mode. For the locked case, the caller must call commit when * the transfer completes (or if we get -EAGAIN and must poll of * retry). 
*/ io_kbuf_commit(req, sel.buf_list, *len, 1); sel.buf_list = NULL; } return sel; } struct io_br_sel io_buffer_select(struct io_kiocb *req, size_t *len, unsigned buf_group, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; struct io_br_sel sel = { }; struct io_buffer_list *bl; io_ring_submit_lock(req->ctx, issue_flags); bl = io_buffer_get_list(ctx, buf_group); if (likely(bl)) { if (bl->flags & IOBL_BUF_RING) sel = io_ring_buffer_select(req, len, bl, issue_flags); else sel.addr = io_provided_buffer_select(req, len, bl); } io_ring_submit_unlock(req->ctx, issue_flags); return sel; } /* cap it at a reasonable 256, will be one page even for 4K */ #define PEEK_MAX_IMPORT 256 static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, struct io_buffer_list *bl) { struct io_uring_buf_ring *br = bl->buf_ring; struct iovec *iov = arg->iovs; int nr_iovs = arg->nr_iovs; __u16 nr_avail, tail, head; struct io_uring_buf *buf; tail = smp_load_acquire(&br->tail); head = bl->head; nr_avail = min_t(__u16, tail - head, UIO_MAXIOV); if (unlikely(!nr_avail)) return -ENOBUFS; buf = io_ring_head_to_buf(br, head, bl->mask); if (arg->max_len) { u32 len = READ_ONCE(buf->len); size_t needed; if (unlikely(!len)) return -ENOBUFS; needed = (arg->max_len + len - 1) / len; needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT); if (nr_avail > needed) nr_avail = needed; } /* * only alloc a bigger array if we know we have data to map, eg not * a speculative peek operation. 
*/ if (arg->mode & KBUF_MODE_EXPAND && nr_avail > nr_iovs && arg->max_len) { iov = kmalloc_array(nr_avail, sizeof(struct iovec), GFP_KERNEL); if (unlikely(!iov)) return -ENOMEM; if (arg->mode & KBUF_MODE_FREE) kfree(arg->iovs); arg->iovs = iov; nr_iovs = nr_avail; } else if (nr_avail < nr_iovs) { nr_iovs = nr_avail; } /* set it to max, if not set, so we can use it unconditionally */ if (!arg->max_len) arg->max_len = INT_MAX; req->buf_index = buf->bid; do { u32 len = READ_ONCE(buf->len); /* truncate end piece, if needed, for non partial buffers */ if (len > arg->max_len) { len = arg->max_len; if (!(bl->flags & IOBL_INC)) { arg->partial_map = 1; if (iov != arg->iovs) break; buf->len = len; } } iov->iov_base = u64_to_user_ptr(buf->addr); iov->iov_len = len; iov++; arg->out_len += len; arg->max_len -= len; if (!arg->max_len) break; buf = io_ring_head_to_buf(br, ++head, bl->mask); } while (--nr_iovs); if (head == tail) req->flags |= REQ_F_BL_EMPTY; req->flags |= REQ_F_BUFFER_RING; return iov - arg->iovs; } int io_buffers_select(struct io_kiocb *req, struct buf_sel_arg *arg, struct io_br_sel *sel, unsigned int issue_flags) { struct io_ring_ctx *ctx = req->ctx; int ret = -ENOENT; io_ring_submit_lock(ctx, issue_flags); sel->buf_list = io_buffer_get_list(ctx, arg->buf_group); if (unlikely(!sel->buf_list)) goto out_unlock; if (sel->buf_list->flags & IOBL_BUF_RING) { ret = io_ring_buffers_peek(req, arg, sel->buf_list); /* * Don't recycle these buffers if we need to go through poll. * Nobody else can use them anyway, and holding on to provided * buffers for a send/write operation would happen on the app * side anyway with normal buffers. Besides, we already * committed them, they cannot be put back in the queue. 
*/ if (ret > 0) { req->flags |= REQ_F_BUFFERS_COMMIT | REQ_F_BL_NO_RECYCLE; io_kbuf_commit(req, sel->buf_list, arg->out_len, ret); } } else { ret = io_provided_buffers_select(req, &arg->out_len, sel->buf_list, arg->iovs); } out_unlock: if (issue_flags & IO_URING_F_UNLOCKED) { sel->buf_list = NULL; mutex_unlock(&ctx->uring_lock); } return ret; } int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, struct io_br_sel *sel) { struct io_ring_ctx *ctx = req->ctx; struct io_buffer_list *bl; int ret; lockdep_assert_held(&ctx->uring_lock); bl = io_buffer_get_list(ctx, arg->buf_group); if (unlikely(!bl)) return -ENOENT; if (bl->flags & IOBL_BUF_RING) { ret = io_ring_buffers_peek(req, arg, bl); if (ret > 0) req->flags |= REQ_F_BUFFERS_COMMIT; sel->buf_list = bl; return ret; } /* don't support multiple buffer selections for legacy */ sel->buf_list = NULL; return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs); } static inline bool __io_put_kbuf_ring(struct io_kiocb *req, struct io_buffer_list *bl, int len, int nr) { bool ret = true; if (bl) ret = io_kbuf_commit(req, bl, len, nr); req->flags &= ~REQ_F_BUFFER_RING; return ret; } unsigned int __io_put_kbufs(struct io_kiocb *req, struct io_buffer_list *bl, int len, int nbufs) { unsigned int ret; ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT); if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) { io_kbuf_drop_legacy(req); return ret; } if (!__io_put_kbuf_ring(req, bl, len, nbufs)) ret |= IORING_CQE_F_BUF_MORE; return ret; } static int io_remove_buffers_legacy(struct io_ring_ctx *ctx, struct io_buffer_list *bl, unsigned long nbufs) { unsigned long i = 0; struct io_buffer *nxt; /* protects io_buffers_cache */ lockdep_assert_held(&ctx->uring_lock); WARN_ON_ONCE(bl->flags & IOBL_BUF_RING); for (i = 0; i < nbufs && !list_empty(&bl->buf_list); i++) { nxt = list_first_entry(&bl->buf_list, struct io_buffer, list); list_del(&nxt->list); bl->nbufs--; kfree(nxt); cond_resched(); } return i; } 
static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) { if (bl->flags & IOBL_BUF_RING) io_free_region(ctx, &bl->region); else io_remove_buffers_legacy(ctx, bl, -1U); kfree(bl); } void io_destroy_buffers(struct io_ring_ctx *ctx) { struct io_buffer_list *bl; while (1) { unsigned long index = 0; scoped_guard(mutex, &ctx->mmap_lock) { bl = xa_find(&ctx->io_bl_xa, &index, ULONG_MAX, XA_PRESENT); if (bl) xa_erase(&ctx->io_bl_xa, bl->bgid); } if (!bl) break; io_put_bl(ctx, bl); } } static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl) { scoped_guard(mutex, &ctx->mmap_lock) WARN_ON_ONCE(xa_erase(&ctx->io_bl_xa, bl->bgid) != bl); io_put_bl(ctx, bl); } int io_remove_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf); u64 tmp; if (sqe->rw_flags || sqe->addr || sqe->len || sqe->off || sqe->splice_fd_in) return -EINVAL; tmp = READ_ONCE(sqe->fd); if (!tmp || tmp > MAX_BIDS_PER_BGID) return -EINVAL; memset(p, 0, sizeof(*p)); p->nbufs = tmp; p->bgid = READ_ONCE(sqe->buf_group); return 0; } int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { unsigned long size, tmp_check; struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf); u64 tmp; if (sqe->rw_flags || sqe->splice_fd_in) return -EINVAL; tmp = READ_ONCE(sqe->fd); if (!tmp || tmp > MAX_BIDS_PER_BGID) return -E2BIG; p->nbufs = tmp; p->addr = READ_ONCE(sqe->addr); p->len = READ_ONCE(sqe->len); if (!p->len) return -EINVAL; if (check_mul_overflow((unsigned long)p->len, (unsigned long)p->nbufs, &size)) return -EOVERFLOW; if (check_add_overflow((unsigned long)p->addr, size, &tmp_check)) return -EOVERFLOW; if (!access_ok(u64_to_user_ptr(p->addr), size)) return -EFAULT; p->bgid = READ_ONCE(sqe->buf_group); tmp = READ_ONCE(sqe->off); if (tmp > USHRT_MAX) return -E2BIG; if (tmp + p->nbufs > MAX_BIDS_PER_BGID) return -EINVAL; p->bid = tmp; return 0; } static int 
io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf, struct io_buffer_list *bl) { struct io_buffer *buf; u64 addr = pbuf->addr; int ret = -ENOMEM, i, bid = pbuf->bid; for (i = 0; i < pbuf->nbufs; i++) { /* * Nonsensical to have more than sizeof(bid) buffers in a * buffer list, as the application then has no way of knowing * which duplicate bid refers to what buffer. */ if (bl->nbufs == USHRT_MAX) { ret = -EOVERFLOW; break; } buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT); if (!buf) break; list_add_tail(&buf->list, &bl->buf_list); bl->nbufs++; buf->addr = addr; buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT); buf->bid = bid; buf->bgid = pbuf->bgid; addr += pbuf->len; bid++; cond_resched(); } return i ? 0 : ret; } static int __io_manage_buffers_legacy(struct io_kiocb *req, struct io_buffer_list *bl) { struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf); int ret; if (!bl) { if (req->opcode != IORING_OP_PROVIDE_BUFFERS) return -ENOENT; bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT); if (!bl) return -ENOMEM; INIT_LIST_HEAD(&bl->buf_list); ret = io_buffer_add_list(req->ctx, bl, p->bgid); if (ret) { kfree(bl); return ret; } } /* can't use provide/remove buffers command on mapped buffers */ if (bl->flags & IOBL_BUF_RING) return -EINVAL; if (req->opcode == IORING_OP_PROVIDE_BUFFERS) return io_add_buffers(req->ctx, p, bl); return io_remove_buffers_legacy(req->ctx, bl, p->nbufs); } int io_manage_buffers_legacy(struct io_kiocb *req, unsigned int issue_flags) { struct io_provide_buf *p = io_kiocb_to_cmd(req, struct io_provide_buf); struct io_ring_ctx *ctx = req->ctx; struct io_buffer_list *bl; int ret; io_ring_submit_lock(ctx, issue_flags); bl = io_buffer_get_list(ctx, p->bgid); ret = __io_manage_buffers_legacy(req, bl); io_ring_submit_unlock(ctx, issue_flags); if (ret < 0) req_set_fail(req); io_req_set_res(req, ret, 0); return IOU_COMPLETE; } int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_buf_reg reg; 
struct io_buffer_list *bl; struct io_uring_region_desc rd; struct io_uring_buf_ring *br; unsigned long mmap_offset; unsigned long ring_size; int ret; lockdep_assert_held(&ctx->uring_lock); if (copy_from_user(&reg, arg, sizeof(reg))) return -EFAULT; if (!mem_is_zero(reg.resv, sizeof(reg.resv))) return -EINVAL; if (reg.flags & ~(IOU_PBUF_RING_MMAP | IOU_PBUF_RING_INC)) return -EINVAL; if (!is_power_of_2(reg.ring_entries)) return -EINVAL; /* cannot disambiguate full vs empty due to head/tail size */ if (reg.ring_entries >= 65536) return -EINVAL; bl = io_buffer_get_list(ctx, reg.bgid); if (bl) { /* if mapped buffer ring OR classic exists, don't allow */ if (bl->flags & IOBL_BUF_RING || !list_empty(&bl->buf_list)) return -EEXIST; io_destroy_bl(ctx, bl); } bl = kzalloc(sizeof(*bl), GFP_KERNEL_ACCOUNT); if (!bl) return -ENOMEM; mmap_offset = (unsigned long)reg.bgid << IORING_OFF_PBUF_SHIFT; ring_size = flex_array_size(br, bufs, reg.ring_entries); memset(&rd, 0, sizeof(rd)); rd.size = PAGE_ALIGN(ring_size); if (!(reg.flags & IOU_PBUF_RING_MMAP)) { rd.user_addr = reg.ring_addr; rd.flags |= IORING_MEM_REGION_TYPE_USER; } ret = io_create_region_mmap_safe(ctx, &bl->region, &rd, mmap_offset); if (ret) goto fail; br = io_region_get_ptr(&bl->region); #ifdef SHM_COLOUR /* * On platforms that have specific aliasing requirements, SHM_COLOUR * is set and we must guarantee that the kernel and user side align * nicely. We cannot do that if IOU_PBUF_RING_MMAP isn't set and * the application mmap's the provided ring buffer. Fail the request * if we, by chance, don't end up with aligned addresses. The app * should use IOU_PBUF_RING_MMAP instead, and liburing will handle * this transparently. 
*/ if (!(reg.flags & IOU_PBUF_RING_MMAP) && ((reg.ring_addr | (unsigned long)br) & (SHM_COLOUR - 1))) { ret = -EINVAL; goto fail; } #endif bl->nr_entries = reg.ring_entries; bl->mask = reg.ring_entries - 1; bl->flags |= IOBL_BUF_RING; bl->buf_ring = br; if (reg.flags & IOU_PBUF_RING_INC) bl->flags |= IOBL_INC; io_buffer_add_list(ctx, bl, reg.bgid); return 0; fail: io_free_region(ctx, &bl->region); kfree(bl); return ret; } int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_buf_reg reg; struct io_buffer_list *bl; lockdep_assert_held(&ctx->uring_lock); if (copy_from_user(&reg, arg, sizeof(reg))) return -EFAULT; if (!mem_is_zero(reg.resv, sizeof(reg.resv)) || reg.flags) return -EINVAL; bl = io_buffer_get_list(ctx, reg.bgid); if (!bl) return -ENOENT; if (!(bl->flags & IOBL_BUF_RING)) return -EINVAL; scoped_guard(mutex, &ctx->mmap_lock) xa_erase(&ctx->io_bl_xa, bl->bgid); io_put_bl(ctx, bl); return 0; } int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg) { struct io_uring_buf_status buf_status; struct io_buffer_list *bl; if (copy_from_user(&buf_status, arg, sizeof(buf_status))) return -EFAULT; if (!mem_is_zero(buf_status.resv, sizeof(buf_status.resv))) return -EINVAL; bl = io_buffer_get_list(ctx, buf_status.buf_group); if (!bl) return -ENOENT; if (!(bl->flags & IOBL_BUF_RING)) return -EINVAL; buf_status.head = bl->head; if (copy_to_user(arg, &buf_status, sizeof(buf_status))) return -EFAULT; return 0; } struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx, unsigned int bgid) { struct io_buffer_list *bl; lockdep_assert_held(&ctx->mmap_lock); bl = xa_load(&ctx->io_bl_xa, bgid); if (!bl || !(bl->flags & IOBL_BUF_RING)) return NULL; return &bl->region; }
1 2 2 2 2 1 6 6 6 6 6 6 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 // SPDX-License-Identifier: GPL-2.0-or-later /* delayacct.c - per-task delay accounting * * Copyright (C) Shailabh Nagar, IBM Corp. 2006 */ #include <linux/sched.h> #include <linux/sched/task.h> #include <linux/sched/cputime.h> #include <linux/sched/clock.h> #include <linux/slab.h> #include <linux/taskstats.h> #include <linux/sysctl.h> #include <linux/delayacct.h> #include <linux/module.h> #define UPDATE_DELAY(type) \ do { \ d->type##_delay_max = tsk->delays->type##_delay_max; \ d->type##_delay_min = tsk->delays->type##_delay_min; \ tmp = d->type##_delay_total + tsk->delays->type##_delay; \ d->type##_delay_total = (tmp < d->type##_delay_total) ? 
0 : tmp; \ d->type##_count += tsk->delays->type##_count; \ } while (0) DEFINE_STATIC_KEY_FALSE(delayacct_key); int delayacct_on __read_mostly; /* Delay accounting turned on/off */ struct kmem_cache *delayacct_cache; static void set_delayacct(bool enabled) { if (enabled) { static_branch_enable(&delayacct_key); delayacct_on = 1; } else { delayacct_on = 0; static_branch_disable(&delayacct_key); } } static int __init delayacct_setup_enable(char *str) { delayacct_on = 1; return 1; } __setup("delayacct", delayacct_setup_enable); void delayacct_init(void) { delayacct_cache = KMEM_CACHE(task_delay_info, SLAB_PANIC|SLAB_ACCOUNT); delayacct_tsk_init(&init_task); set_delayacct(delayacct_on); } #ifdef CONFIG_PROC_SYSCTL static int sysctl_delayacct(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { int state = delayacct_on; struct ctl_table t; int err; if (write && !capable(CAP_SYS_ADMIN)) return -EPERM; t = *table; t.data = &state; err = proc_dointvec_minmax(&t, write, buffer, lenp, ppos); if (err < 0) return err; if (write) set_delayacct(state); return err; } static const struct ctl_table kern_delayacct_table[] = { { .procname = "task_delayacct", .data = NULL, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = sysctl_delayacct, .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, }; static __init int kernel_delayacct_sysctls_init(void) { register_sysctl_init("kernel", kern_delayacct_table); return 0; } late_initcall(kernel_delayacct_sysctls_init); #endif void __delayacct_tsk_init(struct task_struct *tsk) { tsk->delays = kmem_cache_zalloc(delayacct_cache, GFP_KERNEL); if (tsk->delays) raw_spin_lock_init(&tsk->delays->lock); } /* * Finish delay accounting for a statistic using its timestamps (@start), * accumulator (@total) and @count */ static void delayacct_end(raw_spinlock_t *lock, u64 *start, u64 *total, u32 *count, u64 *max, u64 *min) { s64 ns = local_clock() - *start; unsigned long flags; if (ns > 0) { raw_spin_lock_irqsave(lock, 
flags); *total += ns; (*count)++; if (ns > *max) *max = ns; if (*min == 0 || ns < *min) *min = ns; raw_spin_unlock_irqrestore(lock, flags); } } void __delayacct_blkio_start(void) { current->delays->blkio_start = local_clock(); } /* * We cannot rely on the `current` macro, as we haven't yet switched back to * the process being woken. */ void __delayacct_blkio_end(struct task_struct *p) { delayacct_end(&p->delays->lock, &p->delays->blkio_start, &p->delays->blkio_delay, &p->delays->blkio_count, &p->delays->blkio_delay_max, &p->delays->blkio_delay_min); } int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) { u64 utime, stime, stimescaled, utimescaled; unsigned long long t2, t3; unsigned long flags, t1; s64 tmp; task_cputime(tsk, &utime, &stime); tmp = (s64)d->cpu_run_real_total; tmp += utime + stime; d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; task_cputime_scaled(tsk, &utimescaled, &stimescaled); tmp = (s64)d->cpu_scaled_run_real_total; tmp += utimescaled + stimescaled; d->cpu_scaled_run_real_total = (tmp < (s64)d->cpu_scaled_run_real_total) ? 0 : tmp; /* * No locking available for sched_info (and too expensive to add one) * Mitigate by taking snapshot of values */ t1 = tsk->sched_info.pcount; t2 = tsk->sched_info.run_delay; t3 = tsk->se.sum_exec_runtime; d->cpu_count += t1; d->cpu_delay_max = tsk->sched_info.max_run_delay; d->cpu_delay_min = tsk->sched_info.min_run_delay; tmp = (s64)d->cpu_delay_total + t2; d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp; tmp = (s64)d->cpu_run_virtual_total + t3; d->cpu_run_virtual_total = (tmp < (s64)d->cpu_run_virtual_total) ? 
0 : tmp; if (!tsk->delays) return 0; /* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */ raw_spin_lock_irqsave(&tsk->delays->lock, flags); UPDATE_DELAY(blkio); UPDATE_DELAY(swapin); UPDATE_DELAY(freepages); UPDATE_DELAY(thrashing); UPDATE_DELAY(compact); UPDATE_DELAY(wpcopy); UPDATE_DELAY(irq); raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return 0; } __u64 __delayacct_blkio_ticks(struct task_struct *tsk) { __u64 ret; unsigned long flags; raw_spin_lock_irqsave(&tsk->delays->lock, flags); ret = nsec_to_clock_t(tsk->delays->blkio_delay); raw_spin_unlock_irqrestore(&tsk->delays->lock, flags); return ret; } void __delayacct_freepages_start(void) { current->delays->freepages_start = local_clock(); } void __delayacct_freepages_end(void) { delayacct_end(&current->delays->lock, &current->delays->freepages_start, &current->delays->freepages_delay, &current->delays->freepages_count, &current->delays->freepages_delay_max, &current->delays->freepages_delay_min); } void __delayacct_thrashing_start(bool *in_thrashing) { *in_thrashing = !!current->in_thrashing; if (*in_thrashing) return; current->in_thrashing = 1; current->delays->thrashing_start = local_clock(); } void __delayacct_thrashing_end(bool *in_thrashing) { if (*in_thrashing) return; current->in_thrashing = 0; delayacct_end(&current->delays->lock, &current->delays->thrashing_start, &current->delays->thrashing_delay, &current->delays->thrashing_count, &current->delays->thrashing_delay_max, &current->delays->thrashing_delay_min); } void __delayacct_swapin_start(void) { current->delays->swapin_start = local_clock(); } void __delayacct_swapin_end(void) { delayacct_end(&current->delays->lock, &current->delays->swapin_start, &current->delays->swapin_delay, &current->delays->swapin_count, &current->delays->swapin_delay_max, &current->delays->swapin_delay_min); } void __delayacct_compact_start(void) { current->delays->compact_start = local_clock(); } void __delayacct_compact_end(void) { 
delayacct_end(&current->delays->lock, &current->delays->compact_start, &current->delays->compact_delay, &current->delays->compact_count, &current->delays->compact_delay_max, &current->delays->compact_delay_min); } void __delayacct_wpcopy_start(void) { current->delays->wpcopy_start = local_clock(); } void __delayacct_wpcopy_end(void) { delayacct_end(&current->delays->lock, &current->delays->wpcopy_start, &current->delays->wpcopy_delay, &current->delays->wpcopy_count, &current->delays->wpcopy_delay_max, &current->delays->wpcopy_delay_min); } void __delayacct_irq(struct task_struct *task, u32 delta) { unsigned long flags; raw_spin_lock_irqsave(&task->delays->lock, flags); task->delays->irq_delay += delta; task->delays->irq_count++; if (delta > task->delays->irq_delay_max) task->delays->irq_delay_max = delta; if (delta && (!task->delays->irq_delay_min || delta < task->delays->irq_delay_min)) task->delays->irq_delay_min = delta; raw_spin_unlock_irqrestore(&task->delays->lock, flags); }
19 19 19 19 19 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 // SPDX-License-Identifier: GPL-2.0 /* * fs/partitions/sgi.c * * Code extracted from drivers/block/genhd.c */ #include "check.h" #define SGI_LABEL_MAGIC 0x0be5a941 enum { LINUX_RAID_PARTITION = 0xfd, /* autodetect RAID partition */ }; struct sgi_disklabel { __be32 magic_mushroom; /* Big fat spliff... */ __be16 root_part_num; /* Root partition number */ __be16 swap_part_num; /* Swap partition number */ s8 boot_file[16]; /* Name of boot file for ARCS */ u8 _unused0[48]; /* Device parameter useless crapola.. */ struct sgi_volume { s8 name[8]; /* Name of volume */ __be32 block_num; /* Logical block number */ __be32 num_bytes; /* How big, in bytes */ } volume[15]; struct sgi_partition { __be32 num_blocks; /* Size in logical blocks */ __be32 first_block; /* First logical block */ __be32 type; /* Type of this partition */ } partitions[16]; __be32 csum; /* Disk label checksum */ __be32 _unused1; /* Padding */ }; int sgi_partition(struct parsed_partitions *state) { int i, csum; __be32 magic; int slot = 1; unsigned int start, blocks; __be32 *ui, cs; Sector sect; struct sgi_disklabel *label; struct sgi_partition *p; label = read_part_sector(state, 0, &sect); if (!label) return -1; p = &label->partitions[0]; magic = label->magic_mushroom; if(be32_to_cpu(magic) != SGI_LABEL_MAGIC) { put_dev_sector(sect); return 0; } ui = ((__be32 *) (label + 1)) - 1; for(csum = 0; ui >= ((__be32 *) label);) { cs = *ui--; csum += be32_to_cpu(cs); } if(csum) { printk(KERN_WARNING "Dev %s SGI disklabel: csum bad, label corrupted\n", state->disk->disk_name); put_dev_sector(sect); return 0; } /* All SGI disk labels have 16 partitions, disks under Linux only * have 15 minor's. 
Luckily there are always a few zero length * partitions which we don't care about so we never overflow the * current_minor. */ for(i = 0; i < 16; i++, p++) { blocks = be32_to_cpu(p->num_blocks); start = be32_to_cpu(p->first_block); if (blocks) { put_partition(state, slot, start, blocks); if (be32_to_cpu(p->type) == LINUX_RAID_PARTITION) state->parts[slot].flags = ADDPART_FLAG_RAID; } slot++; } strlcat(state->pp_buf, "\n", PAGE_SIZE); put_dev_sector(sect); return 1; }
2761 2757 2766 2751 2758 2763 11 2754 1 4 4 1 1 1 1 1 4 5405 5426 5393 5405 5410 5407 5410 5414 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 // SPDX-License-Identifier: GPL-2.0 #include <linux/irq_work.h> #include <linux/spinlock.h> #include <linux/task_work.h> #include <linux/resume_user_mode.h> static struct callback_head work_exited; /* all we need is ->next == NULL */ #ifdef CONFIG_IRQ_WORK static void task_work_set_notify_irq(struct irq_work *entry) { test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); } static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = IRQ_WORK_INIT_HARD(task_work_set_notify_irq); #endif /** * task_work_add - ask the @task to execute @work->func() * @task: the task which should run the callback * @work: the callback to run * @notify: how to notify the targeted task * * Queue @work for task_work_run() below and notify the @task if @notify * is @TWA_RESUME, @TWA_SIGNAL, @TWA_SIGNAL_NO_IPI or @TWA_NMI_CURRENT. * * @TWA_SIGNAL works like signals, in that the it will interrupt the targeted * task and run the task_work, regardless of whether the task is currently * running in the kernel or userspace. 
* @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a * reschedule IPI to force the targeted task to reschedule and run task_work. * This can be advantageous if there's no strict requirement that the * task_work be run as soon as possible, just whenever the task enters the * kernel anyway. * @TWA_RESUME work is run only when the task exits the kernel and returns to * user mode, or before entering guest mode. * @TWA_NMI_CURRENT works like @TWA_RESUME, except it can only be used for the * current @task and if the current context is NMI. * * Fails if the @task is exiting/exited and thus it can't process this @work. * Otherwise @work->func() will be called when the @task goes through one of * the aforementioned transitions, or exits. * * If the targeted task is exiting, then an error is returned and the work item * is not queued. It's up to the caller to arrange for an alternative mechanism * in that case. * * Note: there is no ordering guarantee on works queued here. The task_work * list is LIFO. * * RETURNS: * 0 if succeeds or -ESRCH. 
*/ int task_work_add(struct task_struct *task, struct callback_head *work, enum task_work_notify_mode notify) { struct callback_head *head; if (notify == TWA_NMI_CURRENT) { if (WARN_ON_ONCE(task != current)) return -EINVAL; if (!IS_ENABLED(CONFIG_IRQ_WORK)) return -EINVAL; } else { kasan_record_aux_stack(work); } head = READ_ONCE(task->task_works); do { if (unlikely(head == &work_exited)) return -ESRCH; work->next = head; } while (!try_cmpxchg(&task->task_works, &head, work)); switch (notify) { case TWA_NONE: break; case TWA_RESUME: set_notify_resume(task); break; case TWA_SIGNAL: set_notify_signal(task); break; case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; #ifdef CONFIG_IRQ_WORK case TWA_NMI_CURRENT: irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); break; #endif default: WARN_ON_ONCE(1); break; } return 0; } /** * task_work_cancel_match - cancel a pending work added by task_work_add() * @task: the task which should execute the work * @match: match function to call * @data: data to be passed in to match function * * RETURNS: * The found work or NULL if not found. */ struct callback_head * task_work_cancel_match(struct task_struct *task, bool (*match)(struct callback_head *, void *data), void *data) { struct callback_head **pprev = &task->task_works; struct callback_head *work; unsigned long flags; if (likely(!task_work_pending(task))) return NULL; /* * If cmpxchg() fails we continue without updating pprev. * Either we raced with task_work_add() which added the * new entry before this work, we will find it again. Or * we raced with task_work_run(), *pprev == NULL/exited. 
*/ raw_spin_lock_irqsave(&task->pi_lock, flags); work = READ_ONCE(*pprev); while (work) { if (!match(work, data)) { pprev = &work->next; work = READ_ONCE(*pprev); } else if (try_cmpxchg(pprev, &work, work->next)) break; } raw_spin_unlock_irqrestore(&task->pi_lock, flags); return work; } static bool task_work_func_match(struct callback_head *cb, void *data) { return cb->func == data; } /** * task_work_cancel_func - cancel a pending work matching a function added by task_work_add() * @task: the task which should execute the func's work * @func: identifies the func to match with a work to remove * * Find the last queued pending work with ->func == @func and remove * it from queue. * * RETURNS: * The found work or NULL if not found. */ struct callback_head * task_work_cancel_func(struct task_struct *task, task_work_func_t func) { return task_work_cancel_match(task, task_work_func_match, func); } static bool task_work_match(struct callback_head *cb, void *data) { return cb == data; } /** * task_work_cancel - cancel a pending work added by task_work_add() * @task: the task which should execute the work * @cb: the callback to remove if queued * * Remove a callback from a task's queue if queued. * * RETURNS: * True if the callback was queued and got cancelled, false otherwise. */ bool task_work_cancel(struct task_struct *task, struct callback_head *cb) { struct callback_head *ret; ret = task_work_cancel_match(task, task_work_match, cb); return ret == cb; } /** * task_work_run - execute the works added by task_work_add() * * Flush the pending works. Should be used by the core kernel code. * Called before the task returns to the user-mode or stops, or when * it exits. In the latter case task_work_add() can no longer add the * new work after task_work_run() returns. */ void task_work_run(void) { struct task_struct *task = current; struct callback_head *work, *head, *next; for (;;) { /* * work->func() can do task_work_add(), do not set * work_exited unless the list is empty. 
*/ work = READ_ONCE(task->task_works); do { head = NULL; if (!work) { if (task->flags & PF_EXITING) head = &work_exited; else break; } } while (!try_cmpxchg(&task->task_works, &work, head)); if (!work) break; /* * Synchronize with task_work_cancel_match(). It can not remove * the first entry == work, cmpxchg(task_works) must fail. * But it can remove another entry from the ->next list. */ raw_spin_lock_irq(&task->pi_lock); raw_spin_unlock_irq(&task->pi_lock); do { next = work->next; work->func(work); work = next; cond_resched(); } while (work); } }
24 24 24 24 2 2 1 1 23 23 4 2 2 2 2 24 24 19 21 21 20 20 21 20 19 19 18 19 20 25 3 3 25 22 1 21 21 21 21 21 21 21 1 1 3 3 3 3 4 4 4 4 4 22 22 22 16 3 24 23 23 23 23 23 22 22 22 23 16 24 24 24 24 24 24 24 24 16 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 
475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
// SPDX-License-Identifier: GPL-2.0-only

#include <linux/ethtool.h>
#include <linux/phy.h>
#include "netlink.h"
#include "common.h"

/*
 * Description of one string set as used while building a reply.
 *
 * per_dev      - set for the sets that strset_prepare_set() fills from a
 *                device; clear for sets described by a static table below
 * free_strings - set by strset_prepare_set() when @strings was allocated
 *                with kcalloc() and must be released by strset_cleanup_data()
 * count        - number of entries in @strings
 * strings      - array of ETH_GSTRING_LEN sized names (may be NULL)
 */
struct strset_info {
	bool per_dev;
	bool free_strings;
	unsigned int count;
	const char (*strings)[ETH_GSTRING_LEN];
};

/*
 * Initial contents of strset_reply_data::sets, indexed by ETH_SS_* id.
 * Entries with .per_dev = false carry their name table and count here;
 * entries with .per_dev = true only carry the flag and get count/strings
 * filled in per request by strset_prepare_set().
 */
static const struct strset_info info_template[] = {
	[ETH_SS_TEST] = {
		.per_dev	= true,
	},
	[ETH_SS_STATS] = {
		.per_dev	= true,
	},
	[ETH_SS_PRIV_FLAGS] = {
		.per_dev	= true,
	},
	[ETH_SS_FEATURES] = {
		.per_dev	= false,
		.count		= ARRAY_SIZE(netdev_features_strings),
		.strings	= netdev_features_strings,
	},
	[ETH_SS_RSS_HASH_FUNCS] = {
		.per_dev	= false,
		.count		= ARRAY_SIZE(rss_hash_func_strings),
		.strings	= rss_hash_func_strings,
	},
	[ETH_SS_TUNABLES] = {
		.per_dev	= false,
		.count		= ARRAY_SIZE(tunable_strings),
		.strings	= tunable_strings,
	},
	[ETH_SS_PHY_STATS] = {
		.per_dev	= true,
	},
	[ETH_SS_PHY_TUNABLES] = {
		.per_dev	= false,
		.count		= ARRAY_SIZE(phy_tunable_strings),
		.strings	= phy_tunable_strings,
	},
	[ETH_SS_LINK_MODES] = {
		.per_dev	= false,
		.count		= __ETHTOOL_LINK_MODE_MASK_NBITS,
		.strings	= link_mode_names,
	},
	[ETH_SS_MSG_CLASSES] = {
		.per_dev	= false,
		.count		= NETIF_MSG_CLASS_COUNT,
		.strings	= netif_msg_class_names,
	},
	[ETH_SS_WOL_MODES] = {
		.per_dev	= false,
		.count		= WOL_MODE_COUNT,
		.strings	= wol_mode_names,
	},
	[ETH_SS_SOF_TIMESTAMPING] = {
		.per_dev	= false,
		.count		= __SOF_TIMESTAMPING_CNT,
		.strings	= sof_timestamping_names,
	},
	[ETH_SS_TS_TX_TYPES] = {
		.per_dev	= false,
		.count		= __HWTSTAMP_TX_CNT,
		.strings	= ts_tx_type_names,
	},
	[ETH_SS_TS_RX_FILTERS] = {
		.per_dev	= false,
		.count		= __HWTSTAMP_FILTER_CNT,
		.strings	= ts_rx_filter_names,
	},
	[ETH_SS_TS_FLAGS] = {
		.per_dev	= false,
		.count		= __HWTSTAMP_FLAG_CNT,
		.strings	= ts_flags_names,
	},
	[ETH_SS_UDP_TUNNEL_TYPES] = {
		.per_dev	= false,
		.count		= __ETHTOOL_UDP_TUNNEL_TYPE_CNT,
		.strings	= udp_tunnel_type_names,
	},
	[ETH_SS_STATS_STD] = {
		.per_dev	= false,
		.count		= __ETHTOOL_STATS_CNT,
		.strings	= stats_std_names,
	},
	[ETH_SS_STATS_ETH_PHY] = {
		.per_dev	= false,
		.count		= __ETHTOOL_A_STATS_ETH_PHY_CNT,
		.strings	= stats_eth_phy_names,
	},
	[ETH_SS_STATS_ETH_MAC] = {
		.per_dev	= false,
		.count		= __ETHTOOL_A_STATS_ETH_MAC_CNT,
		.strings	= stats_eth_mac_names,
	},
	[ETH_SS_STATS_ETH_CTRL] = {
		.per_dev	= false,
		.count		= __ETHTOOL_A_STATS_ETH_CTRL_CNT,
		.strings	= stats_eth_ctrl_names,
	},
	[ETH_SS_STATS_RMON] = {
		.per_dev	= false,
		.count		= __ETHTOOL_A_STATS_RMON_CNT,
		.strings	= stats_rmon_names,
	},
	[ETH_SS_STATS_PHY] = {
		.per_dev	= false,
		.count		= __ETHTOOL_A_STATS_PHY_CNT,
		.strings	= stats_phy_names,
	},
};

/*
 * Parsed STRSET_GET request.
 *
 * req_ids     - bitmap of explicitly requested ETH_SS_* ids (bit n = id n);
 *               zero means no explicit selection was made
 * counts_only - reply with set sizes only, without the strings themselves
 */
struct strset_req_info {
	struct ethnl_req_info		base;
	u32				req_ids;
	bool				counts_only;
};

/* Convert a pointer to the embedded base back to struct strset_req_info. */
#define STRSET_REQINFO(__req_base) \
	container_of(__req_base, struct strset_req_info, base)

/* Reply state: a private, per-request copy of info_template. */
struct strset_reply_data {
	struct ethnl_reply_data		base;
	struct strset_info		sets[ETH_SS_COUNT];
};

/* Convert a pointer to the embedded base back to struct strset_reply_data. */
#define STRSET_REPDATA(__reply_base) \
	container_of(__reply_base, struct strset_reply_data, base)

/* Policy for the top level attributes of a STRSET_GET request. */
const struct nla_policy ethnl_strset_get_policy[] = {
	[ETHTOOL_A_STRSET_HEADER]	=
		NLA_POLICY_NESTED(ethnl_header_policy_phy),
	[ETHTOOL_A_STRSET_STRINGSETS]	= { .type = NLA_NESTED },
	[ETHTOOL_A_STRSET_COUNTS_ONLY]	= { .type = NLA_FLAG },
};

/* Policy for the contents of one ETHTOOL_A_STRINGSETS_STRINGSET nest. */
static const struct nla_policy get_stringset_policy[] = {
	[ETHTOOL_A_STRINGSET_ID]	= { .type = NLA_U32 },
};

/**
 * strset_include() - test if a string set should be included in reply
 * @info: parsed client request
 * @data: pointer to request data structure
 * @id:   id of string set to check (ETH_SS_* constants)
 *
 * If the request named any sets explicitly, only those are included.
 * Otherwise a set without a name table that is not per device is never
 * included, and the remaining sets are chosen by whether the request has a
 * device: per device sets with a device, the other sets without one.
 */
static bool strset_include(const struct strset_req_info *info,
			   const struct strset_reply_data *data, u32 id)
{
	bool per_dev;

	/* every valid id must fit into the req_ids bitmap */
	BUILD_BUG_ON(ETH_SS_COUNT >= BITS_PER_BYTE * sizeof(info->req_ids));

	if (info->req_ids)
		return info->req_ids & (1U << id);
	per_dev = data->sets[id].per_dev;
	if (!per_dev && !data->sets[id].strings)
		return false;

	return data->base.dev ? per_dev : !per_dev;
}

/*
 * Parse one ETHTOOL_A_STRINGSETS_STRINGSET nest and store its mandatory
 * ETHTOOL_A_STRINGSET_ID in *val.
 *
 * Returns 0 on success, the nla_parse_nested() error if parsing fails, or
 * -EINVAL if the id attribute is missing.
 */
static int strset_get_id(const struct nlattr *nest, u32 *val,
			 struct netlink_ext_ack *extack)
{
	struct nlattr *tb[ARRAY_SIZE(get_stringset_policy)];
	int ret;

	ret = nla_parse_nested(tb, ARRAY_SIZE(get_stringset_policy) - 1, nest,
			       get_stringset_policy, extack);
	if (ret < 0)
		return ret;
	if (NL_REQ_ATTR_CHECK(extack, nest, tb, ETHTOOL_A_STRINGSET_ID))
		return -EINVAL;

	*val = nla_get_u32(tb[ETHTOOL_A_STRINGSET_ID]);
	return 0;
}

/* Policy for the contents of the ETHTOOL_A_STRSET_STRINGSETS nest. */
static const struct nla_policy strset_stringsets_policy[] = {
	[ETHTOOL_A_STRINGSETS_STRINGSET]	= { .type = NLA_NESTED },
};

/*
 * Fill the strset specific part of the request from parsed attributes.
 *
 * Each requested set id is recorded as a bit in req_info->req_ids; an id of
 * ETH_SS_COUNT or above is rejected with -EOPNOTSUPP.
 *
 * NOTE(review): the function returns before looking at
 * ETHTOOL_A_STRSET_COUNTS_ONLY when the STRINGSETS nest is absent, so the
 * flag only takes effect for requests that list sets explicitly - confirm
 * this is intended.
 */
static int strset_parse_request(struct ethnl_req_info *req_base,
				struct nlattr **tb,
				struct netlink_ext_ack *extack)
{
	struct strset_req_info *req_info = STRSET_REQINFO(req_base);
	struct nlattr *nest = tb[ETHTOOL_A_STRSET_STRINGSETS];
	struct nlattr *attr;
	int rem, ret;

	if (!nest)
		return 0;
	ret = nla_validate_nested(nest,
				  ARRAY_SIZE(strset_stringsets_policy) - 1,
				  strset_stringsets_policy, extack);
	if (ret < 0)
		return ret;

	req_info->counts_only = tb[ETHTOOL_A_STRSET_COUNTS_ONLY];
	nla_for_each_nested(attr, nest, rem) {
		u32 id;

		if (WARN_ONCE(nla_type(attr) != ETHTOOL_A_STRINGSETS_STRINGSET,
			      "unexpected attrtype %u in ETHTOOL_A_STRSET_STRINGSETS\n",
			      nla_type(attr)))
			return -EINVAL;

		ret = strset_get_id(attr, &id, extack);
		if (ret < 0)
			return ret;
		if (id >= ETH_SS_COUNT) {
			NL_SET_ERR_MSG_ATTR(extack, attr,
					    "unknown string set id");
			return -EOPNOTSUPP;
		}

		req_info->req_ids |= (1U << id);
	}

	return 0;
}

/*
 * Release the string arrays allocated by strset_prepare_set(). Only sets
 * with free_strings set are touched; the pointer and the flag are reset so
 * that calling this again is harmless.
 */
static void strset_cleanup_data(struct ethnl_reply_data *reply_base)
{
	struct strset_reply_data *data = STRSET_REPDATA(reply_base);
	unsigned int i;

	for (i = 0; i < ETH_SS_COUNT; i++)
		if (data->sets[i].free_strings) {
			kfree(data->sets[i].strings);
			data->sets[i].strings = NULL;
			data->sets[i].free_strings = false;
		}
}

/*
 * Query the size and, unless @counts_only, the names of string set @id.
 *
 * For ETH_SS_PHY_STATS with a @phydev and a device whose ethtool_ops has no
 * get_ethtool_phy_stats, the global ethtool_phy_ops callbacks are used;
 * otherwise the device's own get_sset_count/get_strings are used.
 *
 * A count that is zero or negative (including the case where no suitable
 * callbacks exist) is reported as an empty set and is not an error. The
 * only error returned is -ENOMEM when the string array cannot be allocated.
 */
static int strset_prepare_set(struct strset_info *info, struct net_device *dev,
			      struct phy_device *phydev, unsigned int id,
			      bool counts_only)
{
	const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
	const struct ethtool_ops *ops = dev->ethtool_ops;
	void *strings;
	int count, ret;

	if (id == ETH_SS_PHY_STATS && phydev &&
	    !ops->get_ethtool_phy_stats && phy_ops &&
	    phy_ops->get_sset_count)
		ret = phy_ops->get_sset_count(phydev);
	else if (ops->get_sset_count && ops->get_strings)
		ret = ops->get_sset_count(dev, id);
	else
		ret = -EOPNOTSUPP;
	if (ret <= 0) {
		info->count = 0;
		return 0;
	}

	count = ret;
	if (!counts_only) {
		strings = kcalloc(count, ETH_GSTRING_LEN, GFP_KERNEL);
		if (!strings)
			return -ENOMEM;
		/*
		 * NOTE(review): if the count came from phy_ops but
		 * phy_ops->get_strings is NULL, this falls through to
		 * ops->get_strings, which was not checked on that path -
		 * confirm phy_ops always provides both callbacks.
		 */
		if (id == ETH_SS_PHY_STATS && phydev &&
		    !ops->get_ethtool_phy_stats && phy_ops &&
		    phy_ops->get_strings)
			phy_ops->get_strings(phydev, strings);
		else
			ops->get_strings(dev, id, strings);
		info->strings = strings;
		info->free_strings = true;
	}
	info->count = count;

	return 0;
}

/*
 * Build the reply data for one request.
 *
 * The set table starts as a copy of info_template. Without a device, the
 * only work is to reject (-EINVAL) an explicit request for a per device
 * set. With a device, every included per device set is filled by
 * strset_prepare_set() between ethnl_ops_begin() and ethnl_ops_complete();
 * on failure anything allocated so far is released again.
 */
static int strset_prepare_data(const struct ethnl_req_info *req_base,
			       struct ethnl_reply_data *reply_base,
			       const struct genl_info *info)
{
	const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
	struct strset_reply_data *data = STRSET_REPDATA(reply_base);
	struct net_device *dev = reply_base->dev;
	struct nlattr **tb = info->attrs;
	struct phy_device *phydev;
	unsigned int i;
	int ret;

	BUILD_BUG_ON(ARRAY_SIZE(info_template) != ETH_SS_COUNT);
	memcpy(&data->sets, &info_template, sizeof(data->sets));

	if (!dev) {
		for (i = 0; i < ETH_SS_COUNT; i++) {
			if ((req_info->req_ids & (1U << i)) &&
			    data->sets[i].per_dev) {
				GENL_SET_ERR_MSG(info,
						 "requested per device strings without dev");
				return -EINVAL;
			}
		}
		return 0;
	}

	phydev = ethnl_req_get_phydev(req_base, tb, ETHTOOL_A_HEADER_FLAGS,
				      info->extack);

	/* phydev can be NULL, check for errors only */
	if (IS_ERR(phydev))
		return PTR_ERR(phydev);

	ret = ethnl_ops_begin(dev);
	if (ret < 0)
		goto err_strset;
	for (i = 0; i < ETH_SS_COUNT; i++) {
		/* sets backed by static tables are already complete */
		if (!strset_include(req_info, data, i) ||
		    !data->sets[i].per_dev)
			continue;

		ret = strset_prepare_set(&data->sets[i], dev, phydev, i,
					 req_info->counts_only);
		if (ret < 0)
			goto err_ops;
	}
	ethnl_ops_complete(dev);

	return 0;
err_ops:
	ethnl_ops_complete(dev);
err_strset:
	strset_cleanup_data(reply_base);
	return ret;
}

/* calculate size of ETHTOOL_A_STRSET_STRINGSET nest for one string set */
static int strset_set_size(const struct strset_info *info, bool counts_only)
{
	unsigned int len = 0;
	unsigned int i;

	/* empty sets are not emitted at all, see strset_fill_set() */
	if (info->count == 0)
		return 0;
	/* counts only: a nest holding the id and the count */
	if (counts_only)
		return nla_total_size(2 * nla_total_size(sizeof(u32)));

	for (i = 0; i < info->count; i++) {
		const char *str = info->strings[i];

		/* ETHTOOL_A_STRING_INDEX, ETHTOOL_A_STRING_VALUE, nest */
		len += nla_total_size(nla_total_size(sizeof(u32)) +
				      ethnl_strz_size(str));
	}
	/* ETHTOOL_A_STRINGSET_ID, ETHTOOL_A_STRINGSET_COUNT */
	len = 2 * nla_total_size(sizeof(u32)) + nla_total_size(len);

	return nla_total_size(len);
}

/*
 * Compute the payload size needed by strset_fill_reply(): the outer
 * ETHTOOL_A_STRSET_STRINGSETS nest plus every included set.
 */
static int strset_reply_size(const struct ethnl_req_info *req_base,
			     const struct ethnl_reply_data *reply_base)
{
	const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
	const struct strset_reply_data *data = STRSET_REPDATA(reply_base);
	unsigned int i;
	int len = 0;
	int ret;

	len += nla_total_size(0); /* ETHTOOL_A_STRSET_STRINGSETS */

	for (i = 0; i < ETH_SS_COUNT; i++) {
		const struct strset_info *set_info = &data->sets[i];

		if (!strset_include(req_info, data, i))
			continue;

		ret = strset_set_size(set_info, req_info->counts_only);
		if (ret < 0)
			return ret;
		len += ret;
	}

	return len;
}

/* fill one string into reply */
static int strset_fill_string(struct sk_buff *skb,
			      const struct strset_info *set_info, u32 idx)
{
	struct nlattr *string_attr;
	const char *value;

	value = set_info->strings[idx];

	string_attr = nla_nest_start(skb, ETHTOOL_A_STRINGS_STRING);
	if (!string_attr)
		return -EMSGSIZE;
	if (nla_put_u32(skb, ETHTOOL_A_STRING_INDEX, idx) ||
	    ethnl_put_strz(skb, ETHTOOL_A_STRING_VALUE, value))
		goto nla_put_failure;
	nla_nest_end(skb, string_attr);

	return 0;
nla_put_failure:
	nla_nest_cancel(skb, string_attr);
	return -EMSGSIZE;
}

/*
 * fill one string set into reply
 *
 * Returns -EOPNOTSUPP for a set that is not per device and has no name
 * table, 0 without emitting anything for an empty set, and -EMSGSIZE when
 * the message runs out of room.
 */
static int strset_fill_set(struct sk_buff *skb,
			   const struct strset_info *set_info, u32 id,
			   bool counts_only)
{
	struct nlattr *stringset_attr;
	struct nlattr *strings_attr;
	unsigned int i;

	if (!set_info->per_dev && !set_info->strings)
		return -EOPNOTSUPP;
	if (set_info->count == 0)
		return 0;
	stringset_attr = nla_nest_start(skb, ETHTOOL_A_STRINGSETS_STRINGSET);
	if (!stringset_attr)
		return -EMSGSIZE;

	if (nla_put_u32(skb, ETHTOOL_A_STRINGSET_ID, id) ||
	    nla_put_u32(skb, ETHTOOL_A_STRINGSET_COUNT, set_info->count))
		goto nla_put_failure;

	if (!counts_only) {
		strings_attr = nla_nest_start(skb, ETHTOOL_A_STRINGSET_STRINGS);
		if (!strings_attr)
			goto nla_put_failure;
		for (i = 0; i < set_info->count; i++) {
			if (strset_fill_string(skb, set_info, i) < 0)
				goto nla_put_failure;
		}
		nla_nest_end(skb, strings_attr);
	}

	nla_nest_end(skb, stringset_attr);
	return 0;

nla_put_failure:
	/* cancelling the outer nest also drops a partial strings nest */
	nla_nest_cancel(skb, stringset_attr);
	return -EMSGSIZE;
}

/*
 * Write all included string sets into the reply inside one
 * ETHTOOL_A_STRSET_STRINGSETS nest; the nest is cancelled if any set fails.
 */
static int strset_fill_reply(struct sk_buff *skb,
			     const struct ethnl_req_info *req_base,
			     const struct ethnl_reply_data *reply_base)
{
	const struct strset_req_info *req_info = STRSET_REQINFO(req_base);
	const struct strset_reply_data *data = STRSET_REPDATA(reply_base);
	struct nlattr *nest;
	unsigned int i;
	int ret;

	nest = nla_nest_start(skb, ETHTOOL_A_STRSET_STRINGSETS);
	if (!nest)
		return -EMSGSIZE;

	for (i = 0; i < ETH_SS_COUNT; i++) {
		if (strset_include(req_info, data, i)) {
			ret = strset_fill_set(skb, &data->sets[i], i,
					      req_info->counts_only);
			if (ret < 0)
				goto nla_put_failure;
		}
	}

	nla_nest_end(skb, nest);
	return 0;

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return ret;
}

/* Request ops table hooking the handlers above to ETHTOOL_MSG_STRSET_GET. */
const struct ethnl_request_ops ethnl_strset_request_ops = {
	.request_cmd		= ETHTOOL_MSG_STRSET_GET,
	.reply_cmd		= ETHTOOL_MSG_STRSET_GET_REPLY,
	.hdr_attr		= ETHTOOL_A_STRSET_HEADER,
	.req_info_size		= sizeof(struct strset_req_info),
	.reply_data_size	= sizeof(struct strset_reply_data),
	.allow_nodev_do		= true,

	.parse_request		= strset_parse_request,
	.prepare_data		= strset_prepare_data,
	.reply_size		= strset_reply_size,
	.fill_reply		= strset_fill_reply,
	.cleanup_data		= strset_cleanup_data,
};
3 48 4 4 3 3 3 3 48 4 3 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  Hardware dependent layer
 *  Copyright (c) by Jaroslav Kysela <perex@perex.cz>
 */

#include <linux/major.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/mutex.h>
#include <linux/module.h>
#include <linux/sched/signal.h>
#include <sound/core.h>
#include <sound/control.h>
#include <sound/minors.h>
#include <sound/hwdep.h>
#include <sound/info.h>

MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
MODULE_DESCRIPTION("Hardware dependent layer");
MODULE_LICENSE("GPL");

/* All registered hwdep devices; the users in this file hold register_mutex. */
static LIST_HEAD(snd_hwdep_devices);
static DEFINE_MUTEX(register_mutex);

static int snd_hwdep_dev_free(struct snd_device *device);
static int snd_hwdep_dev_register(struct snd_device *device);
static int snd_hwdep_dev_disconnect(struct snd_device *device);

/*
 * Find the registered hwdep with the given card and device index.
 * Returns NULL if there is none. Walks snd_hwdep_devices without taking a
 * lock itself; every caller in this file holds register_mutex.
 */
static struct snd_hwdep *snd_hwdep_search(struct snd_card *card, int device)
{
	struct snd_hwdep *hwdep;

	list_for_each_entry(hwdep, &snd_hwdep_devices, list)
		if (hwdep->card == card && hwdep->device == device)
			return hwdep;
	return NULL;
}

/* llseek: forwarded to the driver callback, -ENXIO if it has none. */
static loff_t snd_hwdep_llseek(struct file * file, loff_t offset, int orig)
{
	struct snd_hwdep *hw = file->private_data;
	if (hw->ops.llseek)
		return hw->ops.llseek(hw, file, offset, orig);
	return -ENXIO;
}

/* read: forwarded to the driver callback, -ENXIO if it has none. */
static ssize_t snd_hwdep_read(struct file * file, char __user *buf,
			      size_t count, loff_t *offset)
{
	struct snd_hwdep *hw = file->private_data;
	if (hw->ops.read)
		return hw->ops.read(hw, buf, count, offset);
	return -ENXIO;
}

/* write: forwarded to the driver callback, -ENXIO if it has none. */
static ssize_t snd_hwdep_write(struct file * file, const char __user *buf,
			       size_t count, loff_t *offset)
{
	struct snd_hwdep *hw = file->private_data;
	if (hw->ops.write)
		return hw->ops.write(hw, buf, count, offset);
	return -ENXIO;
}

/*
 * Open a hwdep device node.
 *
 * The hwdep is looked up from the minor number (ALSA major, or the OSS
 * major when CONFIG_SND_OSSEMUL is set). The driver's open callback is
 * retried for as long as it returns -EAGAIN, sleeping on open_wait between
 * attempts, unless the file is O_NONBLOCK (-EBUSY), the card is shutting
 * down (-ENODEV) or a signal is pending (-ERESTARTSYS). An exclusive
 * device that is already in use yields -EBUSY.
 *
 * On success the file is added to the card and the use count is bumped;
 * the module reference taken here is kept until release.
 */
static int snd_hwdep_open(struct inode *inode, struct file * file)
{
	int major = imajor(inode);
	struct snd_hwdep *hw;
	int err;
	wait_queue_entry_t wait;

	if (major == snd_major) {
		hw = snd_lookup_minor_data(iminor(inode),
					   SNDRV_DEVICE_TYPE_HWDEP);
#ifdef CONFIG_SND_OSSEMUL
	} else if (major == SOUND_MAJOR) {
		hw = snd_lookup_oss_minor_data(iminor(inode),
					       SNDRV_OSS_DEVICE_TYPE_DMFM);
#endif
	} else
		return -ENXIO;
	if (hw == NULL)
		return -ENODEV;

	/*
	 * presumably the lookup above took a card reference, which is why
	 * every exit path from here calls snd_card_unref() - TODO confirm
	 */
	if (!try_module_get(hw->card->module)) {
		snd_card_unref(hw->card);
		return -EFAULT;
	}

	init_waitqueue_entry(&wait, current);
	add_wait_queue(&hw->open_wait, &wait);
	mutex_lock(&hw->open_mutex);
	while (1) {
		if (hw->exclusive && hw->used > 0) {
			err = -EBUSY;
			break;
		}
		if (!hw->ops.open) {
			err = 0;
			break;
		}
		err = hw->ops.open(hw, file);
		if (err >= 0)
			break;
		if (err == -EAGAIN) {
			if (file->f_flags & O_NONBLOCK) {
				err = -EBUSY;
				break;
			}
		} else
			break;
		/* sleep with the mutex dropped until release/disconnect wakes us */
		set_current_state(TASK_INTERRUPTIBLE);
		mutex_unlock(&hw->open_mutex);
		schedule();
		mutex_lock(&hw->open_mutex);
		if (hw->card->shutdown) {
			err = -ENODEV;
			break;
		}
		if (signal_pending(current)) {
			err = -ERESTARTSYS;
			break;
		}
	}
	remove_wait_queue(&hw->open_wait, &wait);
	if (err >= 0) {
		err = snd_card_file_add(hw->card, file);
		if (err >= 0) {
			file->private_data = hw;
			hw->used++;
		} else {
			/* undo the driver's open, the file was not attached */
			if (hw->ops.release)
				hw->ops.release(hw, file);
		}
	}
	mutex_unlock(&hw->open_mutex);
	if (err < 0)
		module_put(hw->card->module);
	snd_card_unref(hw->card);
	return err;
}

/*
 * Release a hwdep file: run the driver's release callback and drop the use
 * count under open_mutex, wake tasks waiting in open, detach the file from
 * the card and drop the module reference taken at open time. Returns the
 * driver callback's result, or 0 if there is no callback.
 */
static int snd_hwdep_release(struct inode *inode, struct file * file)
{
	int err = 0;
	struct snd_hwdep *hw = file->private_data;
	/* saved up front; used for module_put() after the file is removed */
	struct module *mod = hw->card->module;

	scoped_guard(mutex, &hw->open_mutex) {
		if (hw->ops.release)
			err = hw->ops.release(hw, file);
		if (hw->used > 0)
			hw->used--;
	}
	wake_up(&hw->open_wait);

	snd_card_file_remove(hw->card, file);
	module_put(mod);
	return err;
}

/* poll: forwarded to the driver callback, 0 (no events) if it has none. */
static __poll_t snd_hwdep_poll(struct file * file, poll_table * wait)
{
	struct snd_hwdep *hw = file->private_data;
	if (hw->ops.poll)
		return hw->ops.poll(hw, file, wait);
	return 0;
}

/*
 * Copy card number, id, name and interface type of @hw to user space.
 * The local structure is zeroed first so no stack contents leak through
 * fields that are not filled in. Returns -EFAULT if the copy fails.
 */
static int snd_hwdep_info(struct snd_hwdep *hw,
			  struct snd_hwdep_info __user *_info)
{
	struct snd_hwdep_info info;

	memset(&info, 0, sizeof(info));
	info.card = hw->card->number;
	strscpy(info.id, hw->id, sizeof(info.id));
	strscpy(info.name, hw->name, sizeof(info.name));
	info.iface = hw->iface;
	if (copy_to_user(_info, &info, sizeof(info)))
		return -EFAULT;
	return 0;
}

/*
 * Report DSP status to user space. dsp_loaded is pre-filled from @hw and
 * the rest is left to the driver's dsp_status callback; -ENXIO if the
 * driver has none.
 */
static int snd_hwdep_dsp_status(struct snd_hwdep *hw,
				struct snd_hwdep_dsp_status __user *_info)
{
	struct snd_hwdep_dsp_status info;
	int err;

	if (! hw->ops.dsp_status)
		return -ENXIO;
	memset(&info, 0, sizeof(info));
	info.dsp_loaded = hw->dsp_loaded;
	err = hw->ops.dsp_status(hw, &info);
	if (err < 0)
		return err;
	if (copy_to_user(_info, &info, sizeof(info)))
		return -EFAULT;
	return 0;
}

/*
 * Load one DSP image through the driver's dsp_load callback.
 *
 * Returns -ENXIO without a callback, -EINVAL for an index of 32 or more
 * (dsp_loaded is used as a bitmask with one bit per index), -EBUSY if that
 * index is already marked loaded, or the callback's error. The bit for the
 * index is set only after the callback succeeded.
 */
static int snd_hwdep_dsp_load(struct snd_hwdep *hw,
			      struct snd_hwdep_dsp_image *info)
{
	int err;

	if (! hw->ops.dsp_load)
		return -ENXIO;
	if (info->index >= 32)
		return -EINVAL;
	/* check whether the dsp was already loaded */
	if (hw->dsp_loaded & (1u << info->index))
		return -EBUSY;
	err = hw->ops.dsp_load(hw, info);
	if (err < 0)
		return err;
	hw->dsp_loaded |= (1u << info->index);
	return 0;
}

/* Copy the image descriptor from user space and hand it to snd_hwdep_dsp_load(). */
static int snd_hwdep_dsp_load_user(struct snd_hwdep *hw,
				   struct snd_hwdep_dsp_image __user *_info)
{
	struct snd_hwdep_dsp_image info = {};

	if (copy_from_user(&info, _info, sizeof(info)))
		return -EFAULT;
	return snd_hwdep_dsp_load(hw, &info);
}

/*
 * ioctl entry point: the generic hwdep commands are handled here, anything
 * else goes to the driver's ioctl callback, or fails with -ENOTTY when the
 * driver has none.
 */
static long snd_hwdep_ioctl(struct file * file, unsigned int cmd,
			    unsigned long arg)
{
	struct snd_hwdep *hw = file->private_data;
	void __user *argp = (void __user *)arg;
	switch (cmd) {
	case SNDRV_HWDEP_IOCTL_PVERSION:
		return put_user(SNDRV_HWDEP_VERSION, (int __user *)argp);
	case SNDRV_HWDEP_IOCTL_INFO:
		return snd_hwdep_info(hw, argp);
	case SNDRV_HWDEP_IOCTL_DSP_STATUS:
		return snd_hwdep_dsp_status(hw, argp);
	case SNDRV_HWDEP_IOCTL_DSP_LOAD:
		return snd_hwdep_dsp_load_user(hw, argp);
	}
	if (hw->ops.ioctl)
		return hw->ops.ioctl(hw, file, cmd, arg);
	return -ENOTTY;
}

/* mmap: forwarded to the driver callback, -ENXIO if it has none. */
static int snd_hwdep_mmap(struct file * file, struct vm_area_struct * vma)
{
	struct snd_hwdep *hw = file->private_data;
	if (hw->ops.mmap)
		return hw->ops.mmap(hw, file, vma);
	return -ENXIO;
}

/*
 * hwdep related ioctls issued on the control device.
 *
 * HWDEP_NEXT_DEVICE: reads a device index from user space and writes back
 * the next higher index that has a registered hwdep on @card (a negative
 * input starts the search at 0), or -1 if there is none.
 * HWDEP_INFO: returns the info of the hwdep whose index is given in
 * info->device, -ENXIO if no such device is registered.
 *
 * Any other command returns -ENOIOCTLCMD.
 */
static int snd_hwdep_control_ioctl(struct snd_card *card,
				   struct snd_ctl_file * control,
				   unsigned int cmd, unsigned long arg)
{
	switch (cmd) {
	case SNDRV_CTL_IOCTL_HWDEP_NEXT_DEVICE:
		{
			int device;

			if (get_user(device, (int __user *)arg))
				return -EFAULT;

			scoped_guard(mutex, &register_mutex) {
				if (device < 0)
					device = 0;
				else if (device < SNDRV_MINOR_HWDEPS)
					device++;
				else
					device = SNDRV_MINOR_HWDEPS;

				while (device < SNDRV_MINOR_HWDEPS) {
					if (snd_hwdep_search(card, device))
						break;
					device++;
				}
				if (device >= SNDRV_MINOR_HWDEPS)
					device = -1;
			}
			if (put_user(device, (int __user *)arg))
				return -EFAULT;
			return 0;
		}
	case SNDRV_CTL_IOCTL_HWDEP_INFO:
		{
			struct snd_hwdep_info __user *info = (struct snd_hwdep_info __user *)arg;
			int device;
			struct snd_hwdep *hwdep;

			if (get_user(device, &info->device))
				return -EFAULT;
			/* both paths inside the guard return */
			scoped_guard(mutex, &register_mutex) {
				hwdep = snd_hwdep_search(card, device);
				if (!hwdep)
					return -ENXIO;
				return snd_hwdep_info(hwdep, info);
			}
			break;
		}
	}
	return -ENOIOCTLCMD;
}

#ifdef CONFIG_COMPAT
#include "hwdep_compat.c"
#else
#define snd_hwdep_ioctl_compat	NULL
#endif

/*
 * File operations of the hwdep device node
 */

static const struct file_operations snd_hwdep_f_ops =
{
	.owner =	THIS_MODULE,
	.llseek =	snd_hwdep_llseek,
	.read =		snd_hwdep_read,
	.write =	snd_hwdep_write,
	.open =		snd_hwdep_open,
	.release =	snd_hwdep_release,
	.poll =		snd_hwdep_poll,
	.unlocked_ioctl =	snd_hwdep_ioctl,
	.compat_ioctl =	snd_hwdep_ioctl_compat,
	.mmap =		snd_hwdep_mmap,
};

/*
 * Destroy a hwdep instance: run the owner's private_free callback if set,
 * drop the device reference and free the structure. NULL is accepted.
 */
static void snd_hwdep_free(struct snd_hwdep *hwdep)
{
	if (!hwdep)
		return;
	if (hwdep->private_free)
		hwdep->private_free(hwdep);
	put_device(hwdep->dev);
	kfree(hwdep);
}

/**
 * snd_hwdep_new - create a new hwdep instance
 * @card: the card instance
 * @id: the id string
 * @device: the device index (zero-based)
 * @rhwdep: the pointer to store the new hwdep instance
 *
 * Creates a new hwdep instance with the given index on the card.
 * The callbacks (hwdep->ops) must be set on the returned instance
 * after this call manually by the caller.
 *
 * @id and @rhwdep may be NULL. When @rhwdep is given it is set to NULL on
 * every failure path taken after the @card check.
 *
 * Return: Zero if successful, or a negative error code on failure.
 */
int snd_hwdep_new(struct snd_card *card, char *id, int device,
		  struct snd_hwdep **rhwdep)
{
	struct snd_hwdep *hwdep;
	int err;
	static const struct snd_device_ops ops = {
		.dev_free = snd_hwdep_dev_free,
		.dev_register = snd_hwdep_dev_register,
		.dev_disconnect = snd_hwdep_dev_disconnect,
	};

	if (snd_BUG_ON(!card))
		return -ENXIO;
	if (rhwdep)
		*rhwdep = NULL;
	hwdep = kzalloc(sizeof(*hwdep), GFP_KERNEL);
	if (!hwdep)
		return -ENOMEM;

	init_waitqueue_head(&hwdep->open_wait);
	mutex_init(&hwdep->open_mutex);
	hwdep->card = card;
	hwdep->device = device;
	if (id)
		strscpy(hwdep->id, id, sizeof(hwdep->id));

	err = snd_device_alloc(&hwdep->dev, card);
	if (err < 0) {
		snd_hwdep_free(hwdep);
		return err;
	}

	dev_set_name(hwdep->dev, "hwC%iD%i", card->number, device);
#ifdef CONFIG_SND_OSSEMUL
	/* no OSS device unless the driver sets a type before registration */
	hwdep->oss_type = -1;
#endif

	err = snd_device_new(card, SNDRV_DEV_HWDEP, hwdep, &ops);
	if (err < 0) {
		snd_hwdep_free(hwdep);
		return err;
	}

	if (rhwdep)
		*rhwdep = hwdep;
	return 0;
}
EXPORT_SYMBOL(snd_hwdep_new);

/* snd_device_ops.dev_free: destroy the hwdep attached to @device. */
static int snd_hwdep_dev_free(struct snd_device *device)
{
	snd_hwdep_free(device->device_data);
	return 0;
}

/*
 * snd_device_ops.dev_register: add the hwdep to the global list and
 * register its device node, all under register_mutex.
 *
 * Returns -EBUSY if the same card/device pair is already listed. With
 * CONFIG_SND_OSSEMUL an OSS device is registered as well when oss_type is
 * not negative; a failure there is only warned about and does not fail the
 * registration.
 */
static int snd_hwdep_dev_register(struct snd_device *device)
{
	struct snd_hwdep *hwdep = device->device_data;
	struct snd_card *card = hwdep->card;
	int err;

	guard(mutex)(&register_mutex);
	if (snd_hwdep_search(card, hwdep->device))
		return -EBUSY;
	list_add_tail(&hwdep->list, &snd_hwdep_devices);
	err = snd_register_device(SNDRV_DEVICE_TYPE_HWDEP,
				  hwdep->card, hwdep->device,
				  &snd_hwdep_f_ops, hwdep, hwdep->dev);
	if (err < 0) {
		dev_err(hwdep->dev, "unable to register\n");
		list_del(&hwdep->list);
		return err;
	}

#ifdef CONFIG_SND_OSSEMUL
	hwdep->ossreg = 0;
	if (hwdep->oss_type >= 0) {
		if (hwdep->oss_type == SNDRV_OSS_DEVICE_TYPE_DMFM &&
		    hwdep->device)
			dev_warn(hwdep->dev,
				 "only hwdep device 0 can be registered as OSS direct FM device!\n");
		else if (snd_register_oss_device(hwdep->oss_type,
						 card, hwdep->device,
						 &snd_hwdep_f_ops, hwdep) < 0)
			dev_warn(hwdep->dev,
				 "unable to register OSS compatibility device\n");
		else
			hwdep->ossreg = 1;
	}
#endif
	return 0;
}

/*
 * snd_device_ops.dev_disconnect: wake tasks sleeping in open, unregister
 * the device node(s) and take the hwdep off the global list. Runs with
 * register_mutex and then open_mutex held. Returns -EINVAL if this hwdep
 * is not the one listed for its card/device pair.
 */
static int snd_hwdep_dev_disconnect(struct snd_device *device)
{
	struct snd_hwdep *hwdep = device->device_data;

	if (snd_BUG_ON(!hwdep))
		return -ENXIO;
	guard(mutex)(&register_mutex);
	if (snd_hwdep_search(hwdep->card, hwdep->device) != hwdep)
		return -EINVAL;
	guard(mutex)(&hwdep->open_mutex);
	wake_up(&hwdep->open_wait);
#ifdef CONFIG_SND_OSSEMUL
	if (hwdep->ossreg)
		snd_unregister_oss_device(hwdep->oss_type, hwdep->card, hwdep->device);
#endif
	snd_unregister_device(hwdep->dev);
	list_del_init(&hwdep->list);
	return 0;
}

#ifdef CONFIG_SND_PROC_FS
/*
 *  Info interface
 */

/* Print one "card-device: name" line per registered hwdep. */
static void snd_hwdep_proc_read(struct snd_info_entry *entry,
				struct snd_info_buffer *buffer)
{
	struct snd_hwdep *hwdep;

	guard(mutex)(&register_mutex);
	list_for_each_entry(hwdep, &snd_hwdep_devices, list)
		snd_iprintf(buffer, "%02i-%02i: %s\n",
			    hwdep->card->number, hwdep->device, hwdep->name);
}

static struct snd_info_entry *snd_hwdep_proc_entry;

/*
 * Create and register the "hwdep" proc entry. Failure is not fatal: the
 * saved entry pointer is simply left NULL.
 */
static void __init snd_hwdep_proc_init(void)
{
	struct snd_info_entry *entry;

	entry = snd_info_create_module_entry(THIS_MODULE, "hwdep", NULL);
	if (entry) {
		entry->c.text.read = snd_hwdep_proc_read;
		if (snd_info_register(entry) < 0) {
			snd_info_free_entry(entry);
			entry = NULL;
		}
	}
	snd_hwdep_proc_entry = entry;
}

/* Remove the proc entry created by snd_hwdep_proc_init(). */
static void __exit snd_hwdep_proc_done(void)
{
	snd_info_free_entry(snd_hwdep_proc_entry);
}
#else /* !CONFIG_SND_PROC_FS */
#define snd_hwdep_proc_init()
#define snd_hwdep_proc_done()
#endif /* CONFIG_SND_PROC_FS */


/*
 *  ENTRY functions
 */

/* Module init: set up the proc entry and hook into the control ioctls. */
static int __init alsa_hwdep_init(void)
{
	snd_hwdep_proc_init();
	snd_ctl_register_ioctl(snd_hwdep_control_ioctl);
	snd_ctl_register_ioctl_compat(snd_hwdep_control_ioctl);
	return 0;
}

/* Module exit: undo alsa_hwdep_init() in reverse order. */
static void __exit alsa_hwdep_exit(void)
{
	snd_ctl_unregister_ioctl(snd_hwdep_control_ioctl);
	snd_ctl_unregister_ioctl_compat(snd_hwdep_control_ioctl);
	snd_hwdep_proc_done();
}

module_init(alsa_hwdep_init)
module_exit(alsa_hwdep_exit)
49 49 49 49 15 49 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 /* * videobuf2-vmalloc.c - vmalloc memory allocator for videobuf2 * * Copyright (C) 2010 Samsung Electronics * * Author: Pawel Osciak <pawel@osciak.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free 
Software Foundation. */ #include <linux/io.h> #include <linux/module.h> #include <linux/mm.h> #include <linux/refcount.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <media/videobuf2-v4l2.h> #include <media/videobuf2-vmalloc.h> #include <media/videobuf2-memops.h> struct vb2_vmalloc_buf { void *vaddr; struct frame_vector *vec; enum dma_data_direction dma_dir; unsigned long size; refcount_t refcount; struct vb2_vmarea_handler handler; struct dma_buf *dbuf; }; static void vb2_vmalloc_put(void *buf_priv); static void *vb2_vmalloc_alloc(struct vb2_buffer *vb, struct device *dev, unsigned long size) { struct vb2_vmalloc_buf *buf; buf = kzalloc(sizeof(*buf), GFP_KERNEL | vb->vb2_queue->gfp_flags); if (!buf) return ERR_PTR(-ENOMEM); buf->size = size; buf->vaddr = vmalloc_user(buf->size); if (!buf->vaddr) { pr_debug("vmalloc of size %ld failed\n", buf->size); kfree(buf); return ERR_PTR(-ENOMEM); } buf->dma_dir = vb->vb2_queue->dma_dir; buf->handler.refcount = &buf->refcount; buf->handler.put = vb2_vmalloc_put; buf->handler.arg = buf; refcount_set(&buf->refcount, 1); return buf; } static void vb2_vmalloc_put(void *buf_priv) { struct vb2_vmalloc_buf *buf = buf_priv; if (refcount_dec_and_test(&buf->refcount)) { vfree(buf->vaddr); kfree(buf); } } static void *vb2_vmalloc_get_userptr(struct vb2_buffer *vb, struct device *dev, unsigned long vaddr, unsigned long size) { struct vb2_vmalloc_buf *buf; struct frame_vector *vec; int n_pages, offset, i; int ret = -ENOMEM; buf = kzalloc(sizeof(*buf), GFP_KERNEL); if (!buf) return ERR_PTR(-ENOMEM); buf->dma_dir = vb->vb2_queue->dma_dir; offset = vaddr & ~PAGE_MASK; buf->size = size; vec = vb2_create_framevec(vaddr, size, buf->dma_dir == DMA_FROM_DEVICE || buf->dma_dir == DMA_BIDIRECTIONAL); if (IS_ERR(vec)) { ret = PTR_ERR(vec); goto fail_pfnvec_create; } buf->vec = vec; n_pages = frame_vector_count(vec); if (frame_vector_to_pages(vec) < 0) { unsigned long *nums = frame_vector_pfns(vec); /* * We 
cannot get page pointers for these pfns. Check memory is * physically contiguous and use direct mapping. */ for (i = 1; i < n_pages; i++) if (nums[i-1] + 1 != nums[i]) goto fail_map; buf->vaddr = (__force void *) ioremap(__pfn_to_phys(nums[0]), size + offset); } else { buf->vaddr = vm_map_ram(frame_vector_pages(vec), n_pages, -1); } if (!buf->vaddr) goto fail_map; buf->vaddr += offset; return buf; fail_map: vb2_destroy_framevec(vec); fail_pfnvec_create: kfree(buf); return ERR_PTR(ret); } static void vb2_vmalloc_put_userptr(void *buf_priv) { struct vb2_vmalloc_buf *buf = buf_priv; unsigned long vaddr = (unsigned long)buf->vaddr & PAGE_MASK; unsigned int i; struct page **pages; unsigned int n_pages; if (!buf->vec->is_pfns) { n_pages = frame_vector_count(buf->vec); if (vaddr) vm_unmap_ram((void *)vaddr, n_pages); if (buf->dma_dir == DMA_FROM_DEVICE || buf->dma_dir == DMA_BIDIRECTIONAL) { pages = frame_vector_pages(buf->vec); if (!WARN_ON_ONCE(IS_ERR(pages))) for (i = 0; i < n_pages; i++) set_page_dirty_lock(pages[i]); } } else { iounmap((__force void __iomem *)buf->vaddr); } vb2_destroy_framevec(buf->vec); kfree(buf); } static void *vb2_vmalloc_vaddr(struct vb2_buffer *vb, void *buf_priv) { struct vb2_vmalloc_buf *buf = buf_priv; if (!buf->vaddr) { pr_err("Address of an unallocated plane requested or cannot map user pointer\n"); return NULL; } return buf->vaddr; } static unsigned int vb2_vmalloc_num_users(void *buf_priv) { struct vb2_vmalloc_buf *buf = buf_priv; return refcount_read(&buf->refcount); } static int vb2_vmalloc_mmap(void *buf_priv, struct vm_area_struct *vma) { struct vb2_vmalloc_buf *buf = buf_priv; int ret; if (!buf) { pr_err("No memory to map\n"); return -EINVAL; } ret = remap_vmalloc_range(vma, buf->vaddr, 0); if (ret) { pr_err("Remapping vmalloc memory, error: %d\n", ret); return ret; } /* * Make sure that vm_areas for 2 buffers won't be merged together */ vm_flags_set(vma, VM_DONTEXPAND); /* * Use common vm_area operations to track buffer refcount. 
*/ vma->vm_private_data = &buf->handler; vma->vm_ops = &vb2_common_vm_ops; vma->vm_ops->open(vma); return 0; } #ifdef CONFIG_HAS_DMA /*********************************************/ /* DMABUF ops for exporters */ /*********************************************/ struct vb2_vmalloc_attachment { struct sg_table sgt; enum dma_data_direction dma_dir; }; static int vb2_vmalloc_dmabuf_ops_attach(struct dma_buf *dbuf, struct dma_buf_attachment *dbuf_attach) { struct vb2_vmalloc_attachment *attach; struct vb2_vmalloc_buf *buf = dbuf->priv; int num_pages = PAGE_ALIGN(buf->size) / PAGE_SIZE; struct sg_table *sgt; struct scatterlist *sg; void *vaddr = buf->vaddr; int ret; int i; attach = kzalloc(sizeof(*attach), GFP_KERNEL); if (!attach) return -ENOMEM; sgt = &attach->sgt; ret = sg_alloc_table(sgt, num_pages, GFP_KERNEL); if (ret) { kfree(attach); return ret; } for_each_sgtable_sg(sgt, sg, i) { struct page *page = vmalloc_to_page(vaddr); if (!page) { sg_free_table(sgt); kfree(attach); return -ENOMEM; } sg_set_page(sg, page, PAGE_SIZE, 0); vaddr += PAGE_SIZE; } attach->dma_dir = DMA_NONE; dbuf_attach->priv = attach; return 0; } static void vb2_vmalloc_dmabuf_ops_detach(struct dma_buf *dbuf, struct dma_buf_attachment *db_attach) { struct vb2_vmalloc_attachment *attach = db_attach->priv; struct sg_table *sgt; if (!attach) return; sgt = &attach->sgt; /* release the scatterlist cache */ if (attach->dma_dir != DMA_NONE) dma_unmap_sgtable(db_attach->dev, sgt, attach->dma_dir, 0); sg_free_table(sgt); kfree(attach); db_attach->priv = NULL; } static struct sg_table *vb2_vmalloc_dmabuf_ops_map( struct dma_buf_attachment *db_attach, enum dma_data_direction dma_dir) { struct vb2_vmalloc_attachment *attach = db_attach->priv; struct sg_table *sgt; sgt = &attach->sgt; /* return previously mapped sg table */ if (attach->dma_dir == dma_dir) return sgt; /* release any previous cache */ if (attach->dma_dir != DMA_NONE) { dma_unmap_sgtable(db_attach->dev, sgt, attach->dma_dir, 0); attach->dma_dir = 
DMA_NONE;
	}

	/*
	 * NOTE(review): the lines above and the statements down to the next
	 * closing brace are the tail of a function whose head lies outside
	 * this view (presumably the dma-buf map_dma_buf callback -- confirm).
	 */
	/* mapping to the client with new direction */
	if (dma_map_sgtable(db_attach->dev, sgt, dma_dir, 0)) {
		pr_err("failed to map scatterlist\n");
		return ERR_PTR(-EIO);
	}

	/* remember the direction the table was mapped with */
	attach->dma_dir = dma_dir;

	return sgt;
}

/* dma-buf unmap_dma_buf callback: intentionally empty. */
static void vb2_vmalloc_dmabuf_ops_unmap(struct dma_buf_attachment *db_attach,
	struct sg_table *sgt, enum dma_data_direction dma_dir)
{
	/* nothing to be done here */
}

/* dma-buf release callback: drops the buffer reference held by the dma-buf. */
static void vb2_vmalloc_dmabuf_ops_release(struct dma_buf *dbuf)
{
	/* drop reference obtained in vb2_vmalloc_get_dmabuf */
	vb2_vmalloc_put(dbuf->priv);
}

/*
 * dma-buf vmap callback: publishes the buffer's existing kernel virtual
 * address through @map. Always returns 0.
 */
static int vb2_vmalloc_dmabuf_ops_vmap(struct dma_buf *dbuf,
				       struct iosys_map *map)
{
	struct vb2_vmalloc_buf *buf = dbuf->priv;

	iosys_map_set_vaddr(map, buf->vaddr);

	return 0;
}

/* dma-buf mmap callback: forwards to vb2_vmalloc_mmap() and returns its result. */
static int vb2_vmalloc_dmabuf_ops_mmap(struct dma_buf *dbuf,
	struct vm_area_struct *vma)
{
	return vb2_vmalloc_mmap(dbuf->priv, vma);
}

/* Operations installed on every dma-buf exported by vb2_vmalloc_get_dmabuf(). */
static const struct dma_buf_ops vb2_vmalloc_dmabuf_ops = {
	.attach = vb2_vmalloc_dmabuf_ops_attach,
	.detach = vb2_vmalloc_dmabuf_ops_detach,
	.map_dma_buf = vb2_vmalloc_dmabuf_ops_map,
	.unmap_dma_buf = vb2_vmalloc_dmabuf_ops_unmap,
	.vmap = vb2_vmalloc_dmabuf_ops_vmap,
	.mmap = vb2_vmalloc_dmabuf_ops_mmap,
	.release = vb2_vmalloc_dmabuf_ops_release,
};

/*
 * Export the buffer @buf_priv as a dma-buf created with @flags.
 *
 * Returns the new dma-buf, or NULL when the buffer has no kernel virtual
 * address (also triggers a WARN) or when dma_buf_export() fails. On success
 * the buffer's refcount is incremented; the release callback above drops it.
 */
static struct dma_buf *vb2_vmalloc_get_dmabuf(struct vb2_buffer *vb,
					      void *buf_priv,
					      unsigned long flags)
{
	struct vb2_vmalloc_buf *buf = buf_priv;
	struct dma_buf *dbuf;
	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);

	exp_info.ops = &vb2_vmalloc_dmabuf_ops;
	exp_info.size = buf->size;
	exp_info.flags = flags;
	exp_info.priv = buf;

	if (WARN_ON(!buf->vaddr))
		return NULL;

	dbuf = dma_buf_export(&exp_info);
	if (IS_ERR(dbuf))
		return NULL;

	/* dmabuf keeps reference to vb2 buffer */
	refcount_inc(&buf->refcount);

	return dbuf;
}
#endif /* CONFIG_HAS_DMA */

/*********************************************/
/*       callbacks for DMABUF buffers        */
/*********************************************/

/*
 * Map the attached dma-buf into kernel address space and record the address
 * in buf->vaddr. Any vmap failure is reported to the caller as -EFAULT.
 */
static int vb2_vmalloc_map_dmabuf(void *mem_priv)
{
	struct vb2_vmalloc_buf *buf = mem_priv;
	struct iosys_map map;
	int ret;

	ret = dma_buf_vmap_unlocked(buf->dbuf, &map);
	if (ret)
		return -EFAULT;
	buf->vaddr = map.vaddr;

	return 0;
}

/* Undo vb2_vmalloc_map_dmabuf(): vunmap the dma-buf and clear buf->vaddr. */
static void vb2_vmalloc_unmap_dmabuf(void *mem_priv)
{
	struct vb2_vmalloc_buf *buf = mem_priv;
	struct iosys_map map = IOSYS_MAP_INIT_VADDR(buf->vaddr);

	dma_buf_vunmap_unlocked(buf->dbuf, &map);
	buf->vaddr = NULL;
}

/*
 * Free the bookkeeping allocated by vb2_vmalloc_attach_dmabuf(). A mapping
 * that is still present (non-NULL vaddr) is vunmapped first.
 */
static void vb2_vmalloc_detach_dmabuf(void *mem_priv)
{
	struct vb2_vmalloc_buf *buf = mem_priv;
	struct iosys_map map = IOSYS_MAP_INIT_VADDR(buf->vaddr);

	if (buf->vaddr)
		dma_buf_vunmap_unlocked(buf->dbuf, &map);

	kfree(buf);
}

/*
 * Allocate bookkeeping for an imported dma-buf.
 *
 * Returns the new buffer descriptor, ERR_PTR(-EFAULT) when @dbuf is smaller
 * than the requested @size, or ERR_PTR(-ENOMEM) on allocation failure.
 * The dma-buf is not mapped here; see vb2_vmalloc_map_dmabuf().
 */
static void *vb2_vmalloc_attach_dmabuf(struct vb2_buffer *vb,
				       struct device *dev,
				       struct dma_buf *dbuf,
				       unsigned long size)
{
	struct vb2_vmalloc_buf *buf;

	if (dbuf->size < size)
		return ERR_PTR(-EFAULT);

	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return ERR_PTR(-ENOMEM);

	buf->dbuf = dbuf;
	buf->dma_dir = vb->vb2_queue->dma_dir;
	buf->size = size;

	return buf;
}

/* Exported memory-ops table; .get_dmabuf is only wired up under CONFIG_HAS_DMA. */
const struct vb2_mem_ops vb2_vmalloc_memops = {
	.alloc		= vb2_vmalloc_alloc,
	.put		= vb2_vmalloc_put,
	.get_userptr	= vb2_vmalloc_get_userptr,
	.put_userptr	= vb2_vmalloc_put_userptr,
#ifdef CONFIG_HAS_DMA
	.get_dmabuf	= vb2_vmalloc_get_dmabuf,
#endif
	.map_dmabuf	= vb2_vmalloc_map_dmabuf,
	.unmap_dmabuf	= vb2_vmalloc_unmap_dmabuf,
	.attach_dmabuf	= vb2_vmalloc_attach_dmabuf,
	.detach_dmabuf	= vb2_vmalloc_detach_dmabuf,
	.vaddr		= vb2_vmalloc_vaddr,
	.mmap		= vb2_vmalloc_mmap,
	.num_users	= vb2_vmalloc_num_users,
};
EXPORT_SYMBOL_GPL(vb2_vmalloc_memops);

MODULE_DESCRIPTION("vmalloc memory handling routines for videobuf2");
MODULE_AUTHOR("Pawel Osciak <pawel@osciak.com>");
MODULE_LICENSE("GPL");
MODULE_IMPORT_NS("DMA_BUF");
2 4 4 3 3 3 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 
2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 // SPDX-License-Identifier: GPL-2.0 /* * Configfs interface for the NVMe target. * Copyright (c) 2015-2016 HGST, a Western Digital Company. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kstrtox.h> #include <linux/kernel.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/stat.h> #include <linux/ctype.h> #include <linux/pci.h> #include <linux/pci-p2pdma.h> #ifdef CONFIG_NVME_TARGET_AUTH #include <linux/nvme-auth.h> #endif #include <linux/nvme-keyring.h> #include <crypto/hash.h> #include <crypto/kpp.h> #include <linux/nospec.h> #include "nvmet.h" static const struct config_item_type nvmet_host_type; static const struct config_item_type nvmet_subsys_type; static LIST_HEAD(nvmet_ports_list); struct list_head *nvmet_ports = &nvmet_ports_list; struct nvmet_type_name_map { u8 type; const char *name; }; static struct nvmet_type_name_map nvmet_transport[] = { { NVMF_TRTYPE_RDMA, "rdma" }, { NVMF_TRTYPE_FC, "fc" }, { NVMF_TRTYPE_TCP, "tcp" }, { NVMF_TRTYPE_PCI, "pci" }, { NVMF_TRTYPE_LOOP, "loop" }, }; static const struct nvmet_type_name_map nvmet_addr_family[] = { { NVMF_ADDR_FAMILY_PCI, "pcie" }, { NVMF_ADDR_FAMILY_IP4, "ipv4" }, { 
NVMF_ADDR_FAMILY_IP6, "ipv6" }, { NVMF_ADDR_FAMILY_IB, "ib" }, { NVMF_ADDR_FAMILY_FC, "fc" }, { NVMF_ADDR_FAMILY_PCI, "pci" }, { NVMF_ADDR_FAMILY_LOOP, "loop" }, }; static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller) { if (p->enabled) pr_err("Disable port '%u' before changing attribute in %s\n", le16_to_cpu(p->disc_addr.portid), caller); return p->enabled; } /* * nvmet_port Generic ConfigFS definitions. * Used in any place in the ConfigFS tree that refers to an address. */ static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page) { u8 adrfam = to_nvmet_port(item)->disc_addr.adrfam; int i; for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { if (nvmet_addr_family[i].type == adrfam) return snprintf(page, PAGE_SIZE, "%s\n", nvmet_addr_family[i].name); } return snprintf(page, PAGE_SIZE, "\n"); } static ssize_t nvmet_addr_adrfam_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); int i; if (nvmet_is_port_enabled(port, __func__)) return -EACCES; for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { if (sysfs_streq(page, nvmet_addr_family[i].name)) goto found; } pr_err("Invalid value '%s' for adrfam\n", page); return -EINVAL; found: port->disc_addr.adrfam = nvmet_addr_family[i].type; return count; } CONFIGFS_ATTR(nvmet_, addr_adrfam); static ssize_t nvmet_addr_portid_show(struct config_item *item, char *page) { __le16 portid = to_nvmet_port(item)->disc_addr.portid; return snprintf(page, PAGE_SIZE, "%d\n", le16_to_cpu(portid)); } static ssize_t nvmet_addr_portid_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); u16 portid = 0; if (kstrtou16(page, 0, &portid)) { pr_err("Invalid value '%s' for portid\n", page); return -EINVAL; } if (nvmet_is_port_enabled(port, __func__)) return -EACCES; port->disc_addr.portid = cpu_to_le16(portid); return count; } CONFIGFS_ATTR(nvmet_, addr_portid); static ssize_t 
nvmet_addr_traddr_show(struct config_item *item, char *page) { struct nvmet_port *port = to_nvmet_port(item); return snprintf(page, PAGE_SIZE, "%s\n", port->disc_addr.traddr); } static ssize_t nvmet_addr_traddr_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); if (count > NVMF_TRADDR_SIZE) { pr_err("Invalid value '%s' for traddr\n", page); return -EINVAL; } if (nvmet_is_port_enabled(port, __func__)) return -EACCES; if (sscanf(page, "%s\n", port->disc_addr.traddr) != 1) return -EINVAL; return count; } CONFIGFS_ATTR(nvmet_, addr_traddr); static const struct nvmet_type_name_map nvmet_addr_treq[] = { { NVMF_TREQ_NOT_SPECIFIED, "not specified" }, { NVMF_TREQ_REQUIRED, "required" }, { NVMF_TREQ_NOT_REQUIRED, "not required" }, }; static inline u8 nvmet_port_disc_addr_treq_mask(struct nvmet_port *port) { return (port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK); } static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) { u8 treq = nvmet_port_disc_addr_treq_secure_channel(to_nvmet_port(item)); int i; for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { if (treq == nvmet_addr_treq[i].type) return snprintf(page, PAGE_SIZE, "%s\n", nvmet_addr_treq[i].name); } return snprintf(page, PAGE_SIZE, "\n"); } static ssize_t nvmet_addr_treq_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); u8 treq = nvmet_port_disc_addr_treq_mask(port); int i; if (nvmet_is_port_enabled(port, __func__)) return -EACCES; for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { if (sysfs_streq(page, nvmet_addr_treq[i].name)) goto found; } pr_err("Invalid value '%s' for treq\n", page); return -EINVAL; found: if (port->disc_addr.trtype == NVMF_TRTYPE_TCP && port->disc_addr.tsas.tcp.sectype == NVMF_TCP_SECTYPE_TLS13) { switch (nvmet_addr_treq[i].type) { case NVMF_TREQ_NOT_SPECIFIED: pr_debug("treq '%s' not allowed for TLS1.3\n", nvmet_addr_treq[i].name); return -EINVAL; 
case NVMF_TREQ_NOT_REQUIRED: pr_warn("Allow non-TLS connections while TLS1.3 is enabled\n"); break; default: break; } } treq |= nvmet_addr_treq[i].type; port->disc_addr.treq = treq; return count; } CONFIGFS_ATTR(nvmet_, addr_treq); static ssize_t nvmet_addr_trsvcid_show(struct config_item *item, char *page) { struct nvmet_port *port = to_nvmet_port(item); return snprintf(page, PAGE_SIZE, "%s\n", port->disc_addr.trsvcid); } static ssize_t nvmet_addr_trsvcid_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); if (count > NVMF_TRSVCID_SIZE) { pr_err("Invalid value '%s' for trsvcid\n", page); return -EINVAL; } if (nvmet_is_port_enabled(port, __func__)) return -EACCES; if (sscanf(page, "%s\n", port->disc_addr.trsvcid) != 1) return -EINVAL; return count; } CONFIGFS_ATTR(nvmet_, addr_trsvcid); static ssize_t nvmet_param_inline_data_size_show(struct config_item *item, char *page) { struct nvmet_port *port = to_nvmet_port(item); return snprintf(page, PAGE_SIZE, "%d\n", port->inline_data_size); } static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); int ret; if (nvmet_is_port_enabled(port, __func__)) return -EACCES; ret = kstrtoint(page, 0, &port->inline_data_size); if (ret) { pr_err("Invalid value '%s' for inline_data_size\n", page); return -EINVAL; } return count; } CONFIGFS_ATTR(nvmet_, param_inline_data_size); static ssize_t nvmet_param_max_queue_size_show(struct config_item *item, char *page) { struct nvmet_port *port = to_nvmet_port(item); return snprintf(page, PAGE_SIZE, "%d\n", port->max_queue_size); } static ssize_t nvmet_param_max_queue_size_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); int ret; if (nvmet_is_port_enabled(port, __func__)) return -EACCES; ret = kstrtoint(page, 0, &port->max_queue_size); if (ret) { pr_err("Invalid value '%s' 
for max_queue_size\n", page); return -EINVAL; } return count; } CONFIGFS_ATTR(nvmet_, param_max_queue_size); #ifdef CONFIG_BLK_DEV_INTEGRITY static ssize_t nvmet_param_pi_enable_show(struct config_item *item, char *page) { struct nvmet_port *port = to_nvmet_port(item); return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable); } static ssize_t nvmet_param_pi_enable_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); bool val; if (kstrtobool(page, &val)) return -EINVAL; if (nvmet_is_port_enabled(port, __func__)) return -EACCES; port->pi_enable = val; return count; } CONFIGFS_ATTR(nvmet_, param_pi_enable); #endif static ssize_t nvmet_addr_trtype_show(struct config_item *item, char *page) { struct nvmet_port *port = to_nvmet_port(item); int i; for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { if (port->disc_addr.trtype == nvmet_transport[i].type) return snprintf(page, PAGE_SIZE, "%s\n", nvmet_transport[i].name); } return sprintf(page, "\n"); } static void nvmet_port_init_tsas_rdma(struct nvmet_port *port) { port->disc_addr.tsas.rdma.qptype = NVMF_RDMA_QPTYPE_CONNECTED; port->disc_addr.tsas.rdma.prtype = NVMF_RDMA_PRTYPE_NOT_SPECIFIED; port->disc_addr.tsas.rdma.cms = NVMF_RDMA_CMS_RDMA_CM; } static void nvmet_port_init_tsas_tcp(struct nvmet_port *port, int sectype) { port->disc_addr.tsas.tcp.sectype = sectype; } static ssize_t nvmet_addr_trtype_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); int i; if (nvmet_is_port_enabled(port, __func__)) return -EACCES; for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { if (sysfs_streq(page, nvmet_transport[i].name)) goto found; } pr_err("Invalid value '%s' for trtype\n", page); return -EINVAL; found: memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); port->disc_addr.trtype = nvmet_transport[i].type; if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) nvmet_port_init_tsas_rdma(port); else if 
(port->disc_addr.trtype == NVMF_TRTYPE_TCP) nvmet_port_init_tsas_tcp(port, NVMF_TCP_SECTYPE_NONE); return count; } CONFIGFS_ATTR(nvmet_, addr_trtype); static const struct nvmet_type_name_map nvmet_addr_tsas_tcp[] = { { NVMF_TCP_SECTYPE_NONE, "none" }, { NVMF_TCP_SECTYPE_TLS13, "tls1.3" }, }; static const struct nvmet_type_name_map nvmet_addr_tsas_rdma[] = { { NVMF_RDMA_QPTYPE_CONNECTED, "connected" }, { NVMF_RDMA_QPTYPE_DATAGRAM, "datagram" }, }; static ssize_t nvmet_addr_tsas_show(struct config_item *item, char *page) { struct nvmet_port *port = to_nvmet_port(item); int i; if (port->disc_addr.trtype == NVMF_TRTYPE_TCP) { for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) { if (port->disc_addr.tsas.tcp.sectype == nvmet_addr_tsas_tcp[i].type) return sprintf(page, "%s\n", nvmet_addr_tsas_tcp[i].name); } } else if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) { for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_rdma); i++) { if (port->disc_addr.tsas.rdma.qptype == nvmet_addr_tsas_rdma[i].type) return sprintf(page, "%s\n", nvmet_addr_tsas_rdma[i].name); } } return sprintf(page, "\n"); } static u8 nvmet_addr_tsas_rdma_store(const char *page) { int i; for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_rdma); i++) { if (sysfs_streq(page, nvmet_addr_tsas_rdma[i].name)) return nvmet_addr_tsas_rdma[i].type; } return NVMF_RDMA_QPTYPE_INVALID; } static u8 nvmet_addr_tsas_tcp_store(const char *page) { int i; for (i = 0; i < ARRAY_SIZE(nvmet_addr_tsas_tcp); i++) { if (sysfs_streq(page, nvmet_addr_tsas_tcp[i].name)) return nvmet_addr_tsas_tcp[i].type; } return NVMF_TCP_SECTYPE_INVALID; } static ssize_t nvmet_addr_tsas_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); u8 treq = nvmet_port_disc_addr_treq_mask(port); u8 sectype, qptype; if (nvmet_is_port_enabled(port, __func__)) return -EACCES; if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) { qptype = nvmet_addr_tsas_rdma_store(page); if (qptype == port->disc_addr.tsas.rdma.qptype) 
return count; } else if (port->disc_addr.trtype == NVMF_TRTYPE_TCP) { sectype = nvmet_addr_tsas_tcp_store(page); if (sectype != NVMF_TCP_SECTYPE_INVALID) goto found; } pr_err("Invalid value '%s' for tsas\n", page); return -EINVAL; found: if (sectype == NVMF_TCP_SECTYPE_TLS13) { if (!IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS)) { pr_err("TLS is not supported\n"); return -EINVAL; } if (!port->keyring) { pr_err("TLS keyring not configured\n"); return -EINVAL; } } nvmet_port_init_tsas_tcp(port, sectype); /* * If TLS is enabled TREQ should be set to 'required' per default */ if (sectype == NVMF_TCP_SECTYPE_TLS13) { u8 sc = nvmet_port_disc_addr_treq_secure_channel(port); if (sc == NVMF_TREQ_NOT_SPECIFIED) treq |= NVMF_TREQ_REQUIRED; else treq |= sc; } else { treq |= NVMF_TREQ_NOT_SPECIFIED; } port->disc_addr.treq = treq; return count; } CONFIGFS_ATTR(nvmet_, addr_tsas); /* * Namespace structures & file operation functions below */ static ssize_t nvmet_ns_device_path_show(struct config_item *item, char *page) { return sprintf(page, "%s\n", to_nvmet_ns(item)->device_path); } static ssize_t nvmet_ns_device_path_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ns *ns = to_nvmet_ns(item); struct nvmet_subsys *subsys = ns->subsys; size_t len; int ret; mutex_lock(&subsys->lock); ret = -EBUSY; if (ns->enabled) goto out_unlock; ret = -EINVAL; len = strcspn(page, "\n"); if (!len) goto out_unlock; kfree(ns->device_path); ret = -ENOMEM; ns->device_path = kmemdup_nul(page, len, GFP_KERNEL); if (!ns->device_path) goto out_unlock; mutex_unlock(&subsys->lock); return count; out_unlock: mutex_unlock(&subsys->lock); return ret; } CONFIGFS_ATTR(nvmet_ns_, device_path); #ifdef CONFIG_PCI_P2PDMA static ssize_t nvmet_ns_p2pmem_show(struct config_item *item, char *page) { struct nvmet_ns *ns = to_nvmet_ns(item); return pci_p2pdma_enable_show(page, ns->p2p_dev, ns->use_p2pmem); } static ssize_t nvmet_ns_p2pmem_store(struct config_item *item, const char *page, size_t 
count) { struct nvmet_ns *ns = to_nvmet_ns(item); struct pci_dev *p2p_dev = NULL; bool use_p2pmem; int ret = count; int error; mutex_lock(&ns->subsys->lock); if (ns->enabled) { ret = -EBUSY; goto out_unlock; } error = pci_p2pdma_enable_store(page, &p2p_dev, &use_p2pmem); if (error) { ret = error; goto out_unlock; } ns->use_p2pmem = use_p2pmem; pci_dev_put(ns->p2p_dev); ns->p2p_dev = p2p_dev; out_unlock: mutex_unlock(&ns->subsys->lock); return ret; } CONFIGFS_ATTR(nvmet_ns_, p2pmem); #endif /* CONFIG_PCI_P2PDMA */ static ssize_t nvmet_ns_device_uuid_show(struct config_item *item, char *page) { return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->uuid); } static ssize_t nvmet_ns_device_uuid_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ns *ns = to_nvmet_ns(item); struct nvmet_subsys *subsys = ns->subsys; int ret = 0; mutex_lock(&subsys->lock); if (ns->enabled) { ret = -EBUSY; goto out_unlock; } if (uuid_parse(page, &ns->uuid)) ret = -EINVAL; out_unlock: mutex_unlock(&subsys->lock); return ret ? ret : count; } CONFIGFS_ATTR(nvmet_ns_, device_uuid); static ssize_t nvmet_ns_device_nguid_show(struct config_item *item, char *page) { return sprintf(page, "%pUb\n", &to_nvmet_ns(item)->nguid); } static ssize_t nvmet_ns_device_nguid_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ns *ns = to_nvmet_ns(item); struct nvmet_subsys *subsys = ns->subsys; u8 nguid[16]; const char *p = page; int i; int ret = 0; mutex_lock(&subsys->lock); if (ns->enabled) { ret = -EBUSY; goto out_unlock; } for (i = 0; i < 16; i++) { if (p + 2 > page + count) { ret = -EINVAL; goto out_unlock; } if (!isxdigit(p[0]) || !isxdigit(p[1])) { ret = -EINVAL; goto out_unlock; } nguid[i] = (hex_to_bin(p[0]) << 4) | hex_to_bin(p[1]); p += 2; if (*p == '-' || *p == ':') p++; } memcpy(&ns->nguid, nguid, sizeof(nguid)); out_unlock: mutex_unlock(&subsys->lock); return ret ? 
ret : count; } CONFIGFS_ATTR(nvmet_ns_, device_nguid); static ssize_t nvmet_ns_ana_grpid_show(struct config_item *item, char *page) { return sprintf(page, "%u\n", to_nvmet_ns(item)->anagrpid); } static ssize_t nvmet_ns_ana_grpid_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ns *ns = to_nvmet_ns(item); u32 oldgrpid, newgrpid; int ret; ret = kstrtou32(page, 0, &newgrpid); if (ret) return ret; if (newgrpid < 1 || newgrpid > NVMET_MAX_ANAGRPS) return -EINVAL; down_write(&nvmet_ana_sem); oldgrpid = ns->anagrpid; newgrpid = array_index_nospec(newgrpid, NVMET_MAX_ANAGRPS); nvmet_ana_group_enabled[newgrpid]++; ns->anagrpid = newgrpid; nvmet_ana_group_enabled[oldgrpid]--; nvmet_ana_chgcnt++; up_write(&nvmet_ana_sem); nvmet_send_ana_event(ns->subsys, NULL); return count; } CONFIGFS_ATTR(nvmet_ns_, ana_grpid); static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page) { return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled); } static ssize_t nvmet_ns_enable_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ns *ns = to_nvmet_ns(item); bool enable; int ret = 0; if (kstrtobool(page, &enable)) return -EINVAL; /* * take a global nvmet_config_sem because the disable routine has a * window where it releases the subsys-lock, giving a chance to * a parallel enable to concurrently execute causing the disable to * have a misaccounting of the ns percpu_ref. */ down_write(&nvmet_config_sem); if (enable) ret = nvmet_ns_enable(ns); else nvmet_ns_disable(ns); up_write(&nvmet_config_sem); return ret ? 
ret : count; } CONFIGFS_ATTR(nvmet_ns_, enable); static ssize_t nvmet_ns_buffered_io_show(struct config_item *item, char *page) { return sprintf(page, "%d\n", to_nvmet_ns(item)->buffered_io); } static ssize_t nvmet_ns_buffered_io_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ns *ns = to_nvmet_ns(item); bool val; if (kstrtobool(page, &val)) return -EINVAL; mutex_lock(&ns->subsys->lock); if (ns->enabled) { pr_err("disable ns before setting buffered_io value.\n"); mutex_unlock(&ns->subsys->lock); return -EINVAL; } ns->buffered_io = val; mutex_unlock(&ns->subsys->lock); return count; } CONFIGFS_ATTR(nvmet_ns_, buffered_io); static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ns *ns = to_nvmet_ns(item); bool val; if (kstrtobool(page, &val)) return -EINVAL; if (!val) return -EINVAL; mutex_lock(&ns->subsys->lock); if (!ns->enabled) { pr_err("enable ns before revalidate.\n"); mutex_unlock(&ns->subsys->lock); return -EINVAL; } if (nvmet_ns_revalidate(ns)) nvmet_ns_changed(ns->subsys, ns->nsid); mutex_unlock(&ns->subsys->lock); return count; } CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size); static ssize_t nvmet_ns_resv_enable_show(struct config_item *item, char *page) { return sysfs_emit(page, "%d\n", to_nvmet_ns(item)->pr.enable); } static ssize_t nvmet_ns_resv_enable_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ns *ns = to_nvmet_ns(item); bool val; if (kstrtobool(page, &val)) return -EINVAL; mutex_lock(&ns->subsys->lock); if (ns->enabled) { pr_err("the ns:%d is already enabled.\n", ns->nsid); mutex_unlock(&ns->subsys->lock); return -EINVAL; } ns->pr.enable = val; mutex_unlock(&ns->subsys->lock); return count; } CONFIGFS_ATTR(nvmet_ns_, resv_enable); static struct configfs_attribute *nvmet_ns_attrs[] = { &nvmet_ns_attr_device_path, &nvmet_ns_attr_device_nguid, &nvmet_ns_attr_device_uuid, &nvmet_ns_attr_ana_grpid, &nvmet_ns_attr_enable, 
&nvmet_ns_attr_buffered_io, &nvmet_ns_attr_revalidate_size, &nvmet_ns_attr_resv_enable, #ifdef CONFIG_PCI_P2PDMA &nvmet_ns_attr_p2pmem, #endif NULL, }; static void nvmet_ns_release(struct config_item *item) { struct nvmet_ns *ns = to_nvmet_ns(item); nvmet_ns_free(ns); } static struct configfs_item_operations nvmet_ns_item_ops = { .release = nvmet_ns_release, }; static const struct config_item_type nvmet_ns_type = { .ct_item_ops = &nvmet_ns_item_ops, .ct_attrs = nvmet_ns_attrs, .ct_owner = THIS_MODULE, }; static struct config_group *nvmet_ns_make(struct config_group *group, const char *name) { struct nvmet_subsys *subsys = namespaces_to_subsys(&group->cg_item); struct nvmet_ns *ns; int ret; u32 nsid; ret = kstrtou32(name, 0, &nsid); if (ret) goto out; ret = -EINVAL; if (nsid == 0 || nsid == NVME_NSID_ALL) { pr_err("invalid nsid %#x", nsid); goto out; } ret = -ENOMEM; ns = nvmet_ns_alloc(subsys, nsid); if (!ns) goto out; config_group_init_type_name(&ns->group, name, &nvmet_ns_type); pr_info("adding nsid %d to subsystem %s\n", nsid, subsys->subsysnqn); return &ns->group; out: return ERR_PTR(ret); } static struct configfs_group_operations nvmet_namespaces_group_ops = { .make_group = nvmet_ns_make, }; static const struct config_item_type nvmet_namespaces_type = { .ct_group_ops = &nvmet_namespaces_group_ops, .ct_owner = THIS_MODULE, }; #ifdef CONFIG_NVME_TARGET_PASSTHRU static ssize_t nvmet_passthru_device_path_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item->ci_parent); return snprintf(page, PAGE_SIZE, "%s\n", subsys->passthru_ctrl_path); } static ssize_t nvmet_passthru_device_path_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item->ci_parent); size_t len; int ret; mutex_lock(&subsys->lock); ret = -EBUSY; if (subsys->passthru_ctrl) goto out_unlock; ret = -EINVAL; len = strcspn(page, "\n"); if (!len) goto out_unlock; kfree(subsys->passthru_ctrl_path); ret = -ENOMEM; 
subsys->passthru_ctrl_path = kstrndup(page, len, GFP_KERNEL); if (!subsys->passthru_ctrl_path) goto out_unlock; mutex_unlock(&subsys->lock); return count; out_unlock: mutex_unlock(&subsys->lock); return ret; } CONFIGFS_ATTR(nvmet_passthru_, device_path); static ssize_t nvmet_passthru_enable_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item->ci_parent); return sprintf(page, "%d\n", subsys->passthru_ctrl ? 1 : 0); } static ssize_t nvmet_passthru_enable_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item->ci_parent); bool enable; int ret = 0; if (kstrtobool(page, &enable)) return -EINVAL; if (enable) ret = nvmet_passthru_ctrl_enable(subsys); else nvmet_passthru_ctrl_disable(subsys); return ret ? ret : count; } CONFIGFS_ATTR(nvmet_passthru_, enable); static ssize_t nvmet_passthru_admin_timeout_show(struct config_item *item, char *page) { return sprintf(page, "%u\n", to_subsys(item->ci_parent)->admin_timeout); } static ssize_t nvmet_passthru_admin_timeout_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item->ci_parent); unsigned int timeout; if (kstrtouint(page, 0, &timeout)) return -EINVAL; subsys->admin_timeout = timeout; return count; } CONFIGFS_ATTR(nvmet_passthru_, admin_timeout); static ssize_t nvmet_passthru_io_timeout_show(struct config_item *item, char *page) { return sprintf(page, "%u\n", to_subsys(item->ci_parent)->io_timeout); } static ssize_t nvmet_passthru_io_timeout_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item->ci_parent); unsigned int timeout; if (kstrtouint(page, 0, &timeout)) return -EINVAL; subsys->io_timeout = timeout; return count; } CONFIGFS_ATTR(nvmet_passthru_, io_timeout); static ssize_t nvmet_passthru_clear_ids_show(struct config_item *item, char *page) { return sprintf(page, "%u\n", to_subsys(item->ci_parent)->clear_ids); } 
static ssize_t nvmet_passthru_clear_ids_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item->ci_parent); unsigned int clear_ids; if (kstrtouint(page, 0, &clear_ids)) return -EINVAL; subsys->clear_ids = clear_ids; return count; } CONFIGFS_ATTR(nvmet_passthru_, clear_ids); static struct configfs_attribute *nvmet_passthru_attrs[] = { &nvmet_passthru_attr_device_path, &nvmet_passthru_attr_enable, &nvmet_passthru_attr_admin_timeout, &nvmet_passthru_attr_io_timeout, &nvmet_passthru_attr_clear_ids, NULL, }; static const struct config_item_type nvmet_passthru_type = { .ct_attrs = nvmet_passthru_attrs, .ct_owner = THIS_MODULE, }; static void nvmet_add_passthru_group(struct nvmet_subsys *subsys) { config_group_init_type_name(&subsys->passthru_group, "passthru", &nvmet_passthru_type); configfs_add_default_group(&subsys->passthru_group, &subsys->group); } #else /* CONFIG_NVME_TARGET_PASSTHRU */ static void nvmet_add_passthru_group(struct nvmet_subsys *subsys) { } #endif /* CONFIG_NVME_TARGET_PASSTHRU */ static int nvmet_port_subsys_allow_link(struct config_item *parent, struct config_item *target) { struct nvmet_port *port = to_nvmet_port(parent->ci_parent); struct nvmet_subsys *subsys; struct nvmet_subsys_link *link, *p; int ret; if (target->ci_type != &nvmet_subsys_type) { pr_err("can only link subsystems into the subsystems dir.!\n"); return -EINVAL; } subsys = to_subsys(target); link = kmalloc(sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; link->subsys = subsys; down_write(&nvmet_config_sem); ret = -EEXIST; list_for_each_entry(p, &port->subsystems, entry) { if (p->subsys == subsys) goto out_free_link; } if (list_empty(&port->subsystems)) { ret = nvmet_enable_port(port); if (ret) goto out_free_link; } list_add_tail(&link->entry, &port->subsystems); nvmet_port_disc_changed(port, subsys); up_write(&nvmet_config_sem); return 0; out_free_link: up_write(&nvmet_config_sem); kfree(link); return ret; } static void 
nvmet_port_subsys_drop_link(struct config_item *parent, struct config_item *target) { struct nvmet_port *port = to_nvmet_port(parent->ci_parent); struct nvmet_subsys *subsys = to_subsys(target); struct nvmet_subsys_link *p; down_write(&nvmet_config_sem); list_for_each_entry(p, &port->subsystems, entry) { if (p->subsys == subsys) goto found; } up_write(&nvmet_config_sem); return; found: list_del(&p->entry); nvmet_port_del_ctrls(port, subsys); nvmet_port_disc_changed(port, subsys); if (list_empty(&port->subsystems)) nvmet_disable_port(port); up_write(&nvmet_config_sem); kfree(p); } static struct configfs_item_operations nvmet_port_subsys_item_ops = { .allow_link = nvmet_port_subsys_allow_link, .drop_link = nvmet_port_subsys_drop_link, }; static const struct config_item_type nvmet_port_subsys_type = { .ct_item_ops = &nvmet_port_subsys_item_ops, .ct_owner = THIS_MODULE, }; static int nvmet_allowed_hosts_allow_link(struct config_item *parent, struct config_item *target) { struct nvmet_subsys *subsys = to_subsys(parent->ci_parent); struct nvmet_host *host; struct nvmet_host_link *link, *p; int ret; if (target->ci_type != &nvmet_host_type) { pr_err("can only link hosts into the allowed_hosts directory!\n"); return -EINVAL; } host = to_host(target); link = kmalloc(sizeof(*link), GFP_KERNEL); if (!link) return -ENOMEM; link->host = host; down_write(&nvmet_config_sem); ret = -EINVAL; if (subsys->allow_any_host) { pr_err("can't add hosts when allow_any_host is set!\n"); goto out_free_link; } ret = -EEXIST; list_for_each_entry(p, &subsys->hosts, entry) { if (!strcmp(nvmet_host_name(p->host), nvmet_host_name(host))) goto out_free_link; } list_add_tail(&link->entry, &subsys->hosts); nvmet_subsys_disc_changed(subsys, host); up_write(&nvmet_config_sem); return 0; out_free_link: up_write(&nvmet_config_sem); kfree(link); return ret; } static void nvmet_allowed_hosts_drop_link(struct config_item *parent, struct config_item *target) { struct nvmet_subsys *subsys = 
to_subsys(parent->ci_parent); struct nvmet_host *host = to_host(target); struct nvmet_host_link *p; down_write(&nvmet_config_sem); list_for_each_entry(p, &subsys->hosts, entry) { if (!strcmp(nvmet_host_name(p->host), nvmet_host_name(host))) goto found; } up_write(&nvmet_config_sem); return; found: list_del(&p->entry); nvmet_subsys_disc_changed(subsys, host); up_write(&nvmet_config_sem); kfree(p); } static struct configfs_item_operations nvmet_allowed_hosts_item_ops = { .allow_link = nvmet_allowed_hosts_allow_link, .drop_link = nvmet_allowed_hosts_drop_link, }; static const struct config_item_type nvmet_allowed_hosts_type = { .ct_item_ops = &nvmet_allowed_hosts_item_ops, .ct_owner = THIS_MODULE, }; static ssize_t nvmet_subsys_attr_allow_any_host_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->allow_any_host); } static ssize_t nvmet_subsys_attr_allow_any_host_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item); bool allow_any_host; int ret = 0; if (kstrtobool(page, &allow_any_host)) return -EINVAL; down_write(&nvmet_config_sem); if (allow_any_host && !list_empty(&subsys->hosts)) { pr_err("Can't set allow_any_host when explicit hosts are set!\n"); ret = -EINVAL; goto out_unlock; } if (subsys->allow_any_host != allow_any_host) { subsys->allow_any_host = allow_any_host; nvmet_subsys_disc_changed(subsys, NULL); } out_unlock: up_write(&nvmet_config_sem); return ret ? 
ret : count; } CONFIGFS_ATTR(nvmet_subsys_, attr_allow_any_host); static ssize_t nvmet_subsys_attr_version_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item); if (NVME_TERTIARY(subsys->ver)) return snprintf(page, PAGE_SIZE, "%llu.%llu.%llu\n", NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver), NVME_TERTIARY(subsys->ver)); return snprintf(page, PAGE_SIZE, "%llu.%llu\n", NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver)); } static ssize_t nvmet_subsys_attr_version_store_locked(struct nvmet_subsys *subsys, const char *page, size_t count) { int major, minor, tertiary = 0; int ret; if (subsys->subsys_discovered) { if (NVME_TERTIARY(subsys->ver)) pr_err("Can't set version number. %llu.%llu.%llu is already assigned\n", NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver), NVME_TERTIARY(subsys->ver)); else pr_err("Can't set version number. %llu.%llu is already assigned\n", NVME_MAJOR(subsys->ver), NVME_MINOR(subsys->ver)); return -EINVAL; } /* passthru subsystems use the underlying controller's version */ if (nvmet_is_passthru_subsys(subsys)) return -EINVAL; ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary); if (ret != 2 && ret != 3) return -EINVAL; subsys->ver = NVME_VS(major, minor, tertiary); return count; } static ssize_t nvmet_subsys_attr_version_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item); ssize_t ret; down_write(&nvmet_config_sem); mutex_lock(&subsys->lock); ret = nvmet_subsys_attr_version_store_locked(subsys, page, count); mutex_unlock(&subsys->lock); up_write(&nvmet_config_sem); return ret; } CONFIGFS_ATTR(nvmet_subsys_, attr_version); /* See Section 1.5 of NVMe 1.4 */ static bool nvmet_is_ascii(const char c) { return c >= 0x20 && c <= 0x7e; } static ssize_t nvmet_subsys_attr_serial_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item); return snprintf(page, PAGE_SIZE, "%.*s\n", NVMET_SN_MAX_SIZE, subsys->serial); } 
static ssize_t nvmet_subsys_attr_serial_store_locked(struct nvmet_subsys *subsys, const char *page, size_t count) { int pos, len = strcspn(page, "\n"); if (subsys->subsys_discovered) { pr_err("Can't set serial number. %s is already assigned\n", subsys->serial); return -EINVAL; } if (!len || len > NVMET_SN_MAX_SIZE) { pr_err("Serial Number can not be empty or exceed %d Bytes\n", NVMET_SN_MAX_SIZE); return -EINVAL; } for (pos = 0; pos < len; pos++) { if (!nvmet_is_ascii(page[pos])) { pr_err("Serial Number must contain only ASCII strings\n"); return -EINVAL; } } memcpy_and_pad(subsys->serial, NVMET_SN_MAX_SIZE, page, len, ' '); return count; } static ssize_t nvmet_subsys_attr_serial_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item); ssize_t ret; down_write(&nvmet_config_sem); mutex_lock(&subsys->lock); ret = nvmet_subsys_attr_serial_store_locked(subsys, page, count); mutex_unlock(&subsys->lock); up_write(&nvmet_config_sem); return ret; } CONFIGFS_ATTR(nvmet_subsys_, attr_serial); static ssize_t nvmet_subsys_attr_cntlid_min_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_min); } static ssize_t nvmet_subsys_attr_cntlid_min_store(struct config_item *item, const char *page, size_t cnt) { u16 cntlid_min; if (sscanf(page, "%hu\n", &cntlid_min) != 1) return -EINVAL; if (cntlid_min == 0) return -EINVAL; down_write(&nvmet_config_sem); if (cntlid_min > to_subsys(item)->cntlid_max) goto out_unlock; to_subsys(item)->cntlid_min = cntlid_min; up_write(&nvmet_config_sem); return cnt; out_unlock: up_write(&nvmet_config_sem); return -EINVAL; } CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_min); static ssize_t nvmet_subsys_attr_cntlid_max_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->cntlid_max); } static ssize_t nvmet_subsys_attr_cntlid_max_store(struct config_item *item, const char *page, size_t cnt) { u16 
cntlid_max; if (sscanf(page, "%hu\n", &cntlid_max) != 1) return -EINVAL; if (cntlid_max == 0) return -EINVAL; down_write(&nvmet_config_sem); if (cntlid_max < to_subsys(item)->cntlid_min) goto out_unlock; to_subsys(item)->cntlid_max = cntlid_max; up_write(&nvmet_config_sem); return cnt; out_unlock: up_write(&nvmet_config_sem); return -EINVAL; } CONFIGFS_ATTR(nvmet_subsys_, attr_cntlid_max); static ssize_t nvmet_subsys_attr_vendor_id_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, "0x%x\n", to_subsys(item)->vendor_id); } static ssize_t nvmet_subsys_attr_vendor_id_store(struct config_item *item, const char *page, size_t count) { u16 vid; if (kstrtou16(page, 0, &vid)) return -EINVAL; down_write(&nvmet_config_sem); to_subsys(item)->vendor_id = vid; up_write(&nvmet_config_sem); return count; } CONFIGFS_ATTR(nvmet_subsys_, attr_vendor_id); static ssize_t nvmet_subsys_attr_subsys_vendor_id_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, "0x%x\n", to_subsys(item)->subsys_vendor_id); } static ssize_t nvmet_subsys_attr_subsys_vendor_id_store(struct config_item *item, const char *page, size_t count) { u16 ssvid; if (kstrtou16(page, 0, &ssvid)) return -EINVAL; down_write(&nvmet_config_sem); to_subsys(item)->subsys_vendor_id = ssvid; up_write(&nvmet_config_sem); return count; } CONFIGFS_ATTR(nvmet_subsys_, attr_subsys_vendor_id); static ssize_t nvmet_subsys_attr_model_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item); return snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number); } static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys, const char *page, size_t count) { int pos = 0, len; char *val; if (subsys->subsys_discovered) { pr_err("Can't set model number. 
%s is already assigned\n", subsys->model_number); return -EINVAL; } len = strcspn(page, "\n"); if (!len) return -EINVAL; if (len > NVMET_MN_MAX_SIZE) { pr_err("Model number size can not exceed %d Bytes\n", NVMET_MN_MAX_SIZE); return -EINVAL; } for (pos = 0; pos < len; pos++) { if (!nvmet_is_ascii(page[pos])) return -EINVAL; } val = kmemdup_nul(page, len, GFP_KERNEL); if (!val) return -ENOMEM; kfree(subsys->model_number); subsys->model_number = val; return count; } static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item); ssize_t ret; down_write(&nvmet_config_sem); mutex_lock(&subsys->lock); ret = nvmet_subsys_attr_model_store_locked(subsys, page, count); mutex_unlock(&subsys->lock); up_write(&nvmet_config_sem); return ret; } CONFIGFS_ATTR(nvmet_subsys_, attr_model); static ssize_t nvmet_subsys_attr_ieee_oui_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item); return sysfs_emit(page, "0x%06x\n", subsys->ieee_oui); } static ssize_t nvmet_subsys_attr_ieee_oui_store_locked(struct nvmet_subsys *subsys, const char *page, size_t count) { uint32_t val = 0; int ret; if (subsys->subsys_discovered) { pr_err("Can't set IEEE OUI. 
0x%06x is already assigned\n", subsys->ieee_oui); return -EINVAL; } ret = kstrtou32(page, 0, &val); if (ret < 0) return ret; if (val >= 0x1000000) return -EINVAL; subsys->ieee_oui = val; return count; } static ssize_t nvmet_subsys_attr_ieee_oui_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item); ssize_t ret; down_write(&nvmet_config_sem); mutex_lock(&subsys->lock); ret = nvmet_subsys_attr_ieee_oui_store_locked(subsys, page, count); mutex_unlock(&subsys->lock); up_write(&nvmet_config_sem); return ret; } CONFIGFS_ATTR(nvmet_subsys_, attr_ieee_oui); static ssize_t nvmet_subsys_attr_firmware_show(struct config_item *item, char *page) { struct nvmet_subsys *subsys = to_subsys(item); return sysfs_emit(page, "%s\n", subsys->firmware_rev); } static ssize_t nvmet_subsys_attr_firmware_store_locked(struct nvmet_subsys *subsys, const char *page, size_t count) { int pos = 0, len; char *val; if (subsys->subsys_discovered) { pr_err("Can't set firmware revision. 
%s is already assigned\n", subsys->firmware_rev); return -EINVAL; } len = strcspn(page, "\n"); if (!len) return -EINVAL; if (len > NVMET_FR_MAX_SIZE) { pr_err("Firmware revision size can not exceed %d Bytes\n", NVMET_FR_MAX_SIZE); return -EINVAL; } for (pos = 0; pos < len; pos++) { if (!nvmet_is_ascii(page[pos])) return -EINVAL; } val = kmemdup_nul(page, len, GFP_KERNEL); if (!val) return -ENOMEM; kfree(subsys->firmware_rev); subsys->firmware_rev = val; return count; } static ssize_t nvmet_subsys_attr_firmware_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item); ssize_t ret; down_write(&nvmet_config_sem); mutex_lock(&subsys->lock); ret = nvmet_subsys_attr_firmware_store_locked(subsys, page, count); mutex_unlock(&subsys->lock); up_write(&nvmet_config_sem); return ret; } CONFIGFS_ATTR(nvmet_subsys_, attr_firmware); #ifdef CONFIG_BLK_DEV_INTEGRITY static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support); } static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item, const char *page, size_t count) { struct nvmet_subsys *subsys = to_subsys(item); bool pi_enable; if (kstrtobool(page, &pi_enable)) return -EINVAL; subsys->pi_support = pi_enable; return count; } CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable); #endif static ssize_t nvmet_subsys_attr_qid_max_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, "%u\n", to_subsys(item)->max_qid); } static ssize_t nvmet_subsys_attr_qid_max_store(struct config_item *item, const char *page, size_t cnt) { struct nvmet_subsys *subsys = to_subsys(item); struct nvmet_ctrl *ctrl; u16 qid_max; if (sscanf(page, "%hu\n", &qid_max) != 1) return -EINVAL; if (qid_max < 1 || qid_max > NVMET_NR_QUEUES) return -EINVAL; down_write(&nvmet_config_sem); subsys->max_qid = qid_max; /* Force reconnect */ list_for_each_entry(ctrl, &subsys->ctrls, 
subsys_entry) ctrl->ops->delete_ctrl(ctrl); up_write(&nvmet_config_sem); return cnt; } CONFIGFS_ATTR(nvmet_subsys_, attr_qid_max); static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_version, &nvmet_subsys_attr_attr_serial, &nvmet_subsys_attr_attr_cntlid_min, &nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_vendor_id, &nvmet_subsys_attr_attr_subsys_vendor_id, &nvmet_subsys_attr_attr_model, &nvmet_subsys_attr_attr_qid_max, &nvmet_subsys_attr_attr_ieee_oui, &nvmet_subsys_attr_attr_firmware, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_subsys_attr_attr_pi_enable, #endif NULL, }; /* * Subsystem structures & folder operation functions below */ static void nvmet_subsys_release(struct config_item *item) { struct nvmet_subsys *subsys = to_subsys(item); nvmet_subsys_del_ctrls(subsys); nvmet_subsys_put(subsys); } static struct configfs_item_operations nvmet_subsys_item_ops = { .release = nvmet_subsys_release, }; static const struct config_item_type nvmet_subsys_type = { .ct_item_ops = &nvmet_subsys_item_ops, .ct_attrs = nvmet_subsys_attrs, .ct_owner = THIS_MODULE, }; static struct config_group *nvmet_subsys_make(struct config_group *group, const char *name) { struct nvmet_subsys *subsys; if (sysfs_streq(name, NVME_DISC_SUBSYS_NAME)) { pr_err("can't create discovery subsystem through configfs\n"); return ERR_PTR(-EINVAL); } if (sysfs_streq(name, nvmet_disc_subsys->subsysnqn)) { pr_err("can't create subsystem using unique discovery NQN\n"); return ERR_PTR(-EINVAL); } subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME); if (IS_ERR(subsys)) return ERR_CAST(subsys); config_group_init_type_name(&subsys->group, name, &nvmet_subsys_type); config_group_init_type_name(&subsys->namespaces_group, "namespaces", &nvmet_namespaces_type); configfs_add_default_group(&subsys->namespaces_group, &subsys->group); config_group_init_type_name(&subsys->allowed_hosts_group, "allowed_hosts", &nvmet_allowed_hosts_type); 
configfs_add_default_group(&subsys->allowed_hosts_group, &subsys->group); nvmet_add_passthru_group(subsys); return &subsys->group; } static struct configfs_group_operations nvmet_subsystems_group_ops = { .make_group = nvmet_subsys_make, }; static const struct config_item_type nvmet_subsystems_type = { .ct_group_ops = &nvmet_subsystems_group_ops, .ct_owner = THIS_MODULE, }; static ssize_t nvmet_referral_enable_show(struct config_item *item, char *page) { return snprintf(page, PAGE_SIZE, "%d\n", to_nvmet_port(item)->enabled); } static ssize_t nvmet_referral_enable_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent); struct nvmet_port *port = to_nvmet_port(item); bool enable; if (kstrtobool(page, &enable)) goto inval; if (enable) nvmet_referral_enable(parent, port); else nvmet_referral_disable(parent, port); return count; inval: pr_err("Invalid value '%s' for enable\n", page); return -EINVAL; } CONFIGFS_ATTR(nvmet_referral_, enable); /* * Discovery Service subsystem definitions */ static struct configfs_attribute *nvmet_referral_attrs[] = { &nvmet_attr_addr_adrfam, &nvmet_attr_addr_portid, &nvmet_attr_addr_treq, &nvmet_attr_addr_traddr, &nvmet_attr_addr_trsvcid, &nvmet_attr_addr_trtype, &nvmet_referral_attr_enable, NULL, }; static void nvmet_referral_notify(struct config_group *group, struct config_item *item) { struct nvmet_port *parent = to_nvmet_port(item->ci_parent->ci_parent); struct nvmet_port *port = to_nvmet_port(item); nvmet_referral_disable(parent, port); } static void nvmet_referral_release(struct config_item *item) { struct nvmet_port *port = to_nvmet_port(item); kfree(port); } static struct configfs_item_operations nvmet_referral_item_ops = { .release = nvmet_referral_release, }; static const struct config_item_type nvmet_referral_type = { .ct_owner = THIS_MODULE, .ct_attrs = nvmet_referral_attrs, .ct_item_ops = &nvmet_referral_item_ops, }; static struct config_group 
*nvmet_referral_make( struct config_group *group, const char *name) { struct nvmet_port *port; port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&port->entry); port->disc_addr.trtype = NVMF_TRTYPE_MAX; config_group_init_type_name(&port->group, name, &nvmet_referral_type); return &port->group; } static struct configfs_group_operations nvmet_referral_group_ops = { .make_group = nvmet_referral_make, .disconnect_notify = nvmet_referral_notify, }; static const struct config_item_type nvmet_referrals_type = { .ct_owner = THIS_MODULE, .ct_group_ops = &nvmet_referral_group_ops, }; static struct nvmet_type_name_map nvmet_ana_state[] = { { NVME_ANA_OPTIMIZED, "optimized" }, { NVME_ANA_NONOPTIMIZED, "non-optimized" }, { NVME_ANA_INACCESSIBLE, "inaccessible" }, { NVME_ANA_PERSISTENT_LOSS, "persistent-loss" }, { NVME_ANA_CHANGE, "change" }, }; static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item, char *page) { struct nvmet_ana_group *grp = to_ana_group(item); enum nvme_ana_state state = grp->port->ana_state[grp->grpid]; int i; for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) { if (state == nvmet_ana_state[i].type) return sprintf(page, "%s\n", nvmet_ana_state[i].name); } return sprintf(page, "\n"); } static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ana_group *grp = to_ana_group(item); enum nvme_ana_state *ana_state = grp->port->ana_state; int i; for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) { if (sysfs_streq(page, nvmet_ana_state[i].name)) goto found; } pr_err("Invalid value '%s' for ana_state\n", page); return -EINVAL; found: down_write(&nvmet_ana_sem); ana_state[grp->grpid] = (enum nvme_ana_state) nvmet_ana_state[i].type; nvmet_ana_chgcnt++; up_write(&nvmet_ana_sem); nvmet_port_send_ana_event(grp->port); return count; } CONFIGFS_ATTR(nvmet_ana_group_, ana_state); static struct configfs_attribute *nvmet_ana_group_attrs[] = { 
&nvmet_ana_group_attr_ana_state, NULL, }; static void nvmet_ana_group_release(struct config_item *item) { struct nvmet_ana_group *grp = to_ana_group(item); if (grp == &grp->port->ana_default_group) return; down_write(&nvmet_ana_sem); grp->port->ana_state[grp->grpid] = NVME_ANA_INACCESSIBLE; nvmet_ana_group_enabled[grp->grpid]--; up_write(&nvmet_ana_sem); nvmet_port_send_ana_event(grp->port); kfree(grp); } static struct configfs_item_operations nvmet_ana_group_item_ops = { .release = nvmet_ana_group_release, }; static const struct config_item_type nvmet_ana_group_type = { .ct_item_ops = &nvmet_ana_group_item_ops, .ct_attrs = nvmet_ana_group_attrs, .ct_owner = THIS_MODULE, }; static struct config_group *nvmet_ana_groups_make_group( struct config_group *group, const char *name) { struct nvmet_port *port = ana_groups_to_port(&group->cg_item); struct nvmet_ana_group *grp; u32 grpid; int ret; ret = kstrtou32(name, 0, &grpid); if (ret) goto out; ret = -EINVAL; if (grpid <= 1 || grpid > NVMET_MAX_ANAGRPS) goto out; ret = -ENOMEM; grp = kzalloc(sizeof(*grp), GFP_KERNEL); if (!grp) goto out; grp->port = port; grp->grpid = grpid; down_write(&nvmet_ana_sem); grpid = array_index_nospec(grpid, NVMET_MAX_ANAGRPS); nvmet_ana_group_enabled[grpid]++; up_write(&nvmet_ana_sem); nvmet_port_send_ana_event(grp->port); config_group_init_type_name(&grp->group, name, &nvmet_ana_group_type); return &grp->group; out: return ERR_PTR(ret); } static struct configfs_group_operations nvmet_ana_groups_group_ops = { .make_group = nvmet_ana_groups_make_group, }; static const struct config_item_type nvmet_ana_groups_type = { .ct_group_ops = &nvmet_ana_groups_group_ops, .ct_owner = THIS_MODULE, }; /* * Ports definitions. 
*/ static void nvmet_port_release(struct config_item *item) { struct nvmet_port *port = to_nvmet_port(item); /* Let inflight controllers teardown complete */ flush_workqueue(nvmet_wq); list_del(&port->global_entry); key_put(port->keyring); kfree(port->ana_state); kfree(port); } static struct configfs_attribute *nvmet_port_attrs[] = { &nvmet_attr_addr_adrfam, &nvmet_attr_addr_treq, &nvmet_attr_addr_traddr, &nvmet_attr_addr_trsvcid, &nvmet_attr_addr_trtype, &nvmet_attr_addr_tsas, &nvmet_attr_param_inline_data_size, &nvmet_attr_param_max_queue_size, #ifdef CONFIG_BLK_DEV_INTEGRITY &nvmet_attr_param_pi_enable, #endif NULL, }; static struct configfs_item_operations nvmet_port_item_ops = { .release = nvmet_port_release, }; static const struct config_item_type nvmet_port_type = { .ct_attrs = nvmet_port_attrs, .ct_item_ops = &nvmet_port_item_ops, .ct_owner = THIS_MODULE, }; static struct config_group *nvmet_ports_make(struct config_group *group, const char *name) { struct nvmet_port *port; u16 portid; u32 i; if (kstrtou16(name, 0, &portid)) return ERR_PTR(-EINVAL); port = kzalloc(sizeof(*port), GFP_KERNEL); if (!port) return ERR_PTR(-ENOMEM); port->ana_state = kcalloc(NVMET_MAX_ANAGRPS + 1, sizeof(*port->ana_state), GFP_KERNEL); if (!port->ana_state) { kfree(port); return ERR_PTR(-ENOMEM); } if (IS_ENABLED(CONFIG_NVME_TARGET_TCP_TLS) && nvme_keyring_id()) { port->keyring = key_lookup(nvme_keyring_id()); if (IS_ERR(port->keyring)) { pr_warn("NVMe keyring not available, disabling TLS\n"); port->keyring = NULL; } } for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) { if (i == NVMET_DEFAULT_ANA_GRPID) port->ana_state[1] = NVME_ANA_OPTIMIZED; else port->ana_state[i] = NVME_ANA_INACCESSIBLE; } list_add(&port->global_entry, &nvmet_ports_list); INIT_LIST_HEAD(&port->entry); INIT_LIST_HEAD(&port->subsystems); INIT_LIST_HEAD(&port->referrals); port->inline_data_size = -1; /* < 0 == let the transport choose */ port->max_queue_size = -1; /* < 0 == let the transport choose */ 
port->disc_addr.trtype = NVMF_TRTYPE_MAX; port->disc_addr.portid = cpu_to_le16(portid); port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX; port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW; config_group_init_type_name(&port->group, name, &nvmet_port_type); config_group_init_type_name(&port->subsys_group, "subsystems", &nvmet_port_subsys_type); configfs_add_default_group(&port->subsys_group, &port->group); config_group_init_type_name(&port->referrals_group, "referrals", &nvmet_referrals_type); configfs_add_default_group(&port->referrals_group, &port->group); config_group_init_type_name(&port->ana_groups_group, "ana_groups", &nvmet_ana_groups_type); configfs_add_default_group(&port->ana_groups_group, &port->group); port->ana_default_group.port = port; port->ana_default_group.grpid = NVMET_DEFAULT_ANA_GRPID; config_group_init_type_name(&port->ana_default_group.group, __stringify(NVMET_DEFAULT_ANA_GRPID), &nvmet_ana_group_type); configfs_add_default_group(&port->ana_default_group.group, &port->ana_groups_group); return &port->group; } static struct configfs_group_operations nvmet_ports_group_ops = { .make_group = nvmet_ports_make, }; static const struct config_item_type nvmet_ports_type = { .ct_group_ops = &nvmet_ports_group_ops, .ct_owner = THIS_MODULE, }; static struct config_group nvmet_subsystems_group; static struct config_group nvmet_ports_group; #ifdef CONFIG_NVME_TARGET_AUTH static ssize_t nvmet_host_dhchap_key_show(struct config_item *item, char *page) { u8 *dhchap_secret; ssize_t ret; down_read(&nvmet_config_sem); dhchap_secret = to_host(item)->dhchap_secret; if (!dhchap_secret) ret = sprintf(page, "\n"); else ret = sprintf(page, "%s\n", dhchap_secret); up_read(&nvmet_config_sem); return ret; } static ssize_t nvmet_host_dhchap_key_store(struct config_item *item, const char *page, size_t count) { struct nvmet_host *host = to_host(item); int ret; ret = nvmet_auth_set_key(host, page, false); /* * Re-authentication is a soft state, so keep the * current authentication 
valid until the host * requests re-authentication. */ return ret < 0 ? ret : count; } CONFIGFS_ATTR(nvmet_host_, dhchap_key); static ssize_t nvmet_host_dhchap_ctrl_key_show(struct config_item *item, char *page) { u8 *dhchap_secret = to_host(item)->dhchap_ctrl_secret; ssize_t ret; down_read(&nvmet_config_sem); dhchap_secret = to_host(item)->dhchap_ctrl_secret; if (!dhchap_secret) ret = sprintf(page, "\n"); else ret = sprintf(page, "%s\n", dhchap_secret); up_read(&nvmet_config_sem); return ret; } static ssize_t nvmet_host_dhchap_ctrl_key_store(struct config_item *item, const char *page, size_t count) { struct nvmet_host *host = to_host(item); int ret; ret = nvmet_auth_set_key(host, page, true); /* * Re-authentication is a soft state, so keep the * current authentication valid until the host * requests re-authentication. */ return ret < 0 ? ret : count; } CONFIGFS_ATTR(nvmet_host_, dhchap_ctrl_key); static ssize_t nvmet_host_dhchap_hash_show(struct config_item *item, char *page) { struct nvmet_host *host = to_host(item); const char *hash_name = nvme_auth_hmac_name(host->dhchap_hash_id); return sprintf(page, "%s\n", hash_name ? hash_name : "none"); } static ssize_t nvmet_host_dhchap_hash_store(struct config_item *item, const char *page, size_t count) { struct nvmet_host *host = to_host(item); u8 hmac_id; hmac_id = nvme_auth_hmac_id(page); if (hmac_id == NVME_AUTH_HASH_INVALID) return -EINVAL; if (!crypto_has_shash(nvme_auth_hmac_name(hmac_id), 0, 0)) return -ENOTSUPP; host->dhchap_hash_id = hmac_id; return count; } CONFIGFS_ATTR(nvmet_host_, dhchap_hash); static ssize_t nvmet_host_dhchap_dhgroup_show(struct config_item *item, char *page) { struct nvmet_host *host = to_host(item); const char *dhgroup = nvme_auth_dhgroup_name(host->dhchap_dhgroup_id); return sprintf(page, "%s\n", dhgroup ? 
dhgroup : "none"); } static ssize_t nvmet_host_dhchap_dhgroup_store(struct config_item *item, const char *page, size_t count) { struct nvmet_host *host = to_host(item); int dhgroup_id; dhgroup_id = nvme_auth_dhgroup_id(page); if (dhgroup_id == NVME_AUTH_DHGROUP_INVALID) return -EINVAL; if (dhgroup_id != NVME_AUTH_DHGROUP_NULL) { const char *kpp = nvme_auth_dhgroup_kpp(dhgroup_id); if (!crypto_has_kpp(kpp, 0, 0)) return -EINVAL; } host->dhchap_dhgroup_id = dhgroup_id; return count; } CONFIGFS_ATTR(nvmet_host_, dhchap_dhgroup); static struct configfs_attribute *nvmet_host_attrs[] = { &nvmet_host_attr_dhchap_key, &nvmet_host_attr_dhchap_ctrl_key, &nvmet_host_attr_dhchap_hash, &nvmet_host_attr_dhchap_dhgroup, NULL, }; #endif /* CONFIG_NVME_TARGET_AUTH */ static void nvmet_host_release(struct config_item *item) { struct nvmet_host *host = to_host(item); #ifdef CONFIG_NVME_TARGET_AUTH kfree(host->dhchap_secret); kfree(host->dhchap_ctrl_secret); #endif kfree(host); } static struct configfs_item_operations nvmet_host_item_ops = { .release = nvmet_host_release, }; static const struct config_item_type nvmet_host_type = { .ct_item_ops = &nvmet_host_item_ops, #ifdef CONFIG_NVME_TARGET_AUTH .ct_attrs = nvmet_host_attrs, #endif .ct_owner = THIS_MODULE, }; static struct config_group *nvmet_hosts_make_group(struct config_group *group, const char *name) { struct nvmet_host *host; host = kzalloc(sizeof(*host), GFP_KERNEL); if (!host) return ERR_PTR(-ENOMEM); #ifdef CONFIG_NVME_TARGET_AUTH /* Default to SHA256 */ host->dhchap_hash_id = NVME_AUTH_HASH_SHA256; #endif config_group_init_type_name(&host->group, name, &nvmet_host_type); return &host->group; } static struct configfs_group_operations nvmet_hosts_group_ops = { .make_group = nvmet_hosts_make_group, }; static const struct config_item_type nvmet_hosts_type = { .ct_group_ops = &nvmet_hosts_group_ops, .ct_owner = THIS_MODULE, }; static struct config_group nvmet_hosts_group; static ssize_t nvmet_root_discovery_nqn_show(struct 
config_item *item, char *page) { return snprintf(page, PAGE_SIZE, "%s\n", nvmet_disc_subsys->subsysnqn); } static ssize_t nvmet_root_discovery_nqn_store(struct config_item *item, const char *page, size_t count) { struct list_head *entry; char *old_nqn, *new_nqn; size_t len; len = strcspn(page, "\n"); if (!len || len > NVMF_NQN_FIELD_LEN - 1) return -EINVAL; new_nqn = kstrndup(page, len, GFP_KERNEL); if (!new_nqn) return -ENOMEM; down_write(&nvmet_config_sem); list_for_each(entry, &nvmet_subsystems_group.cg_children) { struct config_item *item = container_of(entry, struct config_item, ci_entry); if (!strncmp(config_item_name(item), page, len)) { pr_err("duplicate NQN %s\n", config_item_name(item)); up_write(&nvmet_config_sem); kfree(new_nqn); return -EINVAL; } } old_nqn = nvmet_disc_subsys->subsysnqn; nvmet_disc_subsys->subsysnqn = new_nqn; up_write(&nvmet_config_sem); kfree(old_nqn); return len; } CONFIGFS_ATTR(nvmet_root_, discovery_nqn); static struct configfs_attribute *nvmet_root_attrs[] = { &nvmet_root_attr_discovery_nqn, NULL, }; static const struct config_item_type nvmet_root_type = { .ct_attrs = nvmet_root_attrs, .ct_owner = THIS_MODULE, }; static struct configfs_subsystem nvmet_configfs_subsystem = { .su_group = { .cg_item = { .ci_namebuf = "nvmet", .ci_type = &nvmet_root_type, }, }, }; int __init nvmet_init_configfs(void) { int ret; config_group_init(&nvmet_configfs_subsystem.su_group); mutex_init(&nvmet_configfs_subsystem.su_mutex); config_group_init_type_name(&nvmet_subsystems_group, "subsystems", &nvmet_subsystems_type); configfs_add_default_group(&nvmet_subsystems_group, &nvmet_configfs_subsystem.su_group); config_group_init_type_name(&nvmet_ports_group, "ports", &nvmet_ports_type); configfs_add_default_group(&nvmet_ports_group, &nvmet_configfs_subsystem.su_group); config_group_init_type_name(&nvmet_hosts_group, "hosts", &nvmet_hosts_type); configfs_add_default_group(&nvmet_hosts_group, &nvmet_configfs_subsystem.su_group); ret = 
configfs_register_subsystem(&nvmet_configfs_subsystem); if (ret) { pr_err("configfs_register_subsystem: %d\n", ret); return ret; } return 0; } void __exit nvmet_exit_configfs(void) { configfs_unregister_subsystem(&nvmet_configfs_subsystem); }
1 2 1 2 7 7 17 16 7 7 15 15 1 15 8 17 8 10 9 9 6 8 8 8 8 6 2 1 1 1 1 2 1 2 2 1 27 27 27 27 1 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 
503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 // SPDX-License-Identifier: GPL-2.0 /* * linux/drivers/char/mem.c * * Copyright (C) 1991, 1992 Linus Torvalds * * Added devfs support. * Jan-11-1998, C. 
Scott Ananian <cananian@alumni.princeton.edu> * Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com> */ #include <linux/mm.h> #include <linux/miscdevice.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mman.h> #include <linux/random.h> #include <linux/init.h> #include <linux/tty.h> #include <linux/capability.h> #include <linux/ptrace.h> #include <linux/device.h> #include <linux/highmem.h> #include <linux/backing-dev.h> #include <linux/shmem_fs.h> #include <linux/splice.h> #include <linux/pfn.h> #include <linux/export.h> #include <linux/io.h> #include <linux/uio.h> #include <linux/uaccess.h> #include <linux/security.h> #define DEVMEM_MINOR 1 #define DEVPORT_MINOR 4 static inline unsigned long size_inside_page(unsigned long start, unsigned long size) { unsigned long sz; sz = PAGE_SIZE - (start & (PAGE_SIZE - 1)); return min(sz, size); } #ifndef ARCH_HAS_VALID_PHYS_ADDR_RANGE static inline int valid_phys_addr_range(phys_addr_t addr, size_t count) { return addr + count <= __pa(high_memory); } static inline int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) { return 1; } #endif #ifdef CONFIG_STRICT_DEVMEM static inline int page_is_allowed(unsigned long pfn) { return devmem_is_allowed(pfn); } #else static inline int page_is_allowed(unsigned long pfn) { return 1; } #endif static inline bool should_stop_iteration(void) { if (need_resched()) cond_resched(); return signal_pending(current); } /* * This funcion reads the *physical* memory. The f_pos points directly to the * memory location. */ static ssize_t read_mem(struct file *file, char __user *buf, size_t count, loff_t *ppos) { phys_addr_t p = *ppos; ssize_t read, sz; void *ptr; char *bounce; int err; if (p != *ppos) return 0; if (!valid_phys_addr_range(p, count)) return -EFAULT; read = 0; #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED /* we don't have page 0 mapped on sparc and m68k.. 
*/ if (p < PAGE_SIZE) { sz = size_inside_page(p, count); if (sz > 0) { if (clear_user(buf, sz)) return -EFAULT; buf += sz; p += sz; count -= sz; read += sz; } } #endif bounce = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!bounce) return -ENOMEM; while (count > 0) { unsigned long remaining; int allowed, probe; sz = size_inside_page(p, count); err = -EPERM; allowed = page_is_allowed(p >> PAGE_SHIFT); if (!allowed) goto failed; err = -EFAULT; if (allowed == 2) { /* Show zeros for restricted memory. */ remaining = clear_user(buf, sz); } else { /* * On ia64 if a page has been mapped somewhere as * uncached, then it must also be accessed uncached * by the kernel or data corruption may occur. */ ptr = xlate_dev_mem_ptr(p); if (!ptr) goto failed; probe = copy_from_kernel_nofault(bounce, ptr, sz); unxlate_dev_mem_ptr(p, ptr); if (probe) goto failed; remaining = copy_to_user(buf, bounce, sz); } if (remaining) goto failed; buf += sz; p += sz; count -= sz; read += sz; if (should_stop_iteration()) break; } kfree(bounce); *ppos += read; return read; failed: kfree(bounce); return err; } static ssize_t write_mem(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { phys_addr_t p = *ppos; ssize_t written, sz; unsigned long copied; void *ptr; if (p != *ppos) return -EFBIG; if (!valid_phys_addr_range(p, count)) return -EFAULT; written = 0; #ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED /* we don't have page 0 mapped on sparc and m68k.. */ if (p < PAGE_SIZE) { sz = size_inside_page(p, count); /* Hmm. Do something? */ buf += sz; p += sz; count -= sz; written += sz; } #endif while (count > 0) { int allowed; sz = size_inside_page(p, count); allowed = page_is_allowed(p >> PAGE_SHIFT); if (!allowed) return -EPERM; /* Skip actual writing when a page is marked as restricted. */ if (allowed == 1) { /* * On ia64 if a page has been mapped somewhere as * uncached, then it must also be accessed uncached * by the kernel or data corruption may occur. 
*/ ptr = xlate_dev_mem_ptr(p); if (!ptr) { if (written) break; return -EFAULT; } copied = copy_from_user(ptr, buf, sz); unxlate_dev_mem_ptr(p, ptr); if (copied) { written += sz - copied; if (written) break; return -EFAULT; } } buf += sz; p += sz; count -= sz; written += sz; if (should_stop_iteration()) break; } *ppos += written; return written; } int __weak phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, unsigned long size, pgprot_t *vma_prot) { return 1; } #ifndef __HAVE_PHYS_MEM_ACCESS_PROT /* * Architectures vary in how they handle caching for addresses * outside of main memory. * */ #ifdef pgprot_noncached static int uncached_access(struct file *file, phys_addr_t addr) { /* * Accessing memory above the top the kernel knows about or through a * file pointer * that was marked O_DSYNC will be done non-cached. */ if (file->f_flags & O_DSYNC) return 1; return addr >= __pa(high_memory); } #endif static pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { #ifdef pgprot_noncached phys_addr_t offset = pfn << PAGE_SHIFT; if (uncached_access(file, offset)) return pgprot_noncached(vma_prot); #endif return vma_prot; } #endif #ifndef CONFIG_MMU static unsigned long get_unmapped_area_mem(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { if (!valid_mmap_phys_addr_range(pgoff, len)) return (unsigned long) -EINVAL; return pgoff << PAGE_SHIFT; } /* permit direct mmap, for read, write or exec */ static unsigned memory_mmap_capabilities(struct file *file) { return NOMMU_MAP_DIRECT | NOMMU_MAP_READ | NOMMU_MAP_WRITE | NOMMU_MAP_EXEC; } static unsigned zero_mmap_capabilities(struct file *file) { return NOMMU_MAP_COPY; } /* can't do an in-place private mapping if there's no MMU */ static inline int private_mapping_ok(struct vm_area_struct *vma) { return is_nommu_shared_mapping(vma->vm_flags); } #else static inline int private_mapping_ok(struct vm_area_struct 
*vma) { return 1; } #endif static const struct vm_operations_struct mmap_mem_ops = { #ifdef CONFIG_HAVE_IOREMAP_PROT .access = generic_access_phys #endif }; static int mmap_mem(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; phys_addr_t offset = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; /* Does it even fit in phys_addr_t? */ if (offset >> PAGE_SHIFT != vma->vm_pgoff) return -EINVAL; /* It's illegal to wrap around the end of the physical address space. */ if (offset + (phys_addr_t)size - 1 < offset) return -EINVAL; if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) return -EINVAL; if (!private_mapping_ok(vma)) return -ENOSYS; if (!range_is_allowed(vma->vm_pgoff, size)) return -EPERM; if (!phys_mem_access_prot_allowed(file, vma->vm_pgoff, size, &vma->vm_page_prot)) return -EINVAL; vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff, size, vma->vm_page_prot); vma->vm_ops = &mmap_mem_ops; /* Remap-pfn-range will mark the range VM_IO */ if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, size, vma->vm_page_prot)) { return -EAGAIN; } return 0; } #ifdef CONFIG_DEVPORT static ssize_t read_port(struct file *file, char __user *buf, size_t count, loff_t *ppos) { unsigned long i = *ppos; char __user *tmp = buf; if (!access_ok(buf, count)) return -EFAULT; while (count-- > 0 && i < 65536) { if (__put_user(inb(i), tmp) < 0) return -EFAULT; i++; tmp++; } *ppos = i; return tmp-buf; } static ssize_t write_port(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { unsigned long i = *ppos; const char __user *tmp = buf; if (!access_ok(buf, count)) return -EFAULT; while (count-- > 0 && i < 65536) { char c; if (__get_user(c, tmp)) { if (tmp > buf) break; return -EFAULT; } outb(c, i); i++; tmp++; } *ppos = i; return tmp-buf; } #endif static ssize_t read_null(struct file *file, char __user *buf, size_t count, loff_t *ppos) { return 0; } static ssize_t write_null(struct file *file, const char __user *buf, size_t 
count, loff_t *ppos) { return count; } static ssize_t read_iter_null(struct kiocb *iocb, struct iov_iter *to) { return 0; } static ssize_t write_iter_null(struct kiocb *iocb, struct iov_iter *from) { size_t count = iov_iter_count(from); iov_iter_advance(from, count); return count; } static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf, struct splice_desc *sd) { return sd->len; } static ssize_t splice_write_null(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null); } static int uring_cmd_null(struct io_uring_cmd *ioucmd, unsigned int issue_flags) { return 0; } static ssize_t read_iter_zero(struct kiocb *iocb, struct iov_iter *iter) { size_t written = 0; while (iov_iter_count(iter)) { size_t chunk = iov_iter_count(iter), n; if (chunk > PAGE_SIZE) chunk = PAGE_SIZE; /* Just for latency reasons */ n = iov_iter_zero(chunk, iter); if (!n && iov_iter_count(iter)) return written ? written : -EFAULT; written += n; if (signal_pending(current)) return written ? written : -ERESTARTSYS; if (!need_resched()) continue; if (iocb->ki_flags & IOCB_NOWAIT) return written ? 
written : -EAGAIN; cond_resched(); } return written; } static ssize_t read_zero(struct file *file, char __user *buf, size_t count, loff_t *ppos) { size_t cleared = 0; while (count) { size_t chunk = min_t(size_t, count, PAGE_SIZE); size_t left; left = clear_user(buf + cleared, chunk); if (unlikely(left)) { cleared += (chunk - left); if (!cleared) return -EFAULT; break; } cleared += chunk; count -= chunk; if (signal_pending(current)) break; cond_resched(); } return cleared; } static int mmap_zero(struct file *file, struct vm_area_struct *vma) { #ifndef CONFIG_MMU return -ENOSYS; #endif if (vma->vm_flags & VM_SHARED) return shmem_zero_setup(vma); vma_set_anonymous(vma); return 0; } #ifndef CONFIG_MMU static unsigned long get_unmapped_area_zero(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { return -ENOSYS; } #else static unsigned long get_unmapped_area_zero(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { if (flags & MAP_SHARED) { /* * mmap_zero() will call shmem_zero_setup() to create a file, * so use shmem's get_unmapped_area in case it can be huge; * and pass NULL for file as in mmap.c's get_unmapped_area(), * so as not to confuse shmem with our handle on "/dev/zero". */ return shmem_get_unmapped_area(NULL, addr, len, pgoff, flags); } /* * Otherwise flags & MAP_PRIVATE: with no shmem object beneath it, * attempt to map aligned to huge page size if possible, otherwise we * fall back to system page size mappings. */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE return thp_get_unmapped_area(file, addr, len, pgoff, flags); #else return mm_get_unmapped_area(current->mm, file, addr, len, pgoff, flags); #endif } #endif /* CONFIG_MMU */ static ssize_t write_full(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { return -ENOSPC; } /* * Special lseek() function for /dev/null and /dev/zero. Most notably, you * can fopen() both devices with "a" now. 
This was previously impossible. * -- SRB. */ static loff_t null_lseek(struct file *file, loff_t offset, int orig) { return file->f_pos = 0; } /* * The memory devices use the full 32/64 bits of the offset, and so we cannot * check against negative addresses: they are ok. The return value is weird, * though, in that case (0). * * also note that seeking relative to the "end of file" isn't supported: * it has no meaning, so it returns -EINVAL. */ static loff_t memory_lseek(struct file *file, loff_t offset, int orig) { loff_t ret; inode_lock(file_inode(file)); switch (orig) { case SEEK_CUR: offset += file->f_pos; fallthrough; case SEEK_SET: /* to avoid userland mistaking f_pos=-9 as -EBADF=-9 */ if ((unsigned long long)offset >= -MAX_ERRNO) { ret = -EOVERFLOW; break; } file->f_pos = offset; ret = file->f_pos; force_successful_syscall_return(); break; default: ret = -EINVAL; } inode_unlock(file_inode(file)); return ret; } static int open_port(struct inode *inode, struct file *filp) { int rc; if (!capable(CAP_SYS_RAWIO)) return -EPERM; rc = security_locked_down(LOCKDOWN_DEV_MEM); if (rc) return rc; if (iminor(inode) != DEVMEM_MINOR) return 0; /* * Use a unified address space to have a single point to manage * revocations when drivers want to take over a /dev/mem mapped * range. 
*/ filp->f_mapping = iomem_get_mapping(); return 0; } #define zero_lseek null_lseek #define full_lseek null_lseek #define write_zero write_null #define write_iter_zero write_iter_null #define splice_write_zero splice_write_null #define open_mem open_port static const struct file_operations __maybe_unused mem_fops = { .llseek = memory_lseek, .read = read_mem, .write = write_mem, .mmap = mmap_mem, .open = open_mem, #ifndef CONFIG_MMU .get_unmapped_area = get_unmapped_area_mem, .mmap_capabilities = memory_mmap_capabilities, #endif .fop_flags = FOP_UNSIGNED_OFFSET, }; static const struct file_operations null_fops = { .llseek = null_lseek, .read = read_null, .write = write_null, .read_iter = read_iter_null, .write_iter = write_iter_null, .splice_write = splice_write_null, .uring_cmd = uring_cmd_null, }; #ifdef CONFIG_DEVPORT static const struct file_operations port_fops = { .llseek = memory_lseek, .read = read_port, .write = write_port, .open = open_port, }; #endif static const struct file_operations zero_fops = { .llseek = zero_lseek, .write = write_zero, .read_iter = read_iter_zero, .read = read_zero, .write_iter = write_iter_zero, .splice_read = copy_splice_read, .splice_write = splice_write_zero, .mmap = mmap_zero, .get_unmapped_area = get_unmapped_area_zero, #ifndef CONFIG_MMU .mmap_capabilities = zero_mmap_capabilities, #endif }; static const struct file_operations full_fops = { .llseek = full_lseek, .read_iter = read_iter_zero, .write = write_full, .splice_read = copy_splice_read, }; static const struct memdev { const char *name; const struct file_operations *fops; fmode_t fmode; umode_t mode; } devlist[] = { #ifdef CONFIG_DEVMEM [DEVMEM_MINOR] = { "mem", &mem_fops, 0, 0 }, #endif [3] = { "null", &null_fops, FMODE_NOWAIT, 0666 }, #ifdef CONFIG_DEVPORT [4] = { "port", &port_fops, 0, 0 }, #endif [5] = { "zero", &zero_fops, FMODE_NOWAIT, 0666 }, [7] = { "full", &full_fops, 0, 0666 }, [8] = { "random", &random_fops, FMODE_NOWAIT, 0666 }, [9] = { "urandom", 
&urandom_fops, FMODE_NOWAIT, 0666 }, #ifdef CONFIG_PRINTK [11] = { "kmsg", &kmsg_fops, 0, 0644 }, #endif }; static int memory_open(struct inode *inode, struct file *filp) { int minor; const struct memdev *dev; minor = iminor(inode); if (minor >= ARRAY_SIZE(devlist)) return -ENXIO; dev = &devlist[minor]; if (!dev->fops) return -ENXIO; filp->f_op = dev->fops; filp->f_mode |= dev->fmode; if (dev->fops->open) return dev->fops->open(inode, filp); return 0; } static const struct file_operations memory_fops = { .open = memory_open, .llseek = noop_llseek, }; static char *mem_devnode(const struct device *dev, umode_t *mode) { if (mode && devlist[MINOR(dev->devt)].mode) *mode = devlist[MINOR(dev->devt)].mode; return NULL; } static const struct class mem_class = { .name = "mem", .devnode = mem_devnode, }; static int __init chr_dev_init(void) { int retval; int minor; if (register_chrdev(MEM_MAJOR, "mem", &memory_fops)) printk("unable to get major %d for memory devs\n", MEM_MAJOR); retval = class_register(&mem_class); if (retval) return retval; for (minor = 1; minor < ARRAY_SIZE(devlist); minor++) { if (!devlist[minor].name) continue; /* * Create /dev/port? */ if ((minor == DEVPORT_MINOR) && !arch_has_dev_port()) continue; device_create(&mem_class, NULL, MKDEV(MEM_MAJOR, minor), NULL, devlist[minor].name); } return tty_init(); } fs_initcall(chr_dev_init);
57 21 20 8 55 15 24 24 24 8 8 27 7 6 51 43 43 32 26 21 23 21 2 3 1 2 1 2 1 1 1 2 20 5 1 1 1 8 5 1 1 1 1 1 1 1 1 10 3 55 3 1 1 1 28 1 28 1 34 1 34 1 2 2 38 1 6 29 75 75 30 12 29 29 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 // SPDX-License-Identifier: GPL-2.0-or-later /* * OSS compatible sequencer driver * * Copyright (C) 1998,99 Takashi Iwai <tiwai@suse.de> */ 
#include "seq_oss_device.h"
#include "seq_oss_synth.h"
#include "seq_oss_midi.h"
#include "seq_oss_event.h"
#include "seq_oss_timer.h"
#include <sound/seq_oss_legacy.h>
#include "seq_oss_readq.h"
#include "seq_oss_writeq.h"
#include <linux/nospec.h>


/*
 * prototypes
 */
static int extended_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev);
static int chn_voice_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev);
static int chn_common_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev);
static int timing_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev);
static int local_event(struct seq_oss_devinfo *dp, union evrec *event_rec, struct snd_seq_event *ev);
static int old_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev);
static int note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev);
static int note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev);
static int set_note_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int note, int vel, struct snd_seq_event *ev);
static int set_control_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int param, int val, struct snd_seq_event *ev);
static int set_echo_event(struct seq_oss_devinfo *dp, union evrec *rec, struct snd_seq_event *ev);


/*
 * convert an OSS event to ALSA event
 * return 0 : enqueued
 *        non-zero : invalid - ignored
 *
 * Dispatches on the event code.  SEQ_MIDIPUTC, SEQ_ECHO, SEQ_PRIVATE and all
 * codes handled by old_event() are rejected with -EINVAL when the device is
 * in SNDRV_SEQ_OSS_MODE_MUSIC.
 */
int
snd_seq_oss_process_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev)
{
	switch (q->s.code) {
	case SEQ_EXTENDED:
		return extended_event(dp, q, ev);

	case EV_CHN_VOICE:
		return chn_voice_event(dp, q, ev);

	case EV_CHN_COMMON:
		return chn_common_event(dp, q, ev);

	case EV_TIMING:
		return timing_event(dp, q, ev);

	case EV_SEQ_LOCAL:
		return local_event(dp, q, ev);

	case EV_SYSEX:
		return snd_seq_oss_synth_sysex(dp, q->x.dev, q->x.buf, ev);

	case SEQ_MIDIPUTC:
		if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC)
			return -EINVAL;
		/* put a midi byte */
		if (! is_write_mode(dp->file_mode))
			break;
		if (snd_seq_oss_midi_open(dp, q->s.dev, SNDRV_SEQ_OSS_FILE_WRITE))
			break;
		if (snd_seq_oss_midi_filemode(dp, q->s.dev) & SNDRV_SEQ_OSS_FILE_WRITE)
			return snd_seq_oss_midi_putc(dp, q->s.dev, q->s.parm1, ev);
		break;

	case SEQ_ECHO:
		if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC)
			return -EINVAL;
		return set_echo_event(dp, q, ev);

	case SEQ_PRIVATE:
		if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC)
			return -EINVAL;
		return snd_seq_oss_synth_raw_event(dp, q->c[1], q->c, ev);

	default:
		if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC)
			return -EINVAL;
		return old_event(dp, q, ev);
	}
	return -EINVAL;
}

/* old type events: mode1 only */
static int
old_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev)
{
	switch (q->s.code) {
	case SEQ_NOTEOFF:
		return note_off_event(dp, 0, q->n.chn, q->n.note, q->n.vel, ev);

	case SEQ_NOTEON:
		return note_on_event(dp, 0, q->n.chn, q->n.note, q->n.vel, ev);

	case SEQ_WAIT:
		/* skip */
		break;

	case SEQ_PGMCHANGE:
		return set_control_event(dp, 0, SNDRV_SEQ_EVENT_PGMCHANGE,
					 q->n.chn, 0, q->n.note, ev);

	case SEQ_SYNCTIMER:
		return snd_seq_oss_timer_reset(dp->timer);
	}

	return -EINVAL;
}

/* 8bytes extended event: mode1 only */
static int
extended_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev)
{
	int val;

	switch (q->e.cmd) {
	case SEQ_NOTEOFF:
		return note_off_event(dp, q->e.dev, q->e.chn, q->e.p1, q->e.p2, ev);

	case SEQ_NOTEON:
		return note_on_event(dp, q->e.dev, q->e.chn, q->e.p1, q->e.p2, ev);

	case SEQ_PGMCHANGE:
		return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_PGMCHANGE,
					 q->e.chn, 0, q->e.p1, ev);

	case SEQ_AFTERTOUCH:
		return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_CHANPRESS,
					 q->e.chn, 0, q->e.p1, ev);

	case SEQ_BALANCE:
		/*
		 * convert -128:127 to 0:127
		 *
		 * The byte must be reinterpreted as signed.  Plain "char" is
		 * unsigned on some ABIs and with -funsigned-char, in which
		 * case a (char) cast would leave 0:255 and the result would
		 * exceed 127, so cast through signed char explicitly.
		 */
		val = (signed char)q->e.p1;
		val = (val + 128) / 2;
		return set_control_event(dp, q->e.dev, SNDRV_SEQ_EVENT_CONTROLLER,
					 q->e.chn, CTL_PAN, val, ev);

	case SEQ_CONTROLLER:
		val = ((short)q->e.p3 << 8) | (short)q->e.p2;
		switch (q->e.p1) {
		case CTRL_PITCH_BENDER: /* SEQ1 V2 control */
			/* -0x2000:0x1fff */
			return set_control_event(dp, q->e.dev,
						 SNDRV_SEQ_EVENT_PITCHBEND,
						 q->e.chn, 0, val, ev);
		case CTRL_PITCH_BENDER_RANGE:
			/* conversion: 100/semitone -> 128/semitone */
			return set_control_event(dp, q->e.dev,
						 SNDRV_SEQ_EVENT_REGPARAM,
						 q->e.chn, 0, val*128/100, ev);
		default:
			return set_control_event(dp, q->e.dev,
						  SNDRV_SEQ_EVENT_CONTROL14,
						  q->e.chn, q->e.p1, val, ev);
		}

	case SEQ_VOLMODE:
		return snd_seq_oss_synth_raw_event(dp, q->e.dev, q->c, ev);

	}
	return -EINVAL;
}

/* channel voice events: mode1 and 2 */
static int
chn_voice_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev)
{
	/* channels 32 and above are rejected */
	if (q->v.chn >= 32)
		return -EINVAL;
	switch (q->v.cmd) {
	case MIDI_NOTEON:
		return note_on_event(dp, q->v.dev, q->v.chn, q->v.note, q->v.parm, ev);

	case MIDI_NOTEOFF:
		return note_off_event(dp, q->v.dev, q->v.chn, q->v.note, q->v.parm, ev);

	case MIDI_KEY_PRESSURE:
		return set_note_event(dp, q->v.dev, SNDRV_SEQ_EVENT_KEYPRESS,
				       q->v.chn, q->v.note, q->v.parm, ev);

	}
	return -EINVAL;
}

/* channel common events: mode1 and 2 */
static int
chn_common_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev)
{
	/* channels 32 and above are rejected */
	if (q->l.chn >= 32)
		return -EINVAL;
	switch (q->l.cmd) {
	case MIDI_PGM_CHANGE:
		return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_PGMCHANGE,
					  q->l.chn, 0, q->l.p1, ev);

	case MIDI_CTL_CHANGE:
		return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_CONTROLLER,
					  q->l.chn, q->l.p1, q->l.val, ev);

	case MIDI_PITCH_BEND:
		/* conversion: 0:0x3fff -> -0x2000:0x1fff */
		return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_PITCHBEND,
					  q->l.chn, 0, q->l.val - 8192, ev);

	case MIDI_CHN_PRESSURE:
		return set_control_event(dp, q->l.dev, SNDRV_SEQ_EVENT_CHANPRESS,
					  q->l.chn, 0, q->l.val, ev);
	}
	return -EINVAL;
}

/* timer events: mode1 and mode2 */
static int
timing_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev)
{
	switch (q->t.cmd) {
	case TMR_ECHO:
		if (dp->seq_mode == SNDRV_SEQ_OSS_MODE_MUSIC)
			return set_echo_event(dp, q, ev);
		else {
			/* rebuild the record as a SEQ_ECHO event */
			union evrec tmp;
			memset(&tmp, 0, sizeof(tmp));
			/* XXX: only for little-endian! */
			tmp.echo = (q->t.time << 8) | SEQ_ECHO;
			return set_echo_event(dp, &tmp, ev);
		}

	case TMR_STOP:
		if (dp->seq_mode)
			return snd_seq_oss_timer_stop(dp->timer);
		return 0;

	case TMR_CONTINUE:
		if (dp->seq_mode)
			return snd_seq_oss_timer_continue(dp->timer);
		return 0;

	case TMR_TEMPO:
		if (dp->seq_mode)
			return snd_seq_oss_timer_tempo(dp->timer, q->t.time);
		return 0;
	}

	return -EINVAL;
}

/* local events: mode1 and 2 -- none are handled, always -EINVAL */
static int
local_event(struct seq_oss_devinfo *dp, union evrec *q, struct snd_seq_event *ev)
{
	return -EINVAL;
}

/*
 * process note-on event for OSS synth
 * three different modes are available:
 * - SNDRV_SEQ_OSS_PROCESS_EVENTS  (for one-voice per channel mode)
 *	Accept note 255 as volume change.
 * - SNDRV_SEQ_OSS_PASS_EVENTS
 *	Pass all events to lowlevel driver anyway
 * - SNDRV_SEQ_OSS_PROCESS_KEYPRESS  (mostly for Emu8000)
 *	Use key-pressure if note >= 128
 *
 * Returns -ENXIO when no synth info exists for @dev, otherwise 0 when @ev
 * was filled in and -EINVAL when the event is to be ignored.
 */
static int
note_on_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev)
{
	struct seq_oss_synthinfo *info;

	info = snd_seq_oss_synth_info(dp, dev);
	if (!info)
		return -ENXIO;

	switch (info->arg.event_passing) {
	case SNDRV_SEQ_OSS_PROCESS_EVENTS:
		if (! info->ch || ch < 0 || ch >= info->nr_voices) {
			/* pass directly */
			return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev);
		}

		/* clamp the index under speculation before touching info->ch[] */
		ch = array_index_nospec(ch, info->nr_voices);
		if (note == 255 && info->ch[ch].note >= 0) {
			/* volume control */
			int type;

			if (info->ch[ch].vel)
				/* sample already started -- volume change */
				type = SNDRV_SEQ_EVENT_KEYPRESS;
			else
				/* sample not started -- start now */
				type = SNDRV_SEQ_EVENT_NOTEON;
			info->ch[ch].vel = vel;
			return set_note_event(dp, dev, type, ch, info->ch[ch].note, vel, ev);
		} else if (note >= 128)
			return -EINVAL; /* invalid */

		if (note != info->ch[ch].note && info->ch[ch].note >= 0)
			/* note changed - note off at beginning */
			set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEOFF, ch, info->ch[ch].note, 0, ev);
		/* set current status */
		info->ch[ch].note = note;
		info->ch[ch].vel = vel;
		if (vel) /* non-zero velocity - start the note now */
			return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev);
		return -EINVAL;

	case SNDRV_SEQ_OSS_PASS_EVENTS:
		/* pass the event anyway */
		return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev);

	case SNDRV_SEQ_OSS_PROCESS_KEYPRESS:
		if (note >= 128) /* key pressure: shifted by 128 */
			return set_note_event(dp, dev, SNDRV_SEQ_EVENT_KEYPRESS, ch, note - 128, vel, ev);
		else /* normal note-on event */
			return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEON, ch, note, vel, ev);
	}
	return -EINVAL;
}

/*
 * process note-off event for OSS synth
 *
 * Returns -ENXIO when no synth info exists for @dev, otherwise 0 when @ev
 * was filled in and -EINVAL when the event is to be ignored.  In
 * SNDRV_SEQ_OSS_PROCESS_EVENTS mode the note recorded for the voice is the
 * one released, and the voice is marked idle (note = -1, vel = 0).
 */
static int
note_off_event(struct seq_oss_devinfo *dp, int dev, int ch, int note, int vel, struct snd_seq_event *ev)
{
	struct seq_oss_synthinfo *info;

	info = snd_seq_oss_synth_info(dp, dev);
	if (!info)
		return -ENXIO;

	switch (info->arg.event_passing) {
	case SNDRV_SEQ_OSS_PROCESS_EVENTS:
		if (! info->ch || ch < 0 || ch >= info->nr_voices) {
			/*
			 * pass directly -- as a note-off, like every other
			 * path of this function (this branch used to emit
			 * SNDRV_SEQ_EVENT_NOTEON, turning a release into a
			 * new note).
			 */
			return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEOFF, ch, note, vel, ev);
		}

		/* clamp the index under speculation before touching info->ch[] */
		ch = array_index_nospec(ch, info->nr_voices);
		if (info->ch[ch].note >= 0) {
			note = info->ch[ch].note;
			info->ch[ch].vel = 0;
			info->ch[ch].note = -1;
			return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEOFF, ch, note, vel, ev);
		}
		return -EINVAL; /* invalid */

	case SNDRV_SEQ_OSS_PASS_EVENTS:
	case SNDRV_SEQ_OSS_PROCESS_KEYPRESS:
		/* pass the event anyway */
		return set_note_event(dp, dev, SNDRV_SEQ_EVENT_NOTEOFF, ch, note, vel, ev);

	}
	return -EINVAL;
}

/*
 * create a note event
 *
 * Fills @ev with @type, the synth address for @dev and the note data.
 * Returns 0, or -ENXIO when no synth info exists for @dev.
 */
static int
set_note_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int note, int vel, struct snd_seq_event *ev)
{
	if (!snd_seq_oss_synth_info(dp, dev))
		return -ENXIO;

	ev->type = type;
	snd_seq_oss_synth_addr(dp, dev, ev);
	ev->data.note.channel = ch;
	ev->data.note.note = note;
	ev->data.note.velocity = vel;

	return 0;
}

/*
 * create a control event
 *
 * Fills @ev with @type, the synth address for @dev and the control data.
 * Returns 0, or -ENXIO when no synth info exists for @dev.
 */
static int
set_control_event(struct seq_oss_devinfo *dp, int dev, int type, int ch, int param, int val, struct snd_seq_event *ev)
{
	if (!snd_seq_oss_synth_info(dp, dev))
		return -ENXIO;

	ev->type = type;
	snd_seq_oss_synth_addr(dp, dev, ev);
	ev->data.control.channel = ch;
	ev->data.control.param = param;
	ev->data.control.value = val;

	return 0;
}

/*
 * create an echo event
 *
 * The event is addressed to the device's own client/port and carries the
 * first LONG_EVENT_SIZE bytes of @rec as payload.  Always returns 0.
 */
static int
set_echo_event(struct seq_oss_devinfo *dp, union evrec *rec, struct snd_seq_event *ev)
{
	ev->type = SNDRV_SEQ_EVENT_ECHO;
	/* echo back to itself */
	snd_seq_oss_fill_addr(dp, ev, dp->addr.client, dp->addr.port);
	memcpy(&ev->data, rec, LONG_EVENT_SIZE);
	return 0;
}

/*
 * event input callback from ALSA sequencer:
 * the echo event is processed here.
 *
 * Non-echo events are handed to snd_seq_oss_midi_input().  Echo events from
 * another client are ignored; a SEQ_SYNCTIMER echo wakes the write queue,
 * any other echo is put on the read queue when one exists.
 */
int
snd_seq_oss_event_input(struct snd_seq_event *ev, int direct, void *private_data,
			int atomic, int hop)
{
	struct seq_oss_devinfo *dp = (struct seq_oss_devinfo *)private_data;
	union evrec *rec;

	if (ev->type != SNDRV_SEQ_EVENT_ECHO)
		return snd_seq_oss_midi_input(ev, direct, private_data);

	if (ev->source.client != dp->cseq)
		return 0; /* ignored */

	rec = (union evrec*)&ev->data;
	if (rec->s.code == SEQ_SYNCTIMER) {
		/* sync echo back */
		snd_seq_oss_writeq_wakeup(dp->writeq, rec->t.time);

	} else {
		/* echo back event */
		if (dp->readq == NULL)
			return 0;
		snd_seq_oss_readq_put_event(dp->readq, rec);
	}
	return 0;
}
18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 69 78 65 65 69 65 69 78 78 78 78 78 78 78 78 78 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 /* * Linear conversion Plug-In * Copyright (c) 1999 by Jaroslav Kysela <perex@perex.cz>, * Abramo Bagnara <abramo@alsa-project.org> * * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Library General Public License as * published by the Free Software Foundation; either version 2 of * the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Library General Public License for more details. * * You should have received a copy of the GNU Library General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include <linux/time.h> #include <sound/core.h> #include <sound/pcm.h> #include "pcm_plugin.h" /* * Basic linear conversion plugin */ struct linear_priv { int cvt_endian; /* need endian conversion? 
*/ unsigned int src_ofs; /* byte offset in source format */ unsigned int dst_ofs; /* byte soffset in destination format */ unsigned int copy_ofs; /* byte offset in temporary u32 data */ unsigned int dst_bytes; /* byte size of destination format */ unsigned int copy_bytes; /* bytes to copy per conversion */ unsigned int flip; /* MSB flip for signeness, done after endian conv */ }; static inline void do_convert(struct linear_priv *data, unsigned char *dst, unsigned char *src) { unsigned int tmp = 0; unsigned char *p = (unsigned char *)&tmp; memcpy(p + data->copy_ofs, src + data->src_ofs, data->copy_bytes); if (data->cvt_endian) tmp = swab32(tmp); tmp ^= data->flip; memcpy(dst, p + data->dst_ofs, data->dst_bytes); } static void convert(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { struct linear_priv *data = (struct linear_priv *)plugin->extra_data; int channel; int nchannels = plugin->src_format.channels; for (channel = 0; channel < nchannels; ++channel) { char *src; char *dst; int src_step, dst_step; snd_pcm_uframes_t frames1; if (!src_channels[channel].enabled) { if (dst_channels[channel].wanted) snd_pcm_area_silence(&dst_channels[channel].area, 0, frames, plugin->dst_format.format); dst_channels[channel].enabled = 0; continue; } dst_channels[channel].enabled = 1; src = src_channels[channel].area.addr + src_channels[channel].area.first / 8; dst = dst_channels[channel].area.addr + dst_channels[channel].area.first / 8; src_step = src_channels[channel].area.step / 8; dst_step = dst_channels[channel].area.step / 8; frames1 = frames; while (frames1-- > 0) { do_convert(data, dst, src); src += src_step; dst += dst_step; } } } static snd_pcm_sframes_t linear_transfer(struct snd_pcm_plugin *plugin, const struct snd_pcm_plugin_channel *src_channels, struct snd_pcm_plugin_channel *dst_channels, snd_pcm_uframes_t frames) { if (snd_BUG_ON(!plugin || !src_channels || 
!dst_channels)) return -ENXIO; if (frames == 0) return 0; #ifdef CONFIG_SND_DEBUG { unsigned int channel; for (channel = 0; channel < plugin->src_format.channels; channel++) { if (snd_BUG_ON(src_channels[channel].area.first % 8 || src_channels[channel].area.step % 8)) return -ENXIO; if (snd_BUG_ON(dst_channels[channel].area.first % 8 || dst_channels[channel].area.step % 8)) return -ENXIO; } } #endif if (frames > dst_channels[0].frames) frames = dst_channels[0].frames; convert(plugin, src_channels, dst_channels, frames); return frames; } static void init_data(struct linear_priv *data, snd_pcm_format_t src_format, snd_pcm_format_t dst_format) { int src_le, dst_le, src_bytes, dst_bytes; src_bytes = snd_pcm_format_width(src_format) / 8; dst_bytes = snd_pcm_format_width(dst_format) / 8; src_le = snd_pcm_format_little_endian(src_format) > 0; dst_le = snd_pcm_format_little_endian(dst_format) > 0; data->dst_bytes = dst_bytes; data->cvt_endian = src_le != dst_le; data->copy_bytes = src_bytes < dst_bytes ? 
src_bytes : dst_bytes; if (src_le) { data->copy_ofs = 4 - data->copy_bytes; data->src_ofs = src_bytes - data->copy_bytes; } else data->src_ofs = snd_pcm_format_physical_width(src_format) / 8 - src_bytes; if (dst_le) data->dst_ofs = 4 - data->dst_bytes; else data->dst_ofs = snd_pcm_format_physical_width(dst_format) / 8 - dst_bytes; if (snd_pcm_format_signed(src_format) != snd_pcm_format_signed(dst_format)) { if (dst_le) data->flip = (__force u32)cpu_to_le32(0x80000000); else data->flip = (__force u32)cpu_to_be32(0x80000000); } } int snd_pcm_plugin_build_linear(struct snd_pcm_substream *plug, struct snd_pcm_plugin_format *src_format, struct snd_pcm_plugin_format *dst_format, struct snd_pcm_plugin **r_plugin) { int err; struct linear_priv *data; struct snd_pcm_plugin *plugin; if (snd_BUG_ON(!r_plugin)) return -ENXIO; *r_plugin = NULL; if (snd_BUG_ON(src_format->rate != dst_format->rate)) return -ENXIO; if (snd_BUG_ON(src_format->channels != dst_format->channels)) return -ENXIO; if (snd_BUG_ON(!snd_pcm_format_linear(src_format->format) || !snd_pcm_format_linear(dst_format->format))) return -ENXIO; err = snd_pcm_plugin_build(plug, "linear format conversion", src_format, dst_format, sizeof(struct linear_priv), &plugin); if (err < 0) return err; data = (struct linear_priv *)plugin->extra_data; init_data(data, src_format->format, dst_format->format); plugin->transfer = linear_transfer; *r_plugin = plugin; return 0; }
13 13 13 11 11 11 10 10 11 11 11 5 11 5 11 11 11 11 11 13 13 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 /* * Copyright (c) 2006 Oracle. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* */ #include <linux/highmem.h> #include <linux/gfp.h> #include <linux/cpu.h> #include <linux/export.h> #include "rds.h" struct rds_page_remainder { struct page *r_page; unsigned long r_offset; local_lock_t bh_lock; }; static DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; /** * rds_page_remainder_alloc - build up regions of a message. * * @scat: Scatter list for message * @bytes: the number of bytes needed. * @gfp: the waiting behaviour of the allocation * * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to * kmap the pages, etc. * * If @bytes is at least a full page then this just returns a page from * alloc_page(). * * If @bytes is a partial page then this stores the unused region of the * page in a per-cpu structure. Future partial-page allocations may be * satisfied from that cached region. This lets us waste less memory on * small allocations with minimal complexity. It works because the transmit * path passes read-only page regions down to devices. They hold a page * reference until they are done with the region. 
*/ int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, gfp_t gfp) { struct rds_page_remainder *rem; struct page *page; int ret; gfp |= __GFP_HIGHMEM; /* jump straight to allocation if we're trying for a huge page */ if (bytes >= PAGE_SIZE) { page = alloc_page(gfp); if (!page) { ret = -ENOMEM; } else { sg_set_page(scat, page, PAGE_SIZE, 0); ret = 0; } goto out; } local_bh_disable(); local_lock_nested_bh(&rds_page_remainders.bh_lock); rem = this_cpu_ptr(&rds_page_remainders); while (1) { /* avoid a tiny region getting stuck by tossing it */ if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) { rds_stats_inc(s_page_remainder_miss); __free_page(rem->r_page); rem->r_page = NULL; } /* hand out a fragment from the cached page */ if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) { sg_set_page(scat, rem->r_page, bytes, rem->r_offset); get_page(sg_page(scat)); if (rem->r_offset != 0) rds_stats_inc(s_page_remainder_hit); rem->r_offset += ALIGN(bytes, 8); if (rem->r_offset >= PAGE_SIZE) { __free_page(rem->r_page); rem->r_page = NULL; } ret = 0; break; } /* alloc if there is nothing for us to use */ local_unlock_nested_bh(&rds_page_remainders.bh_lock); local_bh_enable(); page = alloc_page(gfp); local_bh_disable(); local_lock_nested_bh(&rds_page_remainders.bh_lock); rem = this_cpu_ptr(&rds_page_remainders); if (!page) { ret = -ENOMEM; break; } /* did someone race to fill the remainder before us? */ if (rem->r_page) { __free_page(page); continue; } /* otherwise install our page and loop around to alloc */ rem->r_page = page; rem->r_offset = 0; } local_unlock_nested_bh(&rds_page_remainders.bh_lock); local_bh_enable(); out: rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret, ret ? NULL : sg_page(scat), ret ? 0 : scat->offset, ret ? 
0 : scat->length); return ret; } EXPORT_SYMBOL_GPL(rds_page_remainder_alloc); void rds_page_exit(void) { unsigned int cpu; for_each_possible_cpu(cpu) { struct rds_page_remainder *rem; rem = &per_cpu(rds_page_remainders, cpu); rdsdebug("cpu %u\n", cpu); if (rem->r_page) __free_page(rem->r_page); rem->r_page = NULL; } }
59 6 70 59 59 59 68 63 64 63 64 63 64 63 62 63 63 63 63 62 63 63 63 63 62 4 10 62 63 63 14 53 63 63 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 // SPDX-License-Identifier: GPL-2.0+ #include <linux/dma-fence.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_managed.h> #include <drm/drm_probe_helper.h> #include <drm/drm_vblank.h> #include "vkms_drv.h" static enum hrtimer_restart vkms_vblank_simulate(struct hrtimer *timer) { struct vkms_output *output = container_of(timer, struct vkms_output, vblank_hrtimer); struct drm_crtc *crtc = &output->crtc; struct vkms_crtc_state *state; u64 ret_overrun; bool ret, fence_cookie; fence_cookie = dma_fence_begin_signalling(); ret_overrun = hrtimer_forward_now(&output->vblank_hrtimer, output->period_ns); if (ret_overrun != 1) pr_warn("%s: vblank timer overrun\n", __func__); spin_lock(&output->lock); ret = drm_crtc_handle_vblank(crtc); if (!ret) 
DRM_ERROR("vkms failure on handling vblank"); state = output->composer_state; spin_unlock(&output->lock); if (state && output->composer_enabled) { u64 frame = drm_crtc_accurate_vblank_count(crtc); /* update frame_start only if a queued vkms_composer_worker() * has read the data */ spin_lock(&output->composer_lock); if (!state->crc_pending) state->frame_start = frame; else DRM_DEBUG_DRIVER("crc worker falling behind, frame_start: %llu, frame_end: %llu\n", state->frame_start, frame); state->frame_end = frame; state->crc_pending = true; spin_unlock(&output->composer_lock); ret = queue_work(output->composer_workq, &state->composer_work); if (!ret) DRM_DEBUG_DRIVER("Composer worker already queued\n"); } dma_fence_end_signalling(fence_cookie); return HRTIMER_RESTART; } static int vkms_enable_vblank(struct drm_crtc *crtc) { struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); struct vkms_output *out = drm_crtc_to_vkms_output(crtc); hrtimer_setup(&out->vblank_hrtimer, &vkms_vblank_simulate, CLOCK_MONOTONIC, HRTIMER_MODE_REL); out->period_ns = ktime_set(0, vblank->framedur_ns); hrtimer_start(&out->vblank_hrtimer, out->period_ns, HRTIMER_MODE_REL); return 0; } static void vkms_disable_vblank(struct drm_crtc *crtc) { struct vkms_output *out = drm_crtc_to_vkms_output(crtc); hrtimer_cancel(&out->vblank_hrtimer); } static bool vkms_get_vblank_timestamp(struct drm_crtc *crtc, int *max_error, ktime_t *vblank_time, bool in_vblank_irq) { struct vkms_output *output = drm_crtc_to_vkms_output(crtc); struct drm_vblank_crtc *vblank = drm_crtc_vblank_crtc(crtc); if (!READ_ONCE(vblank->enabled)) { *vblank_time = ktime_get(); return true; } *vblank_time = READ_ONCE(output->vblank_hrtimer.node.expires); if (WARN_ON(*vblank_time == vblank->time)) return true; /* * To prevent races we roll the hrtimer forward before we do any * interrupt processing - this is how real hw works (the interrupt is * only generated after all the vblank registers are updated) and what * the vblank core 
expects. Therefore we need to always correct the * timestampe by one frame. */ *vblank_time -= output->period_ns; return true; } static struct drm_crtc_state * vkms_atomic_crtc_duplicate_state(struct drm_crtc *crtc) { struct vkms_crtc_state *vkms_state; if (WARN_ON(!crtc->state)) return NULL; vkms_state = kzalloc(sizeof(*vkms_state), GFP_KERNEL); if (!vkms_state) return NULL; __drm_atomic_helper_crtc_duplicate_state(crtc, &vkms_state->base); INIT_WORK(&vkms_state->composer_work, vkms_composer_worker); return &vkms_state->base; } static void vkms_atomic_crtc_destroy_state(struct drm_crtc *crtc, struct drm_crtc_state *state) { struct vkms_crtc_state *vkms_state = to_vkms_crtc_state(state); __drm_atomic_helper_crtc_destroy_state(state); WARN_ON(work_pending(&vkms_state->composer_work)); kfree(vkms_state->active_planes); kfree(vkms_state); } static void vkms_atomic_crtc_reset(struct drm_crtc *crtc) { struct vkms_crtc_state *vkms_state = kzalloc(sizeof(*vkms_state), GFP_KERNEL); if (crtc->state) vkms_atomic_crtc_destroy_state(crtc, crtc->state); __drm_atomic_helper_crtc_reset(crtc, &vkms_state->base); if (vkms_state) INIT_WORK(&vkms_state->composer_work, vkms_composer_worker); } static const struct drm_crtc_funcs vkms_crtc_funcs = { .set_config = drm_atomic_helper_set_config, .page_flip = drm_atomic_helper_page_flip, .reset = vkms_atomic_crtc_reset, .atomic_duplicate_state = vkms_atomic_crtc_duplicate_state, .atomic_destroy_state = vkms_atomic_crtc_destroy_state, .enable_vblank = vkms_enable_vblank, .disable_vblank = vkms_disable_vblank, .get_vblank_timestamp = vkms_get_vblank_timestamp, .get_crc_sources = vkms_get_crc_sources, .set_crc_source = vkms_set_crc_source, .verify_crc_source = vkms_verify_crc_source, }; static int vkms_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state) { struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc); struct vkms_crtc_state *vkms_state = to_vkms_crtc_state(crtc_state); struct drm_plane 
*plane; struct drm_plane_state *plane_state; int i = 0, ret; if (vkms_state->active_planes) return 0; ret = drm_atomic_add_affected_planes(crtc_state->state, crtc); if (ret < 0) return ret; drm_for_each_plane_mask(plane, crtc->dev, crtc_state->plane_mask) { plane_state = drm_atomic_get_existing_plane_state(crtc_state->state, plane); WARN_ON(!plane_state); if (!plane_state->visible) continue; i++; } vkms_state->active_planes = kcalloc(i, sizeof(*vkms_state->active_planes), GFP_KERNEL); if (!vkms_state->active_planes) return -ENOMEM; vkms_state->num_active_planes = i; i = 0; drm_for_each_plane_mask(plane, crtc->dev, crtc_state->plane_mask) { plane_state = drm_atomic_get_existing_plane_state(crtc_state->state, plane); if (!plane_state->visible) continue; vkms_state->active_planes[i++] = to_vkms_plane_state(plane_state); } return 0; } static void vkms_crtc_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state) { drm_crtc_vblank_on(crtc); } static void vkms_crtc_atomic_disable(struct drm_crtc *crtc, struct drm_atomic_state *state) { drm_crtc_vblank_off(crtc); } static void vkms_crtc_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state) __acquires(&vkms_output->lock) { struct vkms_output *vkms_output = drm_crtc_to_vkms_output(crtc); /* This lock is held across the atomic commit to block vblank timer * from scheduling vkms_composer_worker until the composer is updated */ spin_lock_irq(&vkms_output->lock); } static void vkms_crtc_atomic_flush(struct drm_crtc *crtc, struct drm_atomic_state *state) __releases(&vkms_output->lock) { struct vkms_output *vkms_output = drm_crtc_to_vkms_output(crtc); if (crtc->state->event) { spin_lock(&crtc->dev->event_lock); if (drm_crtc_vblank_get(crtc) != 0) drm_crtc_send_vblank_event(crtc, crtc->state->event); else drm_crtc_arm_vblank_event(crtc, crtc->state->event); spin_unlock(&crtc->dev->event_lock); crtc->state->event = NULL; } vkms_output->composer_state = to_vkms_crtc_state(crtc->state); 
spin_unlock_irq(&vkms_output->lock); } static const struct drm_crtc_helper_funcs vkms_crtc_helper_funcs = { .atomic_check = vkms_crtc_atomic_check, .atomic_begin = vkms_crtc_atomic_begin, .atomic_flush = vkms_crtc_atomic_flush, .atomic_enable = vkms_crtc_atomic_enable, .atomic_disable = vkms_crtc_atomic_disable, }; struct vkms_output *vkms_crtc_init(struct drm_device *dev, struct drm_plane *primary, struct drm_plane *cursor) { struct vkms_output *vkms_out; struct drm_crtc *crtc; int ret; vkms_out = drmm_crtc_alloc_with_planes(dev, struct vkms_output, crtc, primary, cursor, &vkms_crtc_funcs, NULL); if (IS_ERR(vkms_out)) { DRM_DEV_ERROR(dev->dev, "Failed to init CRTC\n"); return vkms_out; } crtc = &vkms_out->crtc; drm_crtc_helper_add(crtc, &vkms_crtc_helper_funcs); ret = drm_mode_crtc_set_gamma_size(crtc, VKMS_LUT_SIZE); if (ret) { DRM_ERROR("Failed to set gamma size\n"); return ERR_PTR(ret); } drm_crtc_enable_color_mgmt(crtc, 0, false, VKMS_LUT_SIZE); spin_lock_init(&vkms_out->lock); spin_lock_init(&vkms_out->composer_lock); vkms_out->composer_workq = drmm_alloc_ordered_workqueue(dev, "vkms_composer", 0); if (IS_ERR(vkms_out->composer_workq)) return ERR_CAST(vkms_out->composer_workq); return vkms_out; }
13 10 10 10 10 10 10 10 10 10 10 10 14 14 14 14 14 10 10 14 14 14 14 14 14 14 13 14 7 7 7 7 7 349 350 350 350 350 350 350 350 350 350 389 55 69 55 374 323 374 51 34 17 17 51 368 365 367 367 366 364 365 367 367 366 367 367 367 367 366 367 367 365 51 315 316 6 6 367 368 366 366 365 367 367 366 366 366 366 367 366 364 365 365 364 366 367 366 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 
942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 
1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 
1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 
2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 
2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 
2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 
3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 1999 Eric Youngdale * Copyright (C) 2014 Christoph Hellwig * * SCSI queueing library. * Initial versions: Eric Youngdale (eric@andante.org). * Based upon conversations with large numbers * of people at Linux Expo. */ #include <linux/bio.h> #include <linux/bitops.h> #include <linux/blkdev.h> #include <linux/completion.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/init.h> #include <linux/pci.h> #include <linux/delay.h> #include <linux/hardirq.h> #include <linux/scatterlist.h> #include <linux/blk-mq.h> #include <linux/blk-integrity.h> #include <linux/ratelimit.h> #include <linux/unaligned.h> #include <scsi/scsi.h> #include <scsi/scsi_cmnd.h> #include <scsi/scsi_dbg.h> #include <scsi/scsi_device.h> #include <scsi/scsi_driver.h> #include <scsi/scsi_eh.h> #include <scsi/scsi_host.h> #include <scsi/scsi_transport.h> /* scsi_init_limits() */ #include <scsi/scsi_dh.h> #include <trace/events/scsi.h> #include "scsi_debugfs.h" #include "scsi_priv.h" #include "scsi_logging.h" /* * Size of integrity metadata is usually small, 1 inline sg should * cover normal cases. 
*/ #ifdef CONFIG_ARCH_NO_SG_CHAIN #define SCSI_INLINE_PROT_SG_CNT 0 #define SCSI_INLINE_SG_CNT 0 #else #define SCSI_INLINE_PROT_SG_CNT 1 #define SCSI_INLINE_SG_CNT 2 #endif static struct kmem_cache *scsi_sense_cache; static DEFINE_MUTEX(scsi_sense_cache_mutex); static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd); int scsi_init_sense_cache(struct Scsi_Host *shost) { int ret = 0; mutex_lock(&scsi_sense_cache_mutex); if (!scsi_sense_cache) { scsi_sense_cache = kmem_cache_create_usercopy("scsi_sense_cache", SCSI_SENSE_BUFFERSIZE, 0, SLAB_HWCACHE_ALIGN, 0, SCSI_SENSE_BUFFERSIZE, NULL); if (!scsi_sense_cache) ret = -ENOMEM; } mutex_unlock(&scsi_sense_cache_mutex); return ret; } static void scsi_set_blocked(struct scsi_cmnd *cmd, int reason) { struct Scsi_Host *host = cmd->device->host; struct scsi_device *device = cmd->device; struct scsi_target *starget = scsi_target(device); /* * Set the appropriate busy bit for the device/host. * * If the host/device isn't busy, assume that something actually * completed, and that we should be able to queue a command now. * * Note that the prior mid-layer assumption that any host could * always queue at least one command is now broken. The mid-layer * will implement a user specifiable stall (see * scsi_host.max_host_blocked and scsi_device.max_device_blocked) * if a command is requeued with no other commands outstanding * either for the device or for the host. 
*/ switch (reason) { case SCSI_MLQUEUE_HOST_BUSY: atomic_set(&host->host_blocked, host->max_host_blocked); break; case SCSI_MLQUEUE_DEVICE_BUSY: case SCSI_MLQUEUE_EH_RETRY: atomic_set(&device->device_blocked, device->max_device_blocked); break; case SCSI_MLQUEUE_TARGET_BUSY: atomic_set(&starget->target_blocked, starget->max_target_blocked); break; } } static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long msecs) { struct request *rq = scsi_cmd_to_rq(cmd); if (rq->rq_flags & RQF_DONTPREP) { rq->rq_flags &= ~RQF_DONTPREP; scsi_mq_uninit_cmd(cmd); } else { WARN_ON_ONCE(true); } blk_mq_requeue_request(rq, false); if (!scsi_host_in_recovery(cmd->device->host)) blk_mq_delay_kick_requeue_list(rq->q, msecs); } /** * __scsi_queue_insert - private queue insertion * @cmd: The SCSI command being requeued * @reason: The reason for the requeue * @unbusy: Whether the queue should be unbusied * * This is a private queue insertion. The public interface * scsi_queue_insert() always assumes the queue should be unbusied * because it's always called before the completion. This function is * for a requeue after completion, which should only occur in this * file. */ static void __scsi_queue_insert(struct scsi_cmnd *cmd, int reason, bool unbusy) { struct scsi_device *device = cmd->device; SCSI_LOG_MLQUEUE(1, scmd_printk(KERN_INFO, cmd, "Inserting command %p into mlqueue\n", cmd)); scsi_set_blocked(cmd, reason); /* * Decrement the counters, since these commands are no longer * active on the host/device. */ if (unbusy) scsi_device_unbusy(device, cmd); /* * Requeue this command. It will go before all other commands * that are already in the queue. Schedule requeue work under * lock such that the kblockd_schedule_work() call happens * before blk_mq_destroy_queue() finishes. */ cmd->result = 0; blk_mq_requeue_request(scsi_cmd_to_rq(cmd), !scsi_host_in_recovery(cmd->device->host)); } /** * scsi_queue_insert - Reinsert a command in the queue. 
* @cmd: command that we are adding to queue. * @reason: why we are inserting command to queue. * * We do this for one of two cases. Either the host is busy and it cannot accept * any more commands for the time being, or the device returned QUEUE_FULL and * can accept no more commands. * * Context: This could be called either from an interrupt context or a normal * process context. */ void scsi_queue_insert(struct scsi_cmnd *cmd, int reason) { __scsi_queue_insert(cmd, reason, true); } /** * scsi_failures_reset_retries - reset all failures to zero * @failures: &struct scsi_failures with specific failure modes set */ void scsi_failures_reset_retries(struct scsi_failures *failures) { struct scsi_failure *failure; failures->total_retries = 0; for (failure = failures->failure_definitions; failure->result; failure++) failure->retries = 0; } EXPORT_SYMBOL_GPL(scsi_failures_reset_retries); /** * scsi_check_passthrough - Determine if passthrough scsi_cmnd needs a retry. * @scmd: scsi_cmnd to check. * @failures: scsi_failures struct that lists failures to check for. * * Returns -EAGAIN if the caller should retry else 0. 
*/
static int scsi_check_passthrough(struct scsi_cmnd *scmd,
				  struct scsi_failures *failures)
{
	struct scsi_failure *def;
	struct scsi_sense_hdr hdr;
	enum sam_status status;

	/* A zero result or a missing failure table never asks for a retry. */
	if (!scmd->result || !failures)
		return 0;

	/* The table ends at the first definition whose result is zero. */
	for (def = failures->failure_definitions; def->result; def++) {
		/* Wildcard entry: matches whatever non-zero result we got. */
		if (def->result == SCMD_FAILURE_RESULT_ANY)
			goto maybe_retry;

		/* A non-zero host byte equal to the definition's host byte. */
		if (host_byte(scmd->result) &&
		    host_byte(scmd->result) == host_byte(def->result))
			goto maybe_retry;

		/* The remaining checks need a non-zero status byte. */
		status = status_byte(scmd->result);
		if (!status)
			continue;

		if (def->result == SCMD_FAILURE_STAT_ANY &&
		    !scsi_status_is_good(scmd->result))
			goto maybe_retry;

		if (status != status_byte(def->result))
			continue;

		/*
		 * Sense data is only compared for CHECK CONDITION
		 * definitions that name a specific sense key.
		 */
		if (status_byte(def->result) != SAM_STAT_CHECK_CONDITION ||
		    def->sense == SCMD_FAILURE_SENSE_ANY)
			goto maybe_retry;

		/* No usable sense data: stop looking and do not retry. */
		if (!scsi_command_normalize_sense(scmd, &hdr))
			return 0;

		if (def->sense != hdr.sense_key)
			continue;

		if (def->asc == SCMD_FAILURE_ASC_ANY)
			goto maybe_retry;

		if (def->asc != hdr.asc)
			continue;

		if (def->ascq == SCMD_FAILURE_ASCQ_ANY ||
		    def->ascq == hdr.ascq)
			goto maybe_retry;
	}

	return 0;

maybe_retry:
	/*
	 * A definition with its own "allowed" value is budgeted on its own
	 * retries counter; all others share the table-wide total. A counter
	 * is only incremented when its limit is not SCMD_FAILURE_NO_LIMIT.
	 */
	if (def->allowed) {
		if (def->allowed == SCMD_FAILURE_NO_LIMIT)
			return -EAGAIN;
		return ++def->retries <= def->allowed ? -EAGAIN : 0;
	}

	if (failures->total_allowed == SCMD_FAILURE_NO_LIMIT)
		return -EAGAIN;
	return ++failures->total_retries <= failures->total_allowed ?
		-EAGAIN : 0;
}

/**
 * scsi_execute_cmd - insert request and wait for the result
 * @sdev:	scsi_device
 * @cmd:	scsi command
 * @opf:	block layer request cmd_flags
 * @buffer:	data buffer
 * @bufflen:	len of buffer
 * @timeout:	request timeout in HZ
 * @ml_retries:	number of times SCSI midlayer will retry request
 * @args:	Optional args. See struct definition for field descriptions
 *
 * Returns the scsi_cmnd result field if a command was executed, or a negative
 * Linux error code if we didn't get that far.
*/
int scsi_execute_cmd(struct scsi_device *sdev, const unsigned char *cmd,
		     blk_opf_t opf, void *buffer, unsigned int bufflen,
		     int timeout, int ml_retries,
		     const struct scsi_exec_args *args)
{
	static const struct scsi_exec_args default_args;
	struct request *req;
	struct scsi_cmnd *scmd;
	int ret;

	if (args) {
		/* A caller-supplied sense buffer must be full sized. */
		if (WARN_ON_ONCE(args->sense &&
				 args->sense_len != SCSI_SENSE_BUFFERSIZE))
			return -EINVAL;
	} else {
		args = &default_args;
	}

	/* Each pass allocates, fills in and executes a fresh request. */
	for (;;) {
		req = scsi_alloc_request(sdev->request_queue, opf,
					 args->req_flags);
		if (IS_ERR(req))
			return PTR_ERR(req);

		if (bufflen) {
			ret = blk_rq_map_kern(req, buffer, bufflen, GFP_NOIO);
			if (ret)
				goto out;
		}

		scmd = blk_mq_rq_to_pdu(req);
		scmd->cmd_len = COMMAND_SIZE(cmd[0]);
		memcpy(scmd->cmnd, cmd, scmd->cmd_len);
		scmd->allowed = ml_retries;
		scmd->flags |= args->scmd_flags;
		req->timeout = timeout;
		req->rq_flags |= RQF_QUIET;

		/*
		 * head injection *required* here otherwise quiesce won't work
		 */
		blk_execute_rq(req, true);

		if (scsi_check_passthrough(scmd, args->failures) != -EAGAIN)
			break;

		/* Retry requested: drop this request and start over. */
		blk_mq_free_request(req);
	}

	/*
	 * Some devices (USB mass-storage in particular) may transfer
	 * garbage data together with a residue indicating that the data
	 * is invalid.  Prevent the garbage from being misinterpreted
	 * and prevent security leaks by zeroing out the excess data.
	 */
	if (unlikely(scmd->resid_len > 0 && scmd->resid_len <= bufflen))
		memset(buffer + bufflen - scmd->resid_len, 0, scmd->resid_len);

	/* Hand the optional outputs back to the caller. */
	if (args->resid)
		*args->resid = scmd->resid_len;
	if (args->sense)
		memcpy(args->sense, scmd->sense_buffer, SCSI_SENSE_BUFFERSIZE);
	if (args->sshdr)
		scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len,
				     args->sshdr);

	ret = scmd->result;
 out:
	blk_mq_free_request(req);

	return ret;
}
EXPORT_SYMBOL(scsi_execute_cmd);

/*
 * Wake up the error handler if necessary.
Avoid as follows that the error * handler is not woken up if host in-flight requests number == * shost->host_failed: use call_rcu() in scsi_eh_scmd_add() in combination * with an RCU read lock in this function to ensure that this function in * its entirety either finishes before scsi_eh_scmd_add() increases the * host_failed counter or that it notices the shost state change made by * scsi_eh_scmd_add(). */ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { unsigned long flags; rcu_read_lock(); __clear_bit(SCMD_STATE_INFLIGHT, &cmd->state); if (unlikely(scsi_host_in_recovery(shost))) { unsigned int busy = scsi_host_busy(shost); spin_lock_irqsave(shost->host_lock, flags); if (shost->host_failed || shost->host_eh_scheduled) scsi_eh_wakeup(shost, busy); spin_unlock_irqrestore(shost->host_lock, flags); } rcu_read_unlock(); } void scsi_device_unbusy(struct scsi_device *sdev, struct scsi_cmnd *cmd) { struct Scsi_Host *shost = sdev->host; struct scsi_target *starget = scsi_target(sdev); scsi_dec_host_busy(shost, cmd); if (starget->can_queue > 0) atomic_dec(&starget->target_busy); sbitmap_put(&sdev->budget_map, cmd->budget_token); cmd->budget_token = -1; } /* * Kick the queue of SCSI device @sdev if @sdev != current_sdev. Called with * interrupts disabled. */ static void scsi_kick_sdev_queue(struct scsi_device *sdev, void *data) { struct scsi_device *current_sdev = data; if (sdev != current_sdev) blk_mq_run_hw_queues(sdev->request_queue, true); } /* * Called for single_lun devices on IO completion. Clear starget_sdev_user, * and call blk_run_queue for all the scsi_devices on the target - * including current_sdev first. * * Called with *no* scsi locks held. 
*/
static void scsi_single_lun_run(struct scsi_device *current_sdev)
{
	struct Scsi_Host *shost = current_sdev->host;
	struct scsi_target *starget = scsi_target(current_sdev);
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	starget->starget_sdev_user = NULL;
	spin_unlock_irqrestore(shost->host_lock, flags);

	/*
	 * Call blk_run_queue for all LUNs on the target, starting with
	 * current_sdev. We race with others (to set starget_sdev_user),
	 * but in most cases, we will be first. Ideally, each LU on the
	 * target would get some limited time or requests on the target.
	 */
	blk_mq_run_hw_queues(current_sdev->request_queue,
			     shost->queuecommand_may_block);

	/* Kick the other devices only if nobody claimed the target meanwhile. */
	spin_lock_irqsave(shost->host_lock, flags);
	if (!starget->starget_sdev_user)
		__starget_for_each_device(starget, current_sdev,
					  scsi_kick_sdev_queue);
	spin_unlock_irqrestore(shost->host_lock, flags);
}

/*
 * Return true when @sdev has reached its queue depth or has a positive
 * device_blocked count.
 */
static inline bool scsi_device_is_busy(struct scsi_device *sdev)
{
	if (scsi_device_busy(sdev) >= sdev->queue_depth)
		return true;
	if (atomic_read(&sdev->device_blocked) > 0)
		return true;
	return false;
}

/*
 * Return true when @starget has a positive can_queue limit and has either
 * reached it or has a positive target_blocked count. A target without a
 * positive can_queue is never reported busy.
 */
static inline bool scsi_target_is_busy(struct scsi_target *starget)
{
	if (starget->can_queue > 0) {
		if (atomic_read(&starget->target_busy) >= starget->can_queue)
			return true;
		if (atomic_read(&starget->target_blocked) > 0)
			return true;
	}
	return false;
}

/*
 * Return true when @shost has a positive host_blocked count or has
 * host_self_blocked set.
 */
static inline bool scsi_host_is_busy(struct Scsi_Host *shost)
{
	if (atomic_read(&shost->host_blocked) > 0)
		return true;
	if (shost->host_self_blocked)
		return true;
	return false;
}

/*
 * Run the queues of the devices on @shost's starved list.
 *
 * The list is moved onto a local list under host_lock and processed from
 * the front. Processing stops as soon as the host is busy; devices whose
 * target is busy go back to the tail of the host's starved list. Entries
 * that were not processed are put back on the host's list at the end.
 */
static void scsi_starved_list_run(struct Scsi_Host *shost)
{
	LIST_HEAD(starved_list);
	struct scsi_device *sdev;
	unsigned long flags;

	spin_lock_irqsave(shost->host_lock, flags);
	list_splice_init(&shost->starved_list, &starved_list);

	while (!list_empty(&starved_list)) {
		struct request_queue *slq;

		/*
		 * As long as shost is accepting commands and we have
		 * starved queues, call blk_run_queue. scsi_request_fn
		 * drops the queue_lock and can add us back to the
		 * starved_list.
		 *
		 * host_lock protects the starved_list and starved_entry.
		 * scsi_request_fn must get the host_lock before checking
		 * or modifying starved_list or starved_entry.
		 */
		if (scsi_host_is_busy(shost))
			break;

		sdev = list_entry(starved_list.next,
				  struct scsi_device, starved_entry);
		list_del_init(&sdev->starved_entry);
		if (scsi_target_is_busy(scsi_target(sdev))) {
			list_move_tail(&sdev->starved_entry,
				       &shost->starved_list);
			continue;
		}

		/*
		 * Once we drop the host lock, a racing scsi_remove_device()
		 * call may remove the sdev from the starved list and destroy
		 * it and the queue.  Mitigate by taking a reference to the
		 * queue and never touching the sdev again after we drop the
		 * host lock.  Note: if __scsi_remove_device() invokes
		 * blk_mq_destroy_queue() before the queue is run from this
		 * function then blk_run_queue() will return immediately since
		 * blk_mq_destroy_queue() marks the queue with QUEUE_FLAG_DYING.
		 */
		slq = sdev->request_queue;
		if (!blk_get_queue(slq))
			continue;
		spin_unlock_irqrestore(shost->host_lock, flags);

		blk_mq_run_hw_queues(slq, false);
		blk_put_queue(slq);

		spin_lock_irqsave(shost->host_lock, flags);
	}
	/* put any unprocessed entries back */
	list_splice(&starved_list, &shost->starved_list);
	spin_unlock_irqrestore(shost->host_lock, flags);
}

/**
 * scsi_run_queue - Select a proper request queue to serve next.
 * @q:  last request's queue
 *
 * The previous command was completely finished, start a new one if possible.
 */
static void scsi_run_queue(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;

	if (scsi_target(sdev)->single_lun)
		scsi_single_lun_run(sdev);
	if (!list_empty(&sdev->host->starved_list))
		scsi_starved_list_run(sdev->host);

	/* Note: blk_mq_kick_requeue_list() runs the queue asynchronously. */
	blk_mq_kick_requeue_list(q);
}

/*
 * Work handler: run scsi_run_queue() on the request queue of the
 * scsi_device that embeds @work as its requeue_work member.
 */
void scsi_requeue_run_queue(struct work_struct *work)
{
	struct scsi_device *sdev;
	struct request_queue *q;

	sdev = container_of(work, struct scsi_device, requeue_work);
	q = sdev->request_queue;
	scsi_run_queue(q);
}

/* Call scsi_run_queue() for the request queue of every device of @shost. */
void scsi_run_host_queues(struct Scsi_Host *shost)
{
	struct scsi_device *sdev;

	shost_for_each_device(sdev, shost)
		scsi_run_queue(sdev->request_queue);
}

/*
 * For a non-passthrough request, give the command's driver the chance to
 * undo its per-command setup through its optional uninit_command hook.
 */
static void scsi_uninit_cmd(struct scsi_cmnd *cmd)
{
	if (!blk_rq_is_passthrough(scsi_cmd_to_rq(cmd))) {
		struct scsi_driver *drv = scsi_cmd_to_driver(cmd);

		if (drv->uninit_command)
			drv->uninit_command(cmd);
	}
}

/*
 * Free the scatterlist tables of @cmd: the data table when it has entries
 * and the protection table when scsi_prot_sg_count() is non-zero.
 */
void scsi_free_sgtables(struct scsi_cmnd *cmd)
{
	if (cmd->sdb.table.nents)
		sg_free_table_chained(&cmd->sdb.table,
				SCSI_INLINE_SG_CNT);
	if (scsi_prot_sg_count(cmd))
		sg_free_table_chained(&cmd->prot_sdb->table,
				SCSI_INLINE_PROT_SG_CNT);
}
EXPORT_SYMBOL_GPL(scsi_free_sgtables);

/* Tear down @cmd: free its sg tables, then run the driver uninit step. */
static void scsi_mq_uninit_cmd(struct scsi_cmnd *cmd)
{
	scsi_free_sgtables(cmd);
	scsi_uninit_cmd(cmd);
}

/*
 * Restart request processing for @sdev without running the queue
 * directly in the single_lun/starved cases.
 *
 * Nothing is done while the host is in recovery. For a single_lun target
 * or a non-empty starved list the work is handed to requeue_work;
 * otherwise the hardware queues are run asynchronously, but only when
 * ->restarts was non-zero and this caller is the one that reset it to zero.
 */
static void scsi_run_queue_async(struct scsi_device *sdev)
{
	if (scsi_host_in_recovery(sdev->host))
		return;

	if (scsi_target(sdev)->single_lun ||
	    !list_empty(&sdev->host->starved_list)) {
		kblockd_schedule_work(&sdev->requeue_work);
	} else {
		/*
		 * smp_mb() present in sbitmap_queue_clear() or implied in
		 * .end_io is for ordering writing .device_busy in
		 * scsi_device_unbusy() and reading sdev->restarts.
		 */
		int old = atomic_read(&sdev->restarts);

		/*
		 * ->restarts has to be kept as non-zero if new budget
		 *  contention occurs.
		 *
		 *  No need to run queue when either another re-run
		 *  queue wins in updating ->restarts or a new budget
		 *  contention occurs.
		 */
		if (old && atomic_cmpxchg(&sdev->restarts, old, 0) == old)
			blk_mq_run_hw_queues(sdev->request_queue, true);
	}
}

/* Returns false when no more bytes to process, true if there are more */
static bool scsi_end_request(struct request *req, blk_status_t error,
		unsigned int bytes)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
	struct scsi_device *sdev = cmd->device;
	struct request_queue *q = sdev->request_queue;

	/* Bytes are still outstanding: the caller keeps the request. */
	if (blk_update_request(req, error, bytes))
		return true;

	if (q->limits.features & BLK_FEAT_ADD_RANDOM)
		add_disk_randomness(req->q->disk);

	WARN_ON_ONCE(!blk_rq_is_passthrough(req) &&
		     !(cmd->flags & SCMD_INITIALIZED));
	cmd->flags = 0;

	/*
	 * Calling rcu_barrier() is not necessary here because the
	 * SCSI error handler guarantees that the function called by
	 * call_rcu() has been called before scsi_end_request() is
	 * called.
	 */
	destroy_rcu_head(&cmd->rcu);

	/*
	 * In the MQ case the command gets freed by __blk_mq_end_request,
	 * so we have to do all cleanup that depends on it earlier.
	 *
	 * We also can't kick the queues from irq context, so we
	 * will have to defer it to a workqueue.
	 */
	scsi_mq_uninit_cmd(cmd);

	/*
	 * queue is still alive, so grab the ref for preventing it
	 * from being cleaned up during running queue.
	 */
	percpu_ref_get(&q->q_usage_counter);

	__blk_mq_end_request(req, error);

	scsi_run_queue_async(sdev);

	percpu_ref_put(&q->q_usage_counter);
	return false;
}

/**
 * scsi_result_to_blk_status - translate a SCSI result code into blk_status_t
 * @result:	scsi error code
 *
 * Translate a SCSI result code into a blk_status_t value.
 */
static blk_status_t scsi_result_to_blk_status(int result)
{
	/*
	 * Check the scsi-ml byte first in case we converted a host or status
	 * byte.
	 */
	switch (scsi_ml_byte(result)) {
	case SCSIML_STAT_OK:
		break;
	case SCSIML_STAT_RESV_CONFLICT:
		return BLK_STS_RESV_CONFLICT;
	case SCSIML_STAT_NOSPC:
		return BLK_STS_NOSPC;
	case SCSIML_STAT_MED_ERROR:
		return BLK_STS_MEDIUM;
	case SCSIML_STAT_TGT_FAILURE:
		return BLK_STS_TARGET;
	case SCSIML_STAT_DL_TIMEOUT:
		return BLK_STS_DURATION_LIMIT;
	}

	/* No scsi-ml verdict: decide from the host byte and SCSI status. */
	switch (host_byte(result)) {
	case DID_OK:
		if (scsi_status_is_good(result))
			return BLK_STS_OK;
		return BLK_STS_IOERR;
	case DID_TRANSPORT_FAILFAST:
	case DID_TRANSPORT_MARGINAL:
		return BLK_STS_TRANSPORT;
	default:
		return BLK_STS_IOERR;
	}
}

/**
 * scsi_rq_err_bytes - determine number of bytes till the next failure boundary
 * @rq: request to examine
 *
 * Description:
 *     A request could be a merge of IOs which require different failure
 *     handling.  This function determines the number of bytes which
 *     can be failed from the beginning of the request without
 *     crossing into an area which needs to be retried further.
 *
 * Return:
 *     The number of bytes to fail.
 */
static unsigned int scsi_rq_err_bytes(const struct request *rq)
{
	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	unsigned int bytes = 0;
	struct bio *bio;

	if (!(rq->rq_flags & RQF_MIXED_MERGE))
		return blk_rq_bytes(rq);

	/*
	 * Currently the only 'mixing' which can happen is between
	 * different fastfail types.  We can safely fail portions
	 * which have all the failfast bits that the first one has -
	 * the ones which are at least as eager to fail as the first
	 * one.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		if ((bio->bi_opf & ff) != ff)
			break;
		bytes += bio->bi_iter.bi_size;
	}

	/* this could lead to infinite loop */
	BUG_ON(blk_rq_bytes(rq) && !bytes);
	return bytes;
}

/*
 * Return true, after logging an error, when more than
 * (cmd->allowed + 1) * req->timeout jiffies have passed since
 * cmd->jiffies_at_alloc. Commands whose allowed count is
 * SCSI_CMD_RETRIES_NO_LIMIT are never reported as exceeded.
 */
static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
{
	struct request *req = scsi_cmd_to_rq(cmd);
	unsigned long wait_for;

	if (cmd->allowed == SCSI_CMD_RETRIES_NO_LIMIT)
		return false;

	wait_for = (cmd->allowed + 1) * req->timeout;
	if (time_before(cmd->jiffies_at_alloc + wait_for, jiffies)) {
		scmd_printk(KERN_ERR, cmd, "timing out command, waited %lus\n",
			    wait_for/HZ);
		return true;
	}
	return false;
}

/*
 * When ALUA transition state is returned, reprep the cmd to
 * use the ALUA handler's transition timeout. Delay the reprep
 * 1 sec to avoid aggressive retries of the target in that
 * state.
 */
#define ALUA_TRANSITION_REPREP_DELAY	1000

/*
 * Helper for scsi_io_completion() when special action required.
 *
 * Picks one action (fail, reprep, delayed reprep, retry, delayed retry)
 * from the host byte and the current sense data of @cmd, downgrades any
 * non-fail action to a failure once the command's runtime is exceeded,
 * and then carries the action out.
 */
static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
{
	struct request *req = scsi_cmd_to_rq(cmd);
	int level = 0;
	enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP,
	      ACTION_RETRY, ACTION_DELAYED_RETRY} action;
	struct scsi_sense_hdr sshdr;
	bool sense_valid;
	bool sense_current = true;      /* false implies "deferred sense" */
	blk_status_t blk_stat;

	sense_valid = scsi_command_normalize_sense(cmd, &sshdr);
	if (sense_valid)
		sense_current = !scsi_sense_is_deferred(&sshdr);

	blk_stat = scsi_result_to_blk_status(result);

	if (host_byte(result) == DID_RESET) {
		/* Third party bus reset or reset for error recovery
		 * reasons.  Just retry the command and see what
		 * happens.
		 */
		action = ACTION_RETRY;
	} else if (sense_valid && sense_current) {
		switch (sshdr.sense_key) {
		case UNIT_ATTENTION:
			if (cmd->device->removable) {
				/* Detected disc change.  Set a bit
				 * and quietly refuse further access.
				 */
				cmd->device->changed = 1;
				action = ACTION_FAIL;
			} else {
				/* Must have been a power glitch, or a
				 * bus reset.  Could not have been a
				 * media change, so we just retry the
				 * command and see what happens.
				 */
				action = ACTION_RETRY;
			}
			break;
		case ILLEGAL_REQUEST:
			/* If we had an ILLEGAL REQUEST returned, then
			 * we may have performed an unsupported
			 * command.  The only thing this should be
			 * would be a ten byte read where only a six
			 * byte read was supported.  Also, on a system
			 * where READ CAPACITY failed, we may have
			 * read past the end of the disk.
			 */
			if ((cmd->device->use_10_for_rw &&
			    sshdr.asc == 0x20 && sshdr.ascq == 0x00) &&
			    (cmd->cmnd[0] == READ_10 ||
			     cmd->cmnd[0] == WRITE_10)) {
				/* This will issue a new 6-byte command. */
				cmd->device->use_10_for_rw = 0;
				action = ACTION_REPREP;
			} else if (sshdr.asc == 0x10) /* DIX */ {
				action = ACTION_FAIL;
				blk_stat = BLK_STS_PROTECTION;
			/* INVALID COMMAND OPCODE or INVALID FIELD IN CDB */
			} else if (sshdr.asc == 0x20 || sshdr.asc == 0x24) {
				action = ACTION_FAIL;
				blk_stat = BLK_STS_TARGET;
			} else
				action = ACTION_FAIL;
			break;
		case ABORTED_COMMAND:
			action = ACTION_FAIL;
			if (sshdr.asc == 0x10) /* DIF */
				blk_stat = BLK_STS_PROTECTION;
			break;
		case NOT_READY:
			/* If the device is in the process of becoming
			 * ready, or has a temporary blockage, retry.
			 */
			if (sshdr.asc == 0x04) {
				switch (sshdr.ascq) {
				case 0x01: /* becoming ready */
				case 0x04: /* format in progress */
				case 0x05: /* rebuild in progress */
				case 0x06: /* recalculation in progress */
				case 0x07: /* operation in progress */
				case 0x08: /* Long write in progress */
				case 0x09: /* self test in progress */
				case 0x11: /* notify (enable spinup) required */
				case 0x14: /* space allocation in progress */
				case 0x1a: /* start stop unit in progress */
				case 0x1b: /* sanitize in progress */
				case 0x1d: /* configuration in progress */
					action = ACTION_DELAYED_RETRY;
					break;
				case 0x0a: /* ALUA state transition */
					action = ACTION_DELAYED_REPREP;
					break;
				/*
				 * Depopulation might take many hours,
				 * thus it is not worthwhile to retry.
				 */
				case 0x24: /* depopulation in progress */
				case 0x25: /* depopulation restore in progress */
					fallthrough;
				default:
					action = ACTION_FAIL;
					break;
				}
			} else
				action = ACTION_FAIL;
			break;
		case VOLUME_OVERFLOW:
			/* See SSC3rXX or current. */
			action = ACTION_FAIL;
			break;
		case DATA_PROTECT:
			action = ACTION_FAIL;
			if ((sshdr.asc == 0x0C && sshdr.ascq == 0x12) ||
			    (sshdr.asc == 0x55 &&
			     (sshdr.ascq == 0x0E || sshdr.ascq == 0x0F))) {
				/* Insufficient zone resources */
				blk_stat = BLK_STS_ZONE_OPEN_RESOURCE;
			}
			break;
		case COMPLETED:
			fallthrough;
		default:
			action = ACTION_FAIL;
			break;
		}
	} else
		action = ACTION_FAIL;

	/* Stop retrying or repreparing once the command has run too long. */
	if (action != ACTION_FAIL && scsi_cmd_runtime_exceeced(cmd))
		action = ACTION_FAIL;

	switch (action) {
	case ACTION_FAIL:
		/* Give up and fail the remainder of the request */
		if (!(req->rq_flags & RQF_QUIET)) {
			static DEFINE_RATELIMIT_STATE(_rs,
					DEFAULT_RATELIMIT_INTERVAL,
					DEFAULT_RATELIMIT_BURST);

			if (unlikely(scsi_logging_level))
				level =
				     SCSI_LOG_LEVEL(SCSI_LOG_MLCOMPLETE_SHIFT,
						    SCSI_LOG_MLCOMPLETE_BITS);

			/*
			 * if logging is enabled the failure will be printed
			 * in scsi_log_completion(), so avoid duplicate messages
			 */
			if (!level && __ratelimit(&_rs)) {
				scsi_print_result(cmd, NULL, FAILED);
				if (sense_valid)
					scsi_print_sense(cmd);
				scsi_print_command(cmd);
			}
		}
		/*
		 * If bytes remain after failing the leading part, fall
		 * through and requeue the request for the remainder.
		 */
		if (!scsi_end_request(req, blk_stat, scsi_rq_err_bytes(req)))
			return;
		fallthrough;
	case ACTION_REPREP:
		scsi_mq_requeue_cmd(cmd, 0);
		break;
	case ACTION_DELAYED_REPREP:
		scsi_mq_requeue_cmd(cmd, ALUA_TRANSITION_REPREP_DELAY);
		break;
	case ACTION_RETRY:
		/* Retry the same command immediately */
		__scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY, false);
		break;
	case ACTION_DELAYED_RETRY:
		/* Retry the same command after a delay */
		__scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY, false);
		break;
	}
}

/*
 * Helper for scsi_io_completion() when cmd->result is non-zero. Returns a
 * new result that may suppress further error checking. Also modifies
 * *blk_statp in some cases.
*/ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result, blk_status_t *blk_statp) { bool sense_valid; bool sense_current = true; /* false implies "deferred sense" */ struct request *req = scsi_cmd_to_rq(cmd); struct scsi_sense_hdr sshdr; sense_valid = scsi_command_normalize_sense(cmd, &sshdr); if (sense_valid) sense_current = !scsi_sense_is_deferred(&sshdr); if (blk_rq_is_passthrough(req)) { if (sense_valid) { /* * SG_IO wants current and deferred errors */ cmd->sense_len = min(8 + cmd->sense_buffer[7], SCSI_SENSE_BUFFERSIZE); } if (sense_current) *blk_statp = scsi_result_to_blk_status(result); } else if (blk_rq_bytes(req) == 0 && sense_current) { /* * Flush commands do not transfers any data, and thus cannot use * good_bytes != blk_rq_bytes(req) as the signal for an error. * This sets *blk_statp explicitly for the problem case. */ *blk_statp = scsi_result_to_blk_status(result); } /* * Recovered errors need reporting, but they're always treated as * success, so fiddle the result code here. For passthrough requests * we already took a copy of the original into sreq->result which * is what gets returned to the user */ if (sense_valid && (sshdr.sense_key == RECOVERED_ERROR)) { bool do_print = true; /* * if ATA PASS-THROUGH INFORMATION AVAILABLE [0x0, 0x1d] * skip print since caller wants ATA registers. Only occurs * on SCSI ATA PASS_THROUGH commands when CK_COND=1 */ if ((sshdr.asc == 0x0) && (sshdr.ascq == 0x1d)) do_print = false; else if (req->rq_flags & RQF_QUIET) do_print = false; if (do_print) scsi_print_sense(cmd); result = 0; /* for passthrough, *blk_statp may be set */ *blk_statp = BLK_STS_OK; } /* * Another corner case: the SCSI status byte is non-zero but 'good'. * Example: PRE-FETCH command returns SAM_STAT_CONDITION_MET when * it is able to fit nominated LBs in its cache (and SAM_STAT_GOOD * if it can't fit). Treat SAM_STAT_CONDITION_MET and the related * intermediate statuses (both obsolete in SAM-4) as good. 
*/ if ((result & 0xff) && scsi_status_is_good(result)) { result = 0; *blk_statp = BLK_STS_OK; } return result; } /** * scsi_io_completion - Completion processing for SCSI commands. * @cmd: command that is finished. * @good_bytes: number of processed bytes. * * We will finish off the specified number of sectors. If we are done, the * command block will be released and the queue function will be goosed. If we * are not done then we have to figure out what to do next: * * a) We can call scsi_mq_requeue_cmd(). The request will be * unprepared and put back on the queue. Then a new command will * be created for it. This should be used if we made forward * progress, or if we want to switch from READ(10) to READ(6) for * example. * * b) We can call scsi_io_completion_action(). The request will be * put back on the queue and retried using the same command as * before, possibly after a delay. * * c) We can call scsi_end_request() with blk_stat other than * BLK_STS_OK, to fail the remainder of the request. */ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) { int result = cmd->result; struct request *req = scsi_cmd_to_rq(cmd); blk_status_t blk_stat = BLK_STS_OK; if (unlikely(result)) /* a nz result may or may not be an error */ result = scsi_io_completion_nz_result(cmd, result, &blk_stat); /* * Next deal with any sectors which we were able to correctly * handle. */ SCSI_LOG_HLCOMPLETE(1, scmd_printk(KERN_INFO, cmd, "%u sectors total, %d bytes done.\n", blk_rq_sectors(req), good_bytes)); /* * Failed, zero length commands always need to drop down * to retry code. Fast path should return in this block. */ if (likely(blk_rq_bytes(req) > 0 || blk_stat == BLK_STS_OK)) { if (likely(!scsi_end_request(req, blk_stat, good_bytes))) return; /* no bytes remaining */ } /* Kill remainder if no retries. 
*/
	if (unlikely(blk_stat && scsi_noretry_cmd(cmd))) {
		/* Fail everything that is still outstanding on the request. */
		if (scsi_end_request(req, blk_stat, blk_rq_bytes(req)))
			WARN_ONCE(true,
			    "Bytes remaining after failed, no-retry command");
		return;
	}

	/*
	 * If there had been no error, but we have leftover bytes in the
	 * request just queue the command up again.
	 */
	if (likely(result == 0))
		scsi_mq_requeue_cmd(cmd, 0);
	else
		scsi_io_completion_action(cmd, result);
}

/*
 * True only when the device has a non-zero dma_drain_len, the request is a
 * passthrough request that is not a write, and the host template's
 * dma_need_drain() callback returns non-zero. The callback is only invoked
 * when all of the earlier conditions hold (short-circuit evaluation).
 */
static inline bool scsi_cmd_needs_dma_drain(struct scsi_device *sdev,
		struct request *rq)
{
	return sdev->dma_drain_len && blk_rq_is_passthrough(rq) &&
	       !op_is_write(req_op(rq)) &&
	       sdev->host->hostt->dma_need_drain(rq);
}

/**
 * scsi_alloc_sgtables - Allocate and initialize data and integrity scatterlists
 * @cmd: SCSI command data structure to initialize.
 *
 * Initializes @cmd->sdb and also @cmd->prot_sdb if data integrity is enabled
 * for @cmd.
 *
 * Returns:
 * * BLK_STS_OK       - on success
 * * BLK_STS_RESOURCE - if the failure is retryable
 * * BLK_STS_IOERR    - if the failure is fatal
 */
blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd)
{
	struct scsi_device *sdev = cmd->device;
	struct request *rq = scsi_cmd_to_rq(cmd);
	unsigned short nr_segs = blk_rq_nr_phys_segments(rq);
	struct scatterlist *last_sg = NULL;
	blk_status_t ret;
	bool need_drain = scsi_cmd_needs_dma_drain(sdev, rq);
	int count;

	/* A request without physical segments is rejected as a fatal error. */
	if (WARN_ON_ONCE(!nr_segs))
		return BLK_STS_IOERR;

	/*
	 * Make sure there is space for the drain.  The driver must adjust
	 * max_hw_segments to be prepared for this.
	 */
	if (need_drain)
		nr_segs++;

	/*
	 * If sg table allocation fails, requeue request later.
	 */
	if (unlikely(sg_alloc_table_chained(&cmd->sdb.table, nr_segs,
			cmd->sdb.table.sgl, SCSI_INLINE_SG_CNT)))
		return BLK_STS_RESOURCE;

	/*
	 * Next, walk the list, and fill in the addresses and sizes of
	 * each segment.
*/
	count = __blk_rq_map_sg(rq, cmd->sdb.table.sgl, &last_sg);

	/*
	 * Grow the last segment when the byte count has any bit of
	 * dma_pad_mask set. NOTE(review): the pad_len arithmetic rounds the
	 * length up to a multiple of (dma_pad_mask + 1) only if the mask has
	 * the form 2^n - 1 -- confirm against the queue limits definition.
	 */
	if (blk_rq_bytes(rq) & rq->q->limits.dma_pad_mask) {
		unsigned int pad_len =
			(rq->q->limits.dma_pad_mask & ~blk_rq_bytes(rq)) + 1;

		last_sg->length += pad_len;
		cmd->extra_len += pad_len;
	}

	/*
	 * Append the device's drain buffer as one extra, final scatterlist
	 * entry; nr_segs was bumped before allocation to leave room for it.
	 */
	if (need_drain) {
		sg_unmark_end(last_sg);
		last_sg = sg_next(last_sg);
		sg_set_buf(last_sg, sdev->dma_drain_buf, sdev->dma_drain_len);
		sg_mark_end(last_sg);

		cmd->extra_len += sdev->dma_drain_len;
		count++;
	}

	/* The mapped entry count must fit in the table allocated above. */
	BUG_ON(count > cmd->sdb.table.nents);
	cmd->sdb.table.nents = count;
	cmd->sdb.length = blk_rq_payload_bytes(rq);

	if (blk_integrity_rq(rq)) {
		struct scsi_data_buffer *prot_sdb = cmd->prot_sdb;

		if (WARN_ON_ONCE(!prot_sdb)) {
			/*
			 * This can happen if someone (e.g. multipath)
			 * queues a command to a device on an adapter
			 * that does not support DIX.
			 */
			ret = BLK_STS_IOERR;
			goto out_free_sgtables;
		}

		if (sg_alloc_table_chained(&prot_sdb->table,
				rq->nr_integrity_segments,
				prot_sdb->table.sgl,
				SCSI_INLINE_PROT_SG_CNT)) {
			ret = BLK_STS_RESOURCE;
			goto out_free_sgtables;
		}

		count = blk_rq_map_integrity_sg(rq, prot_sdb->table.sgl);
		/*
		 * prot_sdb was read from cmd->prot_sdb above, so this stores
		 * the same pointer back.
		 */
		cmd->prot_sdb = prot_sdb;
		cmd->prot_sdb->table.nents = count;
	}

	return BLK_STS_OK;
out_free_sgtables:
	/* Error unwind: release whatever tables were set up for @cmd. */
	scsi_free_sgtables(cmd);
	return ret;
}
EXPORT_SYMBOL(scsi_alloc_sgtables);

/**
 * scsi_initialize_rq - initialize struct scsi_cmnd partially
 * @rq: Request associated with the SCSI command to be initialized.
 *
 * This function initializes the members of struct scsi_cmnd that must be
 * initialized before request processing starts and that won't be
 * reinitialized if a SCSI command is requeued.
*/ static void scsi_initialize_rq(struct request *rq) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); memset(cmd->cmnd, 0, sizeof(cmd->cmnd)); cmd->cmd_len = MAX_COMMAND_SIZE; cmd->sense_len = 0; init_rcu_head(&cmd->rcu); cmd->jiffies_at_alloc = jiffies; cmd->retries = 0; } /** * scsi_alloc_request - allocate a block request and partially * initialize its &scsi_cmnd * @q: the device's request queue * @opf: the request operation code * @flags: block layer allocation flags * * Return: &struct request pointer on success or %NULL on failure */ struct request *scsi_alloc_request(struct request_queue *q, blk_opf_t opf, blk_mq_req_flags_t flags) { struct request *rq; rq = blk_mq_alloc_request(q, opf, flags); if (!IS_ERR(rq)) scsi_initialize_rq(rq); return rq; } EXPORT_SYMBOL_GPL(scsi_alloc_request); /* * Only called when the request isn't completed by SCSI, and not freed by * SCSI */ static void scsi_cleanup_rq(struct request *rq) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq); cmd->flags = 0; if (rq->rq_flags & RQF_DONTPREP) { scsi_mq_uninit_cmd(cmd); rq->rq_flags &= ~RQF_DONTPREP; } } /* Called before a request is prepared. See also scsi_mq_prep_fn(). */ void scsi_init_command(struct scsi_device *dev, struct scsi_cmnd *cmd) { struct request *rq = scsi_cmd_to_rq(cmd); if (!blk_rq_is_passthrough(rq) && !(cmd->flags & SCMD_INITIALIZED)) { cmd->flags |= SCMD_INITIALIZED; scsi_initialize_rq(rq); } cmd->device = dev; INIT_LIST_HEAD(&cmd->eh_entry); INIT_DELAYED_WORK(&cmd->abort_work, scmd_eh_abort_handler); } static blk_status_t scsi_setup_scsi_cmnd(struct scsi_device *sdev, struct request *req) { struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); /* * Passthrough requests may transfer data, in which case they must * a bio attached to them. Or they might contain a SCSI command * that does not transfer data, in which case they may optionally * submit a request without an attached bio. 
*/
	if (req->bio) {
		blk_status_t ret = scsi_alloc_sgtables(cmd);
		if (unlikely(ret != BLK_STS_OK))
			return ret;
	} else {
		/* Without a bio the request must not claim any payload. */
		BUG_ON(blk_rq_bytes(req));

		memset(&cmd->sdb, 0, sizeof(cmd->sdb));
	}

	cmd->transfersize = blk_rq_bytes(req);
	return BLK_STS_OK;
}

/*
 * Map the current device state to a block status for @req.
 * BLK_STS_OK means the request may proceed. @req may be NULL, in which case
 * the RQF_PM checks are skipped and those states yield BLK_STS_OK.
 */
static blk_status_t
scsi_device_state_check(struct scsi_device *sdev, struct request *req)
{
	switch (sdev->sdev_state) {
	case SDEV_CREATED:
		return BLK_STS_OK;
	case SDEV_OFFLINE:
	case SDEV_TRANSPORT_OFFLINE:
		/*
		 * If the device is offline we refuse to process any
		 * commands.  The device must be brought online
		 * before trying any recovery commands.
		 */
		if (!sdev->offline_already) {
			/* Log the rejection only the first time around. */
			sdev->offline_already = true;
			sdev_printk(KERN_ERR, sdev,
				    "rejecting I/O to offline device\n");
		}
		return BLK_STS_IOERR;
	case SDEV_DEL:
		/*
		 * If the device is fully deleted, we refuse to
		 * process any commands as well.
		 */
		sdev_printk(KERN_ERR, sdev,
			    "rejecting I/O to dead device\n");
		return BLK_STS_IOERR;
	case SDEV_BLOCK:
	case SDEV_CREATED_BLOCK:
		return BLK_STS_RESOURCE;
	case SDEV_QUIESCE:
		/*
		 * If the device is quiesced we only accept power management
		 * commands; anything else triggers a one-time warning and is
		 * pushed back with BLK_STS_RESOURCE.
		 */
		if (req && WARN_ON_ONCE(!(req->rq_flags & RQF_PM)))
			return BLK_STS_RESOURCE;
		return BLK_STS_OK;
	default:
		/*
		 * For any other not fully online state we only allow
		 * power management commands.
		 */
		if (req && !(req->rq_flags & RQF_PM))
			return BLK_STS_OFFLINE;
		return BLK_STS_OK;
	}
}

/*
 * scsi_dev_queue_ready: if we can send requests to sdev, assign one token
 * and return the token else return -1.
*/
static inline int scsi_dev_queue_ready(struct request_queue *q,
				  struct scsi_device *sdev)
{
	int token;

	token = sbitmap_get(&sdev->budget_map);
	if (token < 0)
		return -1;

	/* Fast path: the device is not blocked, hand out the token. */
	if (!atomic_read(&sdev->device_blocked))
		return token;

	/*
	 * Only unblock if no other commands are pending and
	 * if device_blocked has decreased to zero
	 */
	if (scsi_device_busy(sdev) > 1 ||
	    atomic_dec_return(&sdev->device_blocked) > 0) {
		/* Still blocked: give the token obtained above back. */
		sbitmap_put(&sdev->budget_map, token);
		return -1;
	}

	SCSI_LOG_MLQUEUE(3, sdev_printk(KERN_INFO, sdev,
			 "unblocking device at zero depth\n"));

	return token;
}

/*
 * scsi_target_queue_ready: checks if we can send commands to the target
 * @shost: host whose host_lock and starved_list are used.
 * @sdev: scsi device on starget to check.
 *
 * Returns 1 if a command may be sent, 0 otherwise.
 */
static inline int scsi_target_queue_ready(struct Scsi_Host *shost,
					   struct scsi_device *sdev)
{
	struct scsi_target *starget = scsi_target(sdev);
	unsigned int busy;

	if (starget->single_lun) {
		/*
		 * Claim the target for @sdev unless a different device
		 * currently holds it.
		 */
		spin_lock_irq(shost->host_lock);
		if (starget->starget_sdev_user &&
		    starget->starget_sdev_user != sdev) {
			spin_unlock_irq(shost->host_lock);
			return 0;
		}
		starget->starget_sdev_user = sdev;
		spin_unlock_irq(shost->host_lock);
	}

	/* No queue limit configured: target_busy is not tracked at all. */
	if (starget->can_queue <= 0)
		return 1;

	/* busy is the target_busy count before this caller's increment. */
	busy = atomic_inc_return(&starget->target_busy) - 1;
	if (atomic_read(&starget->target_blocked) > 0) {
		if (busy)
			goto starved;

		/*
		 * unblock after target_blocked iterates to zero
		 */
		if (atomic_dec_return(&starget->target_blocked) > 0)
			goto out_dec;

		SCSI_LOG_MLQUEUE(3, starget_printk(KERN_INFO, starget,
				 "unblocking target at zero depth\n"));
	}

	if (busy >= starget->can_queue)
		goto starved;

	return 1;

starved:
	spin_lock_irq(shost->host_lock);
	list_move_tail(&sdev->starved_entry, &shost->starved_list);
	spin_unlock_irq(shost->host_lock);
out_dec:
	/* Undo the target_busy increment taken above. */
	if (starget->can_queue > 0)
		atomic_dec(&starget->target_busy);
	return 0;
}

/*
 * scsi_host_queue_ready: if we can send requests to shost, return 1 else
 * return 0. We must end up running the queue again whenever 0 is
 * returned, else IO can hang.
*/
static inline int scsi_host_queue_ready(struct request_queue *q,
				   struct Scsi_Host *shost,
				   struct scsi_device *sdev,
				   struct scsi_cmnd *cmd)
{
	if (atomic_read(&shost->host_blocked) > 0) {
		if (scsi_host_busy(shost) > 0)
			goto starved;

		/*
		 * unblock after host_blocked iterates to zero
		 */
		if (atomic_dec_return(&shost->host_blocked) > 0)
			goto out_dec;

		SCSI_LOG_MLQUEUE(3,
			shost_printk(KERN_INFO, shost,
				     "unblocking host at zero depth\n"));
	}

	if (shost->host_self_blocked)
		goto starved;

	/* We're OK to process the command, so we can't be starved */
	if (!list_empty(&sdev->starved_entry)) {
		/*
		 * The list membership is tested again once host_lock is held
		 * before the entry is actually removed.
		 */
		spin_lock_irq(shost->host_lock);
		if (!list_empty(&sdev->starved_entry))
			list_del_init(&sdev->starved_entry);
		spin_unlock_irq(shost->host_lock);
	}

	__set_bit(SCMD_STATE_INFLIGHT, &cmd->state);

	return 1;

starved:
	/* Queue the device on the host's starved list if not already there. */
	spin_lock_irq(shost->host_lock);
	if (list_empty(&sdev->starved_entry))
		list_add_tail(&sdev->starved_entry, &shost->starved_list);
	spin_unlock_irq(shost->host_lock);
out_dec:
	scsi_dec_host_busy(shost, cmd);
	return 0;
}

/*
 * Busy state exporting function for request stacking drivers.
 *
 * For efficiency, no lock is taken to check the busy state of
 * shost/starget/sdev, since the returned value is not guaranteed and
 * may be changed after request stacking drivers call the function,
 * regardless of taking lock or not.
 *
 * When scsi can't dispatch I/Os anymore and needs to kill I/Os scsi
 * needs to return 'not busy'. Otherwise, request stacking drivers
 * may hold requests forever.
 */
static bool scsi_mq_lld_busy(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost;

	/* A dying queue always reports 'not busy' (see above). */
	if (blk_queue_dying(q))
		return false;

	shost = sdev->host;

	/*
	 * Ignore host/starget busy state.
	 * Since block layer does not have a concept of fairness across
	 * multiple queues, congestion of host/starget needs to be handled
	 * in SCSI layer.
	 */
	if (scsi_host_in_recovery(shost) || scsi_device_is_busy(sdev))
		return true;

	return false;
}

/*
 * Block layer request completion callback.
May be called from interrupt
 * context.
 */
static void scsi_complete(struct request *rq)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
	enum scsi_disposition disposition;

	INIT_LIST_HEAD(&cmd->eh_entry);

	/* Per-device statistics: every completion, and those with a result. */
	atomic_inc(&cmd->device->iodone_cnt);
	if (cmd->result)
		atomic_inc(&cmd->device->ioerr_cnt);

	disposition = scsi_decide_disposition(cmd);
	/*
	 * When scsi_cmd_runtime_exceeced() reports true the command is
	 * finished via the SUCCESS path instead of being retried or handed
	 * to error handling.
	 */
	if (disposition != SUCCESS && scsi_cmd_runtime_exceeced(cmd))
		disposition = SUCCESS;

	scsi_log_completion(cmd, disposition);

	switch (disposition) {
	case SUCCESS:
		scsi_finish_command(cmd);
		break;
	case NEEDS_RETRY:
		scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY);
		break;
	case ADD_TO_MLQUEUE:
		scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY);
		break;
	default:
		/* Every other disposition goes to the error handler. */
		scsi_eh_scmd_add(cmd);
		break;
	}
}

/**
 * scsi_dispatch_cmd - Dispatch a command to the low-level driver.
 * @cmd: command block we are dispatching.
 *
 * Return: nonzero if the request was rejected and the device's queue needs
 * to be plugged; zero otherwise.
 */
static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	int rtn = 0;

	atomic_inc(&cmd->device->iorequest_cnt);

	/* check if the device is still usable */
	if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
		/* in SDEV_DEL we error all commands. DID_NO_CONNECT
		 * returns an immediate error upwards, and signals
		 * that the device is no longer present */
		cmd->result = DID_NO_CONNECT << 16;
		goto done;
	}

	/* Check to see if the scsi lld made this device blocked. */
	if (unlikely(scsi_device_blocked(cmd->device))) {
		/*
		 * in blocked state, the command is just put back on
		 * the device queue.  The suspend state has already
		 * blocked the queue so future requests should not
		 * occur until the device transitions out of the
		 * suspend state.
		 */
		SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
			"queuecommand : device blocked\n"));
		/* Not dispatched after all: undo the request count above. */
		atomic_dec(&cmd->device->iorequest_cnt);
		return SCSI_MLQUEUE_DEVICE_BUSY;
	}

	/* Store the LUN value in cmnd, if needed.
*/
	/*
	 * Keeps the low five bits of CDB byte 1 and places the low three
	 * bits of the LUN in bits 7..5.
	 */
	if (cmd->device->lun_in_cdb)
		cmd->cmnd[1] = (cmd->cmnd[1] & 0x1f) |
			       (cmd->device->lun << 5 & 0xe0);

	scsi_log_send(cmd);

	/*
	 * Before we queue this command, check if the command
	 * length exceeds what the host adapter can handle.
	 */
	if (cmd->cmd_len > cmd->device->host->max_cmd_len) {
		SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
			       "queuecommand : command too long. "
			       "cdb_size=%d host->max_cmd_len=%d\n",
			       cmd->cmd_len, cmd->device->host->max_cmd_len));
		cmd->result = (DID_ABORT << 16);
		goto done;
	}

	/* A host in SHOST_DEL state gets the command completed immediately. */
	if (unlikely(host->shost_state == SHOST_DEL)) {
		cmd->result = (DID_NO_CONNECT << 16);
		goto done;

	}

	trace_scsi_dispatch_cmd_start(cmd);
	rtn = host->hostt->queuecommand(host, cmd);
	if (rtn) {
		atomic_dec(&cmd->device->iorequest_cnt);
		trace_scsi_dispatch_cmd_error(cmd, rtn);
		/*
		 * Any rejection other than DEVICE_BUSY or TARGET_BUSY is
		 * reported to the caller as HOST_BUSY.
		 */
		if (rtn != SCSI_MLQUEUE_DEVICE_BUSY &&
		    rtn != SCSI_MLQUEUE_TARGET_BUSY)
			rtn = SCSI_MLQUEUE_HOST_BUSY;

		SCSI_LOG_MLQUEUE(3, scmd_printk(KERN_INFO, cmd,
			"queuecommand : request rejected\n"));
	}

	return rtn;
 done:
	/* Early-failure path: complete the command with the result set above. */
	scsi_done(cmd);
	return 0;
}

/* Size in bytes of the sg-list stored in the scsi-mq command-private data.
*/
static unsigned int scsi_mq_inline_sgl_size(struct Scsi_Host *shost)
{
	/* Entry count is capped at SCSI_INLINE_SG_CNT. */
	return min_t(unsigned int, shost->sg_tablesize, SCSI_INLINE_SG_CNT) *
		sizeof(struct scatterlist);
}

/*
 * Reset the per-dispatch fields of the command attached to @req and hand it
 * either to the passthrough setup or to the driver's init_command() hook.
 * Returns the block status produced by whichever of those ran (or by the
 * device handler's prep_fn when that fails).
 */
static blk_status_t scsi_prepare_cmd(struct request *req)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
	struct scsi_device *sdev = req->q->queuedata;
	struct Scsi_Host *shost = sdev->host;
	/* Remembered so the bit survives the cmd->state reset below. */
	bool in_flight = test_bit(SCMD_STATE_INFLIGHT, &cmd->state);
	struct scatterlist *sg;

	scsi_init_command(sdev, cmd);

	cmd->eh_eflags = 0;
	cmd->prot_type = 0;
	cmd->prot_flags = 0;
	cmd->submitter = 0;
	memset(&cmd->sdb, 0, sizeof(cmd->sdb));
	cmd->underflow = 0;
	cmd->transfersize = 0;
	cmd->host_scribble = NULL;
	cmd->result = 0;
	cmd->extra_len = 0;
	cmd->state = 0;
	if (in_flight)
		__set_bit(SCMD_STATE_INFLIGHT, &cmd->state);

	cmd->prot_op = SCSI_PROT_NORMAL;
	if (blk_rq_bytes(req))
		cmd->sc_data_direction = rq_dma_dir(req);
	else
		cmd->sc_data_direction = DMA_NONE;

	/*
	 * The inline data scatterlist sits directly behind struct scsi_cmnd
	 * and the LLD's cmd_size bytes of private data.
	 */
	sg = (void *)cmd + sizeof(struct scsi_cmnd) + shost->hostt->cmd_size;
	cmd->sdb.table.sgl = sg;

	if (scsi_host_get_prot(shost)) {
		memset(cmd->prot_sdb, 0, sizeof(struct scsi_data_buffer));

		/* The protection sg list directly follows the prot_sdb itself. */
		cmd->prot_sdb->table.sgl =
			(struct scatterlist *)(cmd->prot_sdb + 1);
	}

	/*
	 * Special handling for passthrough commands, which don't go to the ULP
	 * at all:
	 */
	if (blk_rq_is_passthrough(req))
		return scsi_setup_scsi_cmnd(sdev, req);

	if (sdev->handler && sdev->handler->prep_fn) {
		blk_status_t ret = sdev->handler->prep_fn(sdev, req);

		if (ret != BLK_STS_OK)
			return ret;
	}

	/* Usually overridden by the ULP */
	cmd->allowed = 0;
	memset(cmd->cmnd, 0, sizeof(cmd->cmnd));
	return scsi_cmd_to_driver(cmd)->init_command(cmd);
}

/*
 * Common completion entry point behind scsi_done() and scsi_done_direct().
 * Commands submitted by the error handler or the reset ioctl are diverted
 * before the block layer is told about the completion.
 */
static void scsi_done_internal(struct scsi_cmnd *cmd, bool complete_directly)
{
	struct request *req = scsi_cmd_to_rq(cmd);

	switch (cmd->submitter) {
	case SUBMITTED_BY_BLOCK_LAYER:
		break;
	case SUBMITTED_BY_SCSI_ERROR_HANDLER:
		return scsi_eh_done(cmd);
	case SUBMITTED_BY_SCSI_RESET_IOCTL:
		return;
	}

	if (unlikely(blk_should_fake_timeout(scsi_cmd_to_rq(cmd)->q)))
		return;
	if
(unlikely(test_and_set_bit(SCMD_STATE_COMPLETE, &cmd->state)))
		return;
	trace_scsi_dispatch_cmd_done(cmd);

	if (complete_directly)
		blk_mq_complete_request_direct(req, scsi_complete);
	else
		blk_mq_complete_request(req);
}

/* Complete @cmd through blk_mq_complete_request(). */
void scsi_done(struct scsi_cmnd *cmd)
{
	scsi_done_internal(cmd, false);
}
EXPORT_SYMBOL(scsi_done);

/* Complete @cmd through blk_mq_complete_request_direct(). */
void scsi_done_direct(struct scsi_cmnd *cmd)
{
	scsi_done_internal(cmd, true);
}
EXPORT_SYMBOL(scsi_done_direct);

/* Return @budget_token to the device's budget_map. */
static void scsi_mq_put_budget(struct request_queue *q, int budget_token)
{
	struct scsi_device *sdev = q->queuedata;

	sbitmap_put(&sdev->budget_map, budget_token);
}

/*
 * When to reinvoke queueing after a resource shortage. It's 3 msecs to
 * not change behaviour from the previous unplug mechanism, experimentation
 * may prove this needs changing.
 */
#define SCSI_QUEUE_DELAY 3

/*
 * Try to obtain a budget token for the device behind @q.
 * Returns the token (>= 0) on success, -1 when none could be obtained.
 */
static int scsi_mq_get_budget(struct request_queue *q)
{
	struct scsi_device *sdev = q->queuedata;
	int token = scsi_dev_queue_ready(q, sdev);

	if (token >= 0)
		return token;

	atomic_inc(&sdev->restarts);

	/*
	 * Orders atomic_inc(&sdev->restarts) and atomic_read(&sdev->device_busy).
	 * .restarts must be incremented before .device_busy is read because the
	 * code in scsi_run_queue_async() depends on the order of these operations.
	 */
	smp_mb__after_atomic();

	/*
	 * If all in-flight requests originated from this LUN are completed
	 * before reading .device_busy, sdev->device_busy will be observed as
	 * zero, then blk_mq_delay_run_hw_queues() will dispatch this request
	 * soon. Otherwise, completion of one of these requests will observe
	 * the .restarts flag, and the request queue will be run for handling
	 * this request, see scsi_end_request().
*/
	if (unlikely(scsi_device_busy(sdev) == 0 &&
				!scsi_device_blocked(sdev)))
		blk_mq_delay_run_hw_queues(sdev->request_queue, SCSI_QUEUE_DELAY);
	return -1;
}

/* Record the budget token in the command attached to @req. */
static void scsi_mq_set_rq_budget_token(struct request *req, int token)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);

	cmd->budget_token = token;
}

/* Read back the budget token stored in the command attached to @req. */
static int scsi_mq_get_rq_budget_token(struct request *req)
{
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);

	return cmd->budget_token;
}

/*
 * blk-mq .queue_rq callback (see the scsi_mq_ops tables): checks device,
 * target and host readiness in that order, prepares the command if needed
 * and dispatches it. The error labels unwind in the reverse order.
 */
static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
			 const struct blk_mq_queue_data *bd)
{
	struct request *req = bd->rq;
	struct request_queue *q = req->q;
	struct scsi_device *sdev = q->queuedata;
	struct Scsi_Host *shost = sdev->host;
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req);
	blk_status_t ret;
	int reason;

	/* A budget token is expected to have been assigned already. */
	WARN_ON_ONCE(cmd->budget_token < 0);

	/*
	 * If the device is not in running state we will reject some or all
	 * commands.
	 */
	if (unlikely(sdev->sdev_state != SDEV_RUNNING)) {
		ret = scsi_device_state_check(sdev, req);
		if (ret != BLK_STS_OK)
			goto out_put_budget;
	}

	/* Default status for the readiness failures below. */
	ret = BLK_STS_RESOURCE;
	if (!scsi_target_queue_ready(shost, sdev))
		goto out_put_budget;
	if (unlikely(scsi_host_in_recovery(shost))) {
		if (cmd->flags & SCMD_FAIL_IF_RECOVERING)
			ret = BLK_STS_OFFLINE;
		goto out_dec_target_busy;
	}
	if (!scsi_host_queue_ready(q, shost, sdev, cmd))
		goto out_dec_target_busy;

	/*
	 * Only clear the driver-private command data if the LLD does not supply
	 * a function to initialize that data.
*/
	if (shost->hostt->cmd_size && !shost->hostt->init_cmd_priv)
		memset(scsi_cmd_priv(cmd), 0, shost->hostt->cmd_size);

	/*
	 * RQF_DONTPREP marks a request that was prepared on an earlier pass;
	 * such a request only has its COMPLETE state bit cleared.
	 */
	if (!(req->rq_flags & RQF_DONTPREP)) {
		ret = scsi_prepare_cmd(req);
		if (ret != BLK_STS_OK)
			goto out_dec_host_busy;
		req->rq_flags |= RQF_DONTPREP;
	} else {
		clear_bit(SCMD_STATE_COMPLETE, &cmd->state);
	}

	/* Drop every flag outside SCMD_PRESERVED_FLAGS, then set per-dispatch ones. */
	cmd->flags &= SCMD_PRESERVED_FLAGS;
	if (sdev->simple_tags)
		cmd->flags |= SCMD_TAGGED;
	if (bd->last)
		cmd->flags |= SCMD_LAST;

	scsi_set_resid(cmd, 0);
	memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE);
	cmd->submitter = SUBMITTED_BY_BLOCK_LAYER;

	blk_mq_start_request(req);
	reason = scsi_dispatch_cmd(cmd);
	if (reason) {
		scsi_set_blocked(cmd, reason);
		ret = BLK_STS_RESOURCE;
		goto out_dec_host_busy;
	}

	return BLK_STS_OK;

out_dec_host_busy:
	scsi_dec_host_busy(shost, cmd);
out_dec_target_busy:
	/* target_busy is only counted when the target has a queue limit. */
	if (scsi_target(sdev)->can_queue > 0)
		atomic_dec(&scsi_target(sdev)->target_busy);
out_put_budget:
	scsi_mq_put_budget(q, cmd->budget_token);
	cmd->budget_token = -1;
	switch (ret) {
	case BLK_STS_OK:
		break;
	case BLK_STS_RESOURCE:
		/* A blocked device is reported as BLK_STS_DEV_RESOURCE instead. */
		if (scsi_device_blocked(sdev))
			ret = BLK_STS_DEV_RESOURCE;
		break;
	case BLK_STS_AGAIN:
		cmd->result = DID_BUS_BUSY << 16;
		if (req->rq_flags & RQF_DONTPREP)
			scsi_mq_uninit_cmd(cmd);
		break;
	default:
		if (unlikely(!scsi_device_online(sdev)))
			cmd->result = DID_NO_CONNECT << 16;
		else
			cmd->result = DID_ERROR << 16;
		/*
		 * Make sure to release all allocated resources when
		 * we hit an error, as we will never see this command
		 * again.
*/
		if (req->rq_flags & RQF_DONTPREP)
			scsi_mq_uninit_cmd(cmd);
		scsi_run_queue_async(sdev);
		break;
	}
	return ret;
}

/*
 * blk-mq .init_request callback: allocates the sense buffer for the command
 * embedded in @rq, locates the protection data buffer when the host has
 * protection enabled, and runs the LLD's init_cmd_priv() hook if present.
 * Returns 0 on success, -ENOMEM or the hook's return value otherwise.
 */
static int scsi_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
				unsigned int hctx_idx, unsigned int numa_node)
{
	struct Scsi_Host *shost = set->driver_data;
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);
	struct scatterlist *sg;
	int ret = 0;

	cmd->sense_buffer =
		kmem_cache_alloc_node(scsi_sense_cache, GFP_KERNEL, numa_node);
	if (!cmd->sense_buffer)
		return -ENOMEM;

	if (scsi_host_get_prot(shost)) {
		/*
		 * prot_sdb is placed after struct scsi_cmnd, the LLD private
		 * data and the inline data scatterlist.
		 */
		sg = (void *)cmd + sizeof(struct scsi_cmnd) +
			shost->hostt->cmd_size;
		cmd->prot_sdb = (void *)sg + scsi_mq_inline_sgl_size(shost);
	}

	if (shost->hostt->init_cmd_priv) {
		ret = shost->hostt->init_cmd_priv(shost, cmd);
		/* On hook failure the sense buffer allocated above is released. */
		if (ret < 0)
			kmem_cache_free(scsi_sense_cache, cmd->sense_buffer);
	}

	return ret;
}

/*
 * blk-mq .exit_request callback: runs the LLD's exit_cmd_priv() hook if
 * present, then frees the sense buffer.
 */
static void scsi_mq_exit_request(struct blk_mq_tag_set *set, struct request *rq,
				 unsigned int hctx_idx)
{
	struct Scsi_Host *shost = set->driver_data;
	struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(rq);

	if (shost->hostt->exit_cmd_priv)
		shost->hostt->exit_cmd_priv(shost, cmd);
	kmem_cache_free(scsi_sense_cache, cmd->sense_buffer);
}


/* Forward polling to the LLD's mq_poll() hook; returns 0 when there is none. */
static int scsi_mq_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
{
	struct Scsi_Host *shost = hctx->driver_data;

	if (shost->hostt->mq_poll)
		return shost->hostt->mq_poll(shost, hctx->queue_num);

	return 0;
}

/* Store the host (passed as @data) as the hardware context's driver data. */
static int scsi_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int hctx_idx)
{
	struct Scsi_Host *shost = data;

	hctx->driver_data = shost;
	return 0;
}

/* Use the LLD's map_queues() hook when provided, else the blk-mq default. */
static void scsi_map_queues(struct blk_mq_tag_set *set)
{
	struct Scsi_Host *shost = container_of(set, struct Scsi_Host, tag_set);

	if (shost->hostt->map_queues)
		return shost->hostt->map_queues(shost);
	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
}

/*
 * Fill @lim from the host's DMA and segment settings. @lim is zeroed first,
 * so every field not assigned here ends up as zero.
 */
void scsi_init_limits(struct Scsi_Host *shost, struct queue_limits *lim)
{
	struct device *dev = shost->dma_dev;

	memset(lim, 0, sizeof(*lim));
	lim->max_segments =
		min_t(unsigned short, shost->sg_tablesize,
SG_MAX_SEGMENTS);

	if (scsi_host_prot_dma(shost)) {
		/*
		 * Note that this writes the clamped value back into the host
		 * structure, not only into @lim.
		 */
		shost->sg_prot_tablesize =
			min_not_zero(shost->sg_prot_tablesize,
				     (unsigned short)SCSI_MAX_PROT_SG_SEGMENTS);
		BUG_ON(shost->sg_prot_tablesize < shost->sg_tablesize);
		lim->max_integrity_segments = shost->sg_prot_tablesize;
	}

	lim->max_hw_sectors = shost->max_sectors;
	lim->seg_boundary_mask = shost->dma_boundary;
	lim->max_segment_size = shost->max_segment_size;
	lim->virt_boundary_mask = shost->virt_boundary_mask;
	/* Never less strict than the value from dma_get_cache_alignment(). */
	lim->dma_alignment = max_t(unsigned int,
		shost->dma_alignment, dma_get_cache_alignment() - 1);

	/*
	 * Propagate the DMA formation properties to the dma-mapping layer as
	 * a courtesy service to the LLDDs.  This needs to check that the buses
	 * actually support the DMA API first, though.
	 */
	if (dev->dma_parms) {
		dma_set_seg_boundary(dev, shost->dma_boundary);
		dma_set_max_seg_size(dev, shost->max_segment_size);
	}
}
EXPORT_SYMBOL_GPL(scsi_init_limits);

/*
 * blk-mq operations for hosts whose template has no commit_rqs hook
 * (selected in scsi_mq_setup_tags()). Identical to scsi_mq_ops except that
 * .commit_rqs is left unset.
 */
static const struct blk_mq_ops scsi_mq_ops_no_commit = {
	.get_budget	= scsi_mq_get_budget,
	.put_budget	= scsi_mq_put_budget,
	.queue_rq	= scsi_queue_rq,
	.complete	= scsi_complete,
	.timeout	= scsi_timeout,
#ifdef CONFIG_BLK_DEBUG_FS
	.show_rq	= scsi_show_rq,
#endif
	.init_request	= scsi_mq_init_request,
	.exit_request	= scsi_mq_exit_request,
	.cleanup_rq	= scsi_cleanup_rq,
	.busy		= scsi_mq_lld_busy,
	.map_queues	= scsi_map_queues,
	.init_hctx	= scsi_init_hctx,
	.poll		= scsi_mq_poll,
	.set_rq_budget_token = scsi_mq_set_rq_budget_token,
	.get_rq_budget_token = scsi_mq_get_rq_budget_token,
};


/*
 * Forward blk-mq's commit_rqs to the LLD. The hook is called
 * unconditionally; scsi_mq_ops is only selected when the hook exists.
 */
static void scsi_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct Scsi_Host *shost = hctx->driver_data;

	shost->hostt->commit_rqs(shost, hctx->queue_num);
}

static const struct blk_mq_ops scsi_mq_ops = {
	.get_budget	= scsi_mq_get_budget,
	.put_budget	= scsi_mq_put_budget,
	.queue_rq	= scsi_queue_rq,
	.commit_rqs	= scsi_commit_rqs,
	.complete	= scsi_complete,
	.timeout	= scsi_timeout,
#ifdef CONFIG_BLK_DEBUG_FS
	.show_rq	= scsi_show_rq,
#endif
	.init_request	= scsi_mq_init_request,
	.exit_request	=
scsi_mq_exit_request,
	.cleanup_rq	= scsi_cleanup_rq,
	.busy		= scsi_mq_lld_busy,
	.map_queues	= scsi_map_queues,
	.init_hctx	= scsi_init_hctx,
	.poll		= scsi_mq_poll,
	.set_rq_budget_token = scsi_mq_set_rq_budget_token,
	.get_rq_budget_token = scsi_mq_get_rq_budget_token,
};

/*
 * Configure and allocate the blk-mq tag set embedded in @shost.
 * Returns the result of blk_mq_alloc_tag_set().
 */
int scsi_mq_setup_tags(struct Scsi_Host *shost)
{
	unsigned int cmd_size, sgl_size;
	struct blk_mq_tag_set *tag_set = &shost->tag_set;

	/*
	 * Per-request payload: struct scsi_cmnd, the LLD private area and the
	 * inline scatterlist (at least one entry), plus -- when protection is
	 * enabled -- a scsi_data_buffer and its inline protection scatterlist.
	 */
	sgl_size = max_t(unsigned int, sizeof(struct scatterlist),
				scsi_mq_inline_sgl_size(shost));
	cmd_size = sizeof(struct scsi_cmnd) + shost->hostt->cmd_size + sgl_size;
	if (scsi_host_get_prot(shost))
		cmd_size += sizeof(struct scsi_data_buffer) +
			sizeof(struct scatterlist) * SCSI_INLINE_PROT_SG_CNT;

	memset(tag_set, 0, sizeof(*tag_set));
	/* The ops table with .commit_rqs is only used when the LLD has the hook. */
	if (shost->hostt->commit_rqs)
		tag_set->ops = &scsi_mq_ops;
	else
		tag_set->ops = &scsi_mq_ops_no_commit;
	/* A zero nr_hw_queues or nr_maps falls back to 1. */
	tag_set->nr_hw_queues = shost->nr_hw_queues ? : 1;
	tag_set->nr_maps = shost->nr_maps ? : 1;
	tag_set->queue_depth = shost->can_queue;
	tag_set->cmd_size = cmd_size;
	tag_set->numa_node = dev_to_node(shost->dma_dev);
	if (shost->hostt->tag_alloc_policy_rr)
		tag_set->flags |= BLK_MQ_F_TAG_RR;
	if (shost->queuecommand_may_block)
		tag_set->flags |= BLK_MQ_F_BLOCKING;
	tag_set->driver_data = shost;
	if (shost->host_tagset)
		tag_set->flags |= BLK_MQ_F_TAG_HCTX_SHARED;

	return blk_mq_alloc_tag_set(tag_set);
}

/*
 * kref release function for the host's tagset_refcnt: frees the tag set and
 * signals the tagset_freed completion.
 */
void scsi_mq_free_tags(struct kref *kref)
{
	struct Scsi_Host *shost = container_of(kref, typeof(*shost),
					       tagset_refcnt);

	blk_mq_free_tag_set(&shost->tag_set);
	complete(&shost->tagset_freed);
}

/**
 * scsi_device_from_queue - return sdev associated with a request_queue
 * @q: The request queue to return the sdev from
 *
 * Return the sdev associated with a request queue or NULL if the
 * request_queue does not reference a SCSI device.
*/ struct scsi_device *scsi_device_from_queue(struct request_queue *q) { struct scsi_device *sdev = NULL; if (q->mq_ops == &scsi_mq_ops_no_commit || q->mq_ops == &scsi_mq_ops) sdev = q->queuedata; if (!sdev || !get_device(&sdev->sdev_gendev)) sdev = NULL; return sdev; } /* * pktcdvd should have been integrated into the SCSI layers, but for historical * reasons like the old IDE driver it isn't. This export allows it to safely * probe if a given device is a SCSI one and only attach to that. */ #ifdef CONFIG_CDROM_PKTCDVD_MODULE EXPORT_SYMBOL_GPL(scsi_device_from_queue); #endif /** * scsi_block_requests - Utility function used by low-level drivers to prevent * further commands from being queued to the device. * @shost: host in question * * There is no timer nor any other means by which the requests get unblocked * other than the low-level driver calling scsi_unblock_requests(). */ void scsi_block_requests(struct Scsi_Host *shost) { shost->host_self_blocked = 1; } EXPORT_SYMBOL(scsi_block_requests); /** * scsi_unblock_requests - Utility function used by low-level drivers to allow * further commands to be queued to the device. * @shost: host in question * * There is no timer nor any other means by which the requests get unblocked * other than the low-level driver calling scsi_unblock_requests(). This is done * as an API function so that changes to the internals of the scsi mid-layer * won't require wholesale changes to drivers that use this feature. */ void scsi_unblock_requests(struct Scsi_Host *shost) { shost->host_self_blocked = 0; scsi_run_host_queues(shost); } EXPORT_SYMBOL(scsi_unblock_requests); void scsi_exit_queue(void) { kmem_cache_destroy(scsi_sense_cache); } /** * scsi_mode_select - issue a mode select * @sdev: SCSI device to be queried * @pf: Page format bit (1 == standard, 0 == vendor specific) * @sp: Save page bit (0 == don't save, 1 == save) * @buffer: request buffer (may not be smaller than eight bytes) * @len: length of request buffer. 
* @timeout: command timeout * @retries: number of retries before failing * @data: returns a structure abstracting the mode header data * @sshdr: place to put sense data (or NULL if no sense to be collected). * must be SCSI_SENSE_BUFFERSIZE big. * * Returns zero if successful; negative error number or scsi * status on error * */ int scsi_mode_select(struct scsi_device *sdev, int pf, int sp, unsigned char *buffer, int len, int timeout, int retries, struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { unsigned char cmd[10]; unsigned char *real_buffer; const struct scsi_exec_args exec_args = { .sshdr = sshdr, }; int ret; memset(cmd, 0, sizeof(cmd)); cmd[1] = (pf ? 0x10 : 0) | (sp ? 0x01 : 0); /* * Use MODE SELECT(10) if the device asked for it or if the mode page * and the mode select header cannot fit within the maximumm 255 bytes * of the MODE SELECT(6) command. */ if (sdev->use_10_for_ms || len + 4 > 255 || data->block_descriptor_length > 255) { if (len > 65535 - 8) return -EINVAL; real_buffer = kmalloc(8 + len, GFP_KERNEL); if (!real_buffer) return -ENOMEM; memcpy(real_buffer + 8, buffer, len); len += 8; real_buffer[0] = 0; real_buffer[1] = 0; real_buffer[2] = data->medium_type; real_buffer[3] = data->device_specific; real_buffer[4] = data->longlba ? 
0x01 : 0; real_buffer[5] = 0; put_unaligned_be16(data->block_descriptor_length, &real_buffer[6]); cmd[0] = MODE_SELECT_10; put_unaligned_be16(len, &cmd[7]); } else { if (data->longlba) return -EINVAL; real_buffer = kmalloc(4 + len, GFP_KERNEL); if (!real_buffer) return -ENOMEM; memcpy(real_buffer + 4, buffer, len); len += 4; real_buffer[0] = 0; real_buffer[1] = data->medium_type; real_buffer[2] = data->device_specific; real_buffer[3] = data->block_descriptor_length; cmd[0] = MODE_SELECT; cmd[4] = len; } ret = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_OUT, real_buffer, len, timeout, retries, &exec_args); kfree(real_buffer); return ret; } EXPORT_SYMBOL_GPL(scsi_mode_select); /** * scsi_mode_sense - issue a mode sense, falling back from 10 to six bytes if necessary. * @sdev: SCSI device to be queried * @dbd: set to prevent mode sense from returning block descriptors * @modepage: mode page being requested * @subpage: sub-page of the mode page being requested * @buffer: request buffer (may not be smaller than eight bytes) * @len: length of request buffer. * @timeout: command timeout * @retries: number of retries before failing * @data: returns a structure abstracting the mode header data * @sshdr: place to put sense data (or NULL if no sense to be collected). * must be SCSI_SENSE_BUFFERSIZE big. 
*
 *	Returns zero if successful, or a negative error number on failure
 */
int
scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, int subpage,
		  unsigned char *buffer, int len, int timeout, int retries,
		  struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr)
{
	unsigned char cmd[12];
	int use_10_for_ms;
	int header_length;
	int result;
	struct scsi_sense_hdr my_sshdr;
	/*
	 * Failure table handed to scsi_execute_cmd(): one entry matching
	 * CHECK CONDITION with a UNIT ATTENTION sense key (any ASC/ASCQ),
	 * allowed up to @retries times, followed by an empty terminator.
	 */
	struct scsi_failure failure_defs[] = {
		{
			.sense = UNIT_ATTENTION,
			.asc = SCMD_FAILURE_ASC_ANY,
			.ascq = SCMD_FAILURE_ASCQ_ANY,
			.allowed = retries,
			.result = SAM_STAT_CHECK_CONDITION,
		},
		{}
	};
	struct scsi_failures failures = {
		.failure_definitions = failure_defs,
	};
	const struct scsi_exec_args exec_args = {
		/* caller might not be interested in sense, but we need it */
		.sshdr = sshdr ? : &my_sshdr,
		.failures = &failures,
	};

	memset(data, 0, sizeof(*data));
	/* 12 is the size of cmd[] declared above. */
	memset(&cmd[0], 0, 12);

	/* Devices with set_dbd_for_ms always get 8 here, overriding @dbd. */
	dbd = sdev->set_dbd_for_ms ? 8 : dbd;
	cmd[1] = dbd & 0x18;	/* allows DBD and LLBA bits */
	cmd[2] = modepage;
	cmd[3] = subpage;

	/* From here on sshdr is never NULL: it may point at my_sshdr. */
	sshdr = exec_args.sshdr;

 retry:
	use_10_for_ms = sdev->use_10_for_ms || len > 255;

	if (use_10_for_ms) {
		/* MODE SENSE(10): 8-byte header, 16-bit allocation length. */
		if (len < 8 || len > 65535)
			return -EINVAL;

		cmd[0] = MODE_SENSE_10;
		put_unaligned_be16(len, &cmd[7]);
		header_length = 8;
	} else {
		/* MODE SENSE(6): 4-byte header, single-byte allocation length. */
		if (len < 4)
			return -EINVAL;

		cmd[0] = MODE_SENSE;
		cmd[4] = len;
		header_length = 4;
	}

	memset(buffer, 0, len);

	result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, buffer, len,
				  timeout, retries, &exec_args);
	if (result < 0)
		return result;

	/* This code looks awful: what it's doing is making sure an
	 * ILLEGAL REQUEST sense return identifies the actual command
	 * byte as the problem.
MODE_SENSE commands can return
	 * ILLEGAL REQUEST if the code page isn't supported */
	if (!scsi_status_is_good(result)) {
		if (scsi_sense_valid(sshdr)) {
			if ((sshdr->sense_key == ILLEGAL_REQUEST) &&
			    (sshdr->asc == 0x20) && (sshdr->ascq == 0)) {
				/*
				 * Invalid command operation code: retry using
				 * MODE SENSE(6) if this was a MODE SENSE(10)
				 * request, except if the request mode page is
				 * too large for MODE SENSE single byte
				 * allocation length field.
				 */
				if (use_10_for_ms) {
					if (len > 255)
						return -EIO;
					/* Clear the device flag so the retry uses the 6-byte form. */
					sdev->use_10_for_ms = 0;
					goto retry;
				}
			}
		}
		/* Every other non-good status is reported as -EIO. */
		return -EIO;
	}
	if (unlikely(buffer[0] == 0x86 && buffer[1] == 0x0b &&
		     (modepage == 6 || modepage == 8))) {
		/* Initio breakage? */
		/* Such a response is reported as header-less with length 13. */
		header_length = 0;
		data->length = 13;
		data->medium_type = 0;
		data->device_specific = 0;
		data->longlba = 0;
		data->block_descriptor_length = 0;
	} else if (use_10_for_ms) {
		/*
		 * 10-byte header: big-endian 16-bit length at bytes 0-1 (plus 2),
		 * LONGLBA in bit 0 of byte 4, big-endian 16-bit block descriptor
		 * length at bytes 6-7.
		 */
		data->length = get_unaligned_be16(&buffer[0]) + 2;
		data->medium_type = buffer[2];
		data->device_specific = buffer[3];
		data->longlba = buffer[4] & 0x01;
		data->block_descriptor_length = get_unaligned_be16(&buffer[6]);
	} else {
		/*
		 * 6-byte header: one-byte length at byte 0 (plus 1) and one-byte
		 * block descriptor length at byte 3; longlba is not set here.
		 */
		data->length = buffer[0] + 1;
		data->medium_type = buffer[1];
		data->device_specific = buffer[2];
		data->block_descriptor_length = buffer[3];
	}
	data->header_length = header_length;

	return 0;
}
EXPORT_SYMBOL(scsi_mode_sense);

/**
 *	scsi_test_unit_ready - test if unit is ready
 *	@sdev:	scsi device to test.
 *	@timeout: command timeout
 *	@retries: number of retries before failing
 *	@sshdr: output pointer for decoded sense information.
 *
 *	Returns zero if the TEST UNIT READY command succeeded, or the
 *	non-zero result of the last attempt if it failed.  For
 *	removable media, UNIT_ATTENTION sets ->changed flag.
 **/
int
scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries,
		     struct scsi_sense_hdr *sshdr)
{
	/* Six-byte CDB: opcode followed by zeroes. */
	char cmd[] = {
		TEST_UNIT_READY, 0, 0, 0, 0, 0,
	};
	const struct scsi_exec_args exec_args = {
		.sshdr = sshdr,
	};
	int result;

	/* try to eat the UNIT_ATTENTION if there are enough retries */
	do {
		/*
		 * Each command is issued with a retry count of 1; @retries is
		 * consumed by this loop instead.
		 */
		result = scsi_execute_cmd(sdev, cmd, REQ_OP_DRV_IN, NULL, 0,
					  timeout, 1, &exec_args);
		if (sdev->removable && result > 0 && scsi_sense_valid(sshdr) &&
		    sshdr->sense_key == UNIT_ATTENTION)
			sdev->changed = 1;
	} while (result > 0 && scsi_sense_valid(sshdr) &&
		 sshdr->sense_key == UNIT_ATTENTION && --retries);

	return result;
}
EXPORT_SYMBOL(scsi_test_unit_ready);

/**
 *	scsi_device_set_state - Take the given device through the device state model.
 *	@sdev:	scsi device to change the state of.
 *	@state:	state to change to.
 *
 *	Returns zero if successful or an error if the requested
 *	transition is illegal.
 */
int
scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
{
	enum scsi_device_state oldstate = sdev->sdev_state;

	/* Setting the current state again is a no-op that succeeds. */
	if (state == oldstate)
		return 0;

	/*
	 * The outer switch selects the requested state; each inner switch
	 * lists the states it may be entered from.  Anything else is illegal.
	 */
	switch (state) {
	case SDEV_CREATED:
		switch (oldstate) {
		case SDEV_CREATED_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_RUNNING:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_QUIESCE:
		switch (oldstate) {
		case SDEV_RUNNING:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_OFFLINE:
	case SDEV_TRANSPORT_OFFLINE:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_QUIESCE:
		case SDEV_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_BLOCK:
		switch (oldstate) {
		case SDEV_RUNNING:
		case SDEV_CREATED_BLOCK:
		case SDEV_QUIESCE:
		case SDEV_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_CREATED_BLOCK:
		switch (oldstate) {
		case SDEV_CREATED:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_CANCEL:
		switch
(oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_QUIESCE:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
			break;
		default:
			goto illegal;
		}
		break;

	case SDEV_DEL:
		switch (oldstate) {
		case SDEV_CREATED:
		case SDEV_RUNNING:
		case SDEV_OFFLINE:
		case SDEV_TRANSPORT_OFFLINE:
		case SDEV_CANCEL:
		case SDEV_BLOCK:
		case SDEV_CREATED_BLOCK:
			break;
		default:
			goto illegal;
		}
		break;

	}
	/* Legal transition: clear offline_already and commit the new state. */
	sdev->offline_already = false;
	sdev->sdev_state = state;
	return 0;

 illegal:
	/* Rejected transition: log both state names and leave the state alone. */
	SCSI_LOG_ERROR_RECOVERY(1,
				sdev_printk(KERN_ERR, sdev,
					    "Illegal state transition %s->%s",
					    scsi_device_state_name(oldstate),
					    scsi_device_state_name(state))
				);
	return -EINVAL;
}
EXPORT_SYMBOL(scsi_device_set_state);

/**
 *	scsi_evt_emit - emit a single SCSI device uevent
 *	@sdev: associated SCSI device
 *	@evt: event to emit
 *
 *	Send a single uevent (scsi_event) to the associated scsi_device.
 */
static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt)
{
	int idx = 0;
	/* At most one environment string is added before the NULL terminator. */
	char *envp[3];

	switch (evt->evt_type) {
	case SDEV_EVT_MEDIA_CHANGE:
		envp[idx++] = "SDEV_MEDIA_CHANGE=1";
		break;
	case SDEV_EVT_INQUIRY_CHANGE_REPORTED:
		/* Changed INQUIRY data also triggers a rescan before the uevent. */
		scsi_rescan_device(sdev);
		envp[idx++] = "SDEV_UA=INQUIRY_DATA_HAS_CHANGED";
		break;
	case SDEV_EVT_CAPACITY_CHANGE_REPORTED:
		envp[idx++] = "SDEV_UA=CAPACITY_DATA_HAS_CHANGED";
		break;
	case SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED:
		envp[idx++] = "SDEV_UA=THIN_PROVISIONING_SOFT_THRESHOLD_REACHED";
		break;
	case SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED:
		envp[idx++] = "SDEV_UA=MODE_PARAMETERS_CHANGED";
		break;
	case SDEV_EVT_LUN_CHANGE_REPORTED:
		envp[idx++] = "SDEV_UA=REPORTED_LUNS_DATA_HAS_CHANGED";
		break;
	case SDEV_EVT_ALUA_STATE_CHANGE_REPORTED:
		envp[idx++] = "SDEV_UA=ASYMMETRIC_ACCESS_STATE_CHANGED";
		break;
	case SDEV_EVT_POWER_ON_RESET_OCCURRED:
		envp[idx++] = "SDEV_UA=POWER_ON_RESET_OCCURRED";
		break;
	default:
		/* do nothing */
		break;
	}

	/* Unknown event types still send a KOBJ_CHANGE uevent, with an empty environment. */
	envp[idx++] = NULL;

	kobject_uevent_env(&sdev->sdev_gendev.kobj, KOBJ_CHANGE, envp);
}

/**
 *	scsi_evt_thread - send a uevent for each scsi event
 *	@work: work struct for
scsi_device
 *
 *	Dispatch queued events to their associated scsi_device kobjects
 *	as uevents.
 */
void scsi_evt_thread(struct work_struct *work)
{
	struct scsi_device *sdev;
	enum scsi_device_event evt_type;
	/* Private list that the device's queued events are moved onto. */
	LIST_HEAD(event_list);

	sdev = container_of(work, struct scsi_device, event_work);

	/* Turn every bit set in pending_events into a queued event, clearing the bit. */
	for (evt_type = SDEV_EVT_FIRST; evt_type <= SDEV_EVT_LAST; evt_type++)
		if (test_and_clear_bit(evt_type, sdev->pending_events))
			sdev_evt_send_simple(sdev, evt_type, GFP_KERNEL);

	/*
	 * Repeatedly take the whole event list under list_lock, then emit and
	 * free each event without the lock held.  Stop once a pass finds the
	 * list empty.
	 */
	while (1) {
		struct scsi_event *evt;
		struct list_head *this, *tmp;
		unsigned long flags;

		spin_lock_irqsave(&sdev->list_lock, flags);
		list_splice_init(&sdev->event_list, &event_list);
		spin_unlock_irqrestore(&sdev->list_lock, flags);

		if (list_empty(&event_list))
			break;

		list_for_each_safe(this, tmp, &event_list) {
			evt = list_entry(this, struct scsi_event, node);
			list_del(&evt->node);
			scsi_evt_emit(sdev, evt);
			kfree(evt);
		}
	}
}

/**
 * 	sdev_evt_send - send asserted event to uevent thread
 *	@sdev: scsi_device event occurred on
 *	@evt: event to send
 *
 *	Assert scsi device event asynchronously.
 */
void sdev_evt_send(struct scsi_device *sdev, struct scsi_event *evt)
{
	unsigned long flags;

#if 0
	/* FIXME: currently this check eliminates all media change events
	 * for polled devices.  Need to update to discriminate between AN
	 * and polled events */
	if (!test_bit(evt->evt_type, sdev->supported_events)) {
		kfree(evt);
		return;
	}
#endif

	/* Queue the event and schedule event_work, both under list_lock. */
	spin_lock_irqsave(&sdev->list_lock, flags);
	list_add_tail(&evt->node, &sdev->event_list);
	schedule_work(&sdev->event_work);
	spin_unlock_irqrestore(&sdev->list_lock, flags);
}
EXPORT_SYMBOL_GPL(sdev_evt_send);

/**
 * 	sdev_evt_alloc - allocate a new scsi event
 *	@evt_type: type of event to allocate
 *	@gfpflags: GFP flags for allocation
 *
 *	Allocates and returns a new scsi_event.
*/
struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type,
				  gfp_t gfpflags)
{
	struct scsi_event *evt = kzalloc(sizeof(struct scsi_event), gfpflags);
	/* Allocation failure is reported to the caller as NULL. */
	if (!evt)
		return NULL;

	evt->evt_type = evt_type;
	INIT_LIST_HEAD(&evt->node);

	/* evt_type-specific initialization, if any */
	/* Every listed type currently shares the empty default branch. */
	switch (evt_type) {
	case SDEV_EVT_MEDIA_CHANGE:
	case SDEV_EVT_INQUIRY_CHANGE_REPORTED:
	case SDEV_EVT_CAPACITY_CHANGE_REPORTED:
	case SDEV_EVT_SOFT_THRESHOLD_REACHED_REPORTED:
	case SDEV_EVT_MODE_PARAMETER_CHANGE_REPORTED:
	case SDEV_EVT_LUN_CHANGE_REPORTED:
	case SDEV_EVT_ALUA_STATE_CHANGE_REPORTED:
	case SDEV_EVT_POWER_ON_RESET_OCCURRED:
	default:
		/* do nothing */
		break;
	}

	return evt;
}
EXPORT_SYMBOL_GPL(sdev_evt_alloc);

/**
 * 	sdev_evt_send_simple - send asserted event to uevent thread
 *	@sdev: scsi_device event occurred on
 *	@evt_type: type of event to send
 *	@gfpflags: GFP flags for allocation
 *
 *	Assert scsi device event asynchronously, given an event type.
 */
void sdev_evt_send_simple(struct scsi_device *sdev,
			  enum scsi_device_event evt_type, gfp_t gfpflags)
{
	struct scsi_event *evt = sdev_evt_alloc(evt_type, gfpflags);
	/* If the event cannot be allocated it is dropped, with an error logged. */
	if (!evt) {
		sdev_printk(KERN_ERR, sdev, "event %d eaten due to OOM\n",
			    evt_type);
		return;
	}

	sdev_evt_send(sdev, evt);
}
EXPORT_SYMBOL_GPL(sdev_evt_send_simple);

/**
 *	scsi_device_quiesce - Block all commands except power management.
 *	@sdev:	scsi device to quiesce.
 *
 *	This works by trying to transition to the SDEV_QUIESCE state
 *	(which must be a legal transition).  When the device is in this
 *	state, only power management requests will be accepted, all others will
 *	be deferred.
 *
 *	Must be called with user context, may sleep.
 *
 *	Returns zero if successful or an error if not.
 */
int
scsi_device_quiesce(struct scsi_device *sdev)
{
	struct request_queue *q = sdev->request_queue;
	unsigned int memflags;
	int err;

	/*
	 * It is allowed to call scsi_device_quiesce() multiple times from
	 * the same context but concurrent scsi_device_quiesce() calls are
	 * not allowed.
*/
	WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);

	/* Already quiesced by the calling task: nothing more to do. */
	if (sdev->quiesced_by == current)
		return 0;

	blk_set_pm_only(q);

	memflags = blk_mq_freeze_queue(q);
	/*
	 * Ensure that the effect of blk_set_pm_only() will be visible
	 * for percpu_ref_tryget() callers that occur after the queue
	 * unfreeze even if the queue was already frozen before this function
	 * was called. See also https://lwn.net/Articles/573497/.
	 */
	synchronize_rcu();
	blk_mq_unfreeze_queue(q, memflags);

	/*
	 * Change state under state_mutex.  On success record the calling task
	 * as the quiescer; on failure undo blk_set_pm_only().
	 */
	mutex_lock(&sdev->state_mutex);
	err = scsi_device_set_state(sdev, SDEV_QUIESCE);
	if (err == 0)
		sdev->quiesced_by = current;
	else
		blk_clear_pm_only(q);
	mutex_unlock(&sdev->state_mutex);

	return err;
}
EXPORT_SYMBOL(scsi_device_quiesce);

/**
 *	scsi_device_resume - Restart user issued commands to a quiesced device.
 *	@sdev:	scsi device to resume.
 *
 *	Moves the device from quiesced back to running and restarts the
 *	queues.
 *
 *	Must be called with user context, may sleep.
 */
void scsi_device_resume(struct scsi_device *sdev)
{
	/* check if the device state was mutated prior to resume, and if
	 * so assume the state is being managed elsewhere (for example
	 * device deleted during suspend)
	 */
	mutex_lock(&sdev->state_mutex);
	if (sdev->sdev_state == SDEV_QUIESCE)
		scsi_device_set_state(sdev, SDEV_RUNNING);
	/* pm-only mode is cleared whenever a quiescer is recorded, whatever the state. */
	if (sdev->quiesced_by) {
		sdev->quiesced_by = NULL;
		blk_clear_pm_only(sdev->request_queue);
	}
	mutex_unlock(&sdev->state_mutex);
}
EXPORT_SYMBOL(scsi_device_resume);

/* starget_for_each_device() callback: quiesce @sdev; @data and the result are ignored. */
static void
device_quiesce_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_quiesce(sdev);
}

/* Run scsi_device_quiesce() on each device starget_for_each_device() visits for @starget. */
void
scsi_target_quiesce(struct scsi_target *starget)
{
	starget_for_each_device(starget, NULL, device_quiesce_fn);
}
EXPORT_SYMBOL(scsi_target_quiesce);

/* starget_for_each_device() callback: resume @sdev; @data is ignored. */
static void
device_resume_fn(struct scsi_device *sdev, void *data)
{
	scsi_device_resume(sdev);
}

/* Run scsi_device_resume() on each device starget_for_each_device() visits for @starget. */
void
scsi_target_resume(struct scsi_target *starget)
{
	starget_for_each_device(starget, NULL, device_resume_fn);
}
EXPORT_SYMBOL(scsi_target_resume);

static int __scsi_internal_device_block_nowait(struct
scsi_device *sdev)
{
	/*
	 * Try SDEV_BLOCK first.  If that transition is refused, fall back to
	 * SDEV_CREATED_BLOCK and return the result of that attempt.
	 */
	if (scsi_device_set_state(sdev, SDEV_BLOCK))
		return scsi_device_set_state(sdev, SDEV_CREATED_BLOCK);

	return 0;
}

/*
 * Unquiesce the request queue, but only if queue_stopped was 1; the
 * cmpxchg() resets it to 0 in the same step.
 */
void scsi_start_queue(struct scsi_device *sdev)
{
	if (cmpxchg(&sdev->queue_stopped, 1, 0))
		blk_mq_unquiesce_queue(sdev->request_queue);
}

/*
 * Start quiescing the request queue, but only if queue_stopped was 0; the
 * cmpxchg() sets it to 1 in the same step.
 */
static void scsi_stop_queue(struct scsi_device *sdev)
{
	/*
	 * The atomic variable of ->queue_stopped covers that
	 * blk_mq_quiesce_queue* is balanced with blk_mq_unquiesce_queue.
	 *
	 * The caller needs to wait until quiesce is done.
	 */
	if (!cmpxchg(&sdev->queue_stopped, 0, 1))
		blk_mq_quiesce_queue_nowait(sdev->request_queue);
}

/**
 * scsi_internal_device_block_nowait - try to transition to the SDEV_BLOCK state
 * @sdev: device to block
 *
 * Pause SCSI command processing on the specified device. Does not sleep.
 *
 * Returns zero if successful or a negative error code upon failure.
 *
 * Notes:
 * This routine transitions the device to the SDEV_BLOCK state (which must be
 * a legal transition). When the device is in this state, command processing
 * is paused until the device leaves the SDEV_BLOCK state. See also
 * scsi_internal_device_unblock_nowait().
 */
int scsi_internal_device_block_nowait(struct scsi_device *sdev)
{
	int ret = __scsi_internal_device_block_nowait(sdev);

	/*
	 * The device has transitioned to SDEV_BLOCK.  Stop the
	 * block layer from calling the midlayer with this device's
	 * request queue.
	 */
	if (!ret)
		scsi_stop_queue(sdev);
	return ret;
}
EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait);

/**
 * scsi_device_block - try to transition to the SDEV_BLOCK state
 * @sdev: device to block
 * @data: dummy argument, ignored
 *
 * Pause SCSI command processing on the specified device. Callers must wait
 * until all ongoing scsi_queue_rq() calls have finished after this function
 * returns.
 *
 * Note:
 * This routine transitions the device to the SDEV_BLOCK state (which must be
 * a legal transition). When the device is in this state, command processing
 * is paused until the device leaves the SDEV_BLOCK state.
See also
 * scsi_internal_device_unblock().
 */
static void scsi_device_block(struct scsi_device *sdev, void *data)
{
	int err;
	enum scsi_device_state state;

	mutex_lock(&sdev->state_mutex);
	err = __scsi_internal_device_block_nowait(sdev);
	/* Sample the state under the mutex; it is only used in the warning below. */
	state = sdev->sdev_state;
	if (err == 0)
		/*
		 * scsi_stop_queue() must be called with the state_mutex
		 * held. Otherwise a simultaneous scsi_start_queue() call
		 * might unquiesce the queue before we quiesce it.
		 */
		scsi_stop_queue(sdev);

	mutex_unlock(&sdev->state_mutex);

	/* This function returns void, so a failure is only reported by a one-time warning. */
	WARN_ONCE(err, "%s: failed to block %s in state %d\n",
		  __func__, dev_name(&sdev->sdev_gendev), state);
}

/**
 * scsi_internal_device_unblock_nowait - resume a device after a block request
 * @sdev:	device to resume
 * @new_state:	state to set the device to after unblocking
 *
 * Restart the device queue for a previously suspended SCSI device. Does not
 * sleep.
 *
 * Returns zero if successful or a negative error code upon failure.
 *
 * Notes:
 * This routine transitions the device to the SDEV_RUNNING state or to one of
 * the offline states (which must be a legal transition) allowing the midlayer
 * to goose the queue for this device.
 */
int scsi_internal_device_unblock_nowait(struct scsi_device *sdev,
					enum scsi_device_state new_state)
{
	/* Only SDEV_RUNNING and SDEV_TRANSPORT_OFFLINE are accepted as @new_state. */
	switch (new_state) {
	case SDEV_RUNNING:
	case SDEV_TRANSPORT_OFFLINE:
		break;
	default:
		return -EINVAL;
	}

	/*
	 * Try to transition the scsi device to SDEV_RUNNING or one of the
	 * offlined states and goose the device queue if successful.
*/
	switch (sdev->sdev_state) {
	case SDEV_BLOCK:
	case SDEV_TRANSPORT_OFFLINE:
		/* The state is written directly, not through scsi_device_set_state(). */
		sdev->sdev_state = new_state;
		break;
	case SDEV_CREATED_BLOCK:
		/*
		 * A device blocked before it ever ran returns to SDEV_CREATED
		 * unless an offline state was requested.
		 * NOTE(review): the check at the top of this function already
		 * rejects SDEV_OFFLINE, so that half of the test looks
		 * unreachable - confirm.
		 */
		if (new_state == SDEV_TRANSPORT_OFFLINE ||
		    new_state == SDEV_OFFLINE)
			sdev->sdev_state = new_state;
		else
			sdev->sdev_state = SDEV_CREATED;
		break;
	case SDEV_CANCEL:
	case SDEV_OFFLINE:
		/* State is left unchanged; the queue is still restarted below. */
		break;
	default:
		return -EINVAL;
	}
	scsi_start_queue(sdev);

	return 0;
}
EXPORT_SYMBOL_GPL(scsi_internal_device_unblock_nowait);

/**
 * scsi_internal_device_unblock - resume a device after a block request
 * @sdev:	device to resume
 * @new_state:	state to set the device to after unblocking
 *
 * Restart the device queue for a previously suspended SCSI device. May sleep.
 *
 * Returns zero if successful or a negative error code upon failure.
 *
 * Notes:
 * This routine transitions the device to the SDEV_RUNNING state or to one of
 * the offline states (which must be a legal transition) allowing the midlayer
 * to goose the queue for this device.
 */
static int scsi_internal_device_unblock(struct scsi_device *sdev,
					enum scsi_device_state new_state)
{
	int ret;

	/* Same as the _nowait variant, but run with state_mutex held. */
	mutex_lock(&sdev->state_mutex);
	ret = scsi_internal_device_unblock_nowait(sdev, new_state);
	mutex_unlock(&sdev->state_mutex);

	return ret;
}

/*
 * device_for_each_child() callback: block every device of @dev if it is a
 * scsi_target.  Always returns 0; @data is unused.
 */
static int
target_block(struct device *dev, void *data)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), NULL,
					scsi_device_block);
	return 0;
}

/**
 * scsi_block_targets - transition all SCSI child devices to SDEV_BLOCK state
 * @dev: a parent device of one or more scsi_target devices
 * @shost: the Scsi_Host to which this device belongs
 *
 * Iterate over all children of @dev, which should be scsi_target devices,
 * and switch all subordinate scsi devices to SDEV_BLOCK state. Wait for
 * ongoing scsi_queue_rq() calls to finish. May sleep.
 *
 * Note:
 * @dev must not itself be a scsi_target device.
*/
void
scsi_block_targets(struct Scsi_Host *shost, struct device *dev)
{
	WARN_ON_ONCE(scsi_is_target_device(dev));
	device_for_each_child(dev, NULL, target_block);
	/* One wait on the host's tag set follows all the per-device blocks. */
	blk_mq_wait_quiesce_done(&shost->tag_set);
}
EXPORT_SYMBOL_GPL(scsi_block_targets);

/*
 * starget_for_each_device() callback: unblock @sdev into the state that
 * @data points to (an enum scsi_device_state).  The result is ignored.
 */
static void
device_unblock(struct scsi_device *sdev, void *data)
{
	scsi_internal_device_unblock(sdev, *(enum scsi_device_state *)data);
}

/*
 * device_for_each_child() callback: unblock every device of @dev if it is a
 * scsi_target, passing @data through to device_unblock().  Always returns 0.
 */
static int
target_unblock(struct device *dev, void *data)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), data,
					device_unblock);
	return 0;
}

/*
 * Unblock all SCSI devices below @dev into @new_state.  @dev is either a
 * scsi_target itself or a parent whose children are visited with
 * target_unblock().
 */
void
scsi_target_unblock(struct device *dev, enum scsi_device_state new_state)
{
	if (scsi_is_target_device(dev))
		starget_for_each_device(to_scsi_target(dev), &new_state,
					device_unblock);
	else
		device_for_each_child(dev, &new_state, target_unblock);
}
EXPORT_SYMBOL_GPL(scsi_target_unblock);

/**
 * scsi_host_block - Try to transition all logical units to the SDEV_BLOCK state
 * @shost: device to block
 *
 * Pause SCSI command processing for all logical units associated with the SCSI
 * host and wait until pending scsi_queue_rq() calls have finished.
 *
 * Returns zero if successful or a negative error code upon failure.
 */
int
scsi_host_block(struct Scsi_Host *shost)
{
	struct scsi_device *sdev;
	int ret;

	/*
	 * Call scsi_internal_device_block_nowait so we can avoid
	 * calling synchronize_rcu() for each LUN.
	 */
	shost_for_each_device(sdev, shost) {
		mutex_lock(&sdev->state_mutex);
		ret = scsi_internal_device_block_nowait(sdev);
		mutex_unlock(&sdev->state_mutex);
		if (ret) {
			/*
			 * Leaving the iterator early: presumably this drops the
			 * reference shost_for_each_device() holds on sdev.
			 * Devices blocked so far are not unblocked here.
			 */
			scsi_device_put(sdev);
			return ret;
		}
	}

	/* Wait for ongoing scsi_queue_rq() calls to finish.
*/
	blk_mq_wait_quiesce_done(&shost->tag_set);

	return 0;
}
EXPORT_SYMBOL_GPL(scsi_host_block);

/*
 * Unblock every device of @shost into @new_state, stopping at the first
 * failure.  Returns 0, or the error from scsi_internal_device_unblock().
 */
int
scsi_host_unblock(struct Scsi_Host *shost, int new_state)
{
	struct scsi_device *sdev;
	int ret = 0;

	shost_for_each_device(sdev, shost) {
		ret = scsi_internal_device_unblock(sdev, new_state);
		if (ret) {
			/* Early exit from the iterator, so put sdev explicitly. */
			scsi_device_put(sdev);
			break;
		}
	}
	return ret;
}
EXPORT_SYMBOL_GPL(scsi_host_unblock);

/**
 * scsi_kmap_atomic_sg - find and atomically map an sg-element
 * @sgl:	scatter-gather list
 * @sg_count:	number of segments in sg
 * @offset:	offset in bytes into sg, on return offset into the mapped area
 * @len:	bytes to map, on return number of bytes mapped
 *
 * Returns virtual address of the start of the mapped page, or NULL if
 * @offset lies beyond the end of the list.
 */
void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count,
			  size_t *offset, size_t *len)
{
	int i;
	size_t sg_len = 0, len_complete = 0;
	struct scatterlist *sg;
	struct page *page;

	WARN_ON(!irqs_disabled());

	/*
	 * Find the entry containing *offset.  sg_len is the running total
	 * including the current entry; len_complete is the total before it.
	 */
	for_each_sg(sgl, sg, sg_count, i) {
		len_complete = sg_len; /* Complete sg-entries */
		sg_len += sg->length;
		if (sg_len > *offset)
			break;
	}

	/* The loop ran off the end: the requested offset is past the data. */
	if (unlikely(i == sg_count)) {
		printk(KERN_ERR "%s: Bytes in sg: %zu, requested offset %zu, "
			"elements %d\n",
		       __func__, sg_len, *offset, sg_count);
		WARN_ON(1);
		return NULL;
	}

	/* Offset starting from the beginning of first page in this sg-entry */
	*offset = *offset - len_complete + sg->offset;

	/* Pick the page inside the entry and reduce *offset to an in-page offset. */
	page = sg_page(sg) + (*offset >> PAGE_SHIFT);
	*offset &= ~PAGE_MASK;

	/* Bytes in this sg-entry from *offset to the end of the page */
	sg_len = PAGE_SIZE - *offset;
	/* The mapping never extends past the end of that single page. */
	if (*len > sg_len)
		*len = sg_len;

	return kmap_atomic(page);
}
EXPORT_SYMBOL(scsi_kmap_atomic_sg);

/**
 * scsi_kunmap_atomic_sg - atomically unmap a virtual address, previously mapped with scsi_kmap_atomic_sg
 * @virt:	virtual address to be unmapped
 */
void scsi_kunmap_atomic_sg(void *virt)
{
	kunmap_atomic(virt);
}
EXPORT_SYMBOL(scsi_kunmap_atomic_sg);

/* Increment the device's disk_events_disable_depth counter. */
void sdev_disable_disk_events(struct scsi_device *sdev)
{
	atomic_inc(&sdev->disk_events_disable_depth);
}
EXPORT_SYMBOL(sdev_disable_disk_events); void sdev_enable_disk_events(struct scsi_device *sdev) { if (WARN_ON_ONCE(atomic_read(&sdev->disk_events_disable_depth) <= 0)) return; atomic_dec(&sdev->disk_events_disable_depth); } EXPORT_SYMBOL(sdev_enable_disk_events); static unsigned char designator_prio(const unsigned char *d) { if (d[1] & 0x30) /* not associated with LUN */ return 0; if (d[3] == 0) /* invalid length */ return 0; /* * Order of preference for lun descriptor: * - SCSI name string * - NAA IEEE Registered Extended * - EUI-64 based 16-byte * - EUI-64 based 12-byte * - NAA IEEE Registered * - NAA IEEE Extended * - EUI-64 based 8-byte * - SCSI name string (truncated) * - T10 Vendor ID * as longer descriptors reduce the likelyhood * of identification clashes. */ switch (d[1] & 0xf) { case 8: /* SCSI name string, variable-length UTF-8 */ return 9; case 3: switch (d[4] >> 4) { case 6: /* NAA registered extended */ return 8; case 5: /* NAA registered */ return 5; case 4: /* NAA extended */ return 4; case 3: /* NAA locally assigned */ return 1; default: break; } break; case 2: switch (d[3]) { case 16: /* EUI64-based, 16 byte */ return 7; case 12: /* EUI64-based, 12 byte */ return 6; case 8: /* EUI64-based, 8 byte */ return 3; default: break; } break; case 1: /* T10 vendor ID */ return 1; default: break; } return 0; } /** * scsi_vpd_lun_id - return a unique device identification * @sdev: SCSI device * @id: buffer for the identification * @id_len: length of the buffer * * Copies a unique device identification into @id based * on the information in the VPD page 0x83 of the device. * The string will be formatted as a SCSI name string. * * Returns the length of the identification or error on failure. * If the identifier is longer than the supplied buffer the actual * identifier length is returned and the buffer is not zero-padded. 
*/
int scsi_vpd_lun_id(struct scsi_device *sdev, char *id, size_t id_len)
{
	/* Priority and designator length of the best candidate written so far. */
	u8 cur_id_prio = 0;
	u8 cur_id_size = 0;
	const unsigned char *d, *cur_id_str;
	const struct scsi_vpd *vpd_pg83;
	/* Stays -EINVAL if no usable designator is found. */
	int id_size = -EINVAL;

	rcu_read_lock();
	vpd_pg83 = rcu_dereference(sdev->vpd_pg83);
	if (!vpd_pg83) {
		rcu_read_unlock();
		return -ENXIO;
	}

	/* The id string must be at least 20 bytes + terminating NUL byte */
	if (id_len < 21) {
		rcu_read_unlock();
		return -EINVAL;
	}

	memset(id, 0, id_len);
	/*
	 * Walk the descriptors, starting 4 bytes into the page.  Each one
	 * occupies d[3] + 4 bytes.
	 */
	for (d = vpd_pg83->data + 4;
	     d < vpd_pg83->data + vpd_pg83->len;
	     d += d[3] + 4) {
		u8 prio = designator_prio(d);

		/* Skip unusable descriptors and those ranked below the current pick. */
		if (prio == 0 || cur_id_prio > prio)
			continue;

		switch (d[1] & 0xf) {
		case 0x1:
			/* T10 Vendor ID */
			/* Keep the longer of two T10 designators. */
			if (cur_id_size > d[3])
				break;
			cur_id_prio = prio;
			cur_id_size = d[3];
			/* Clamp so the 4-character "t10." prefix plus the id fit @id_len. */
			if (cur_id_size + 4 > id_len)
				cur_id_size = id_len - 4;
			cur_id_str = d + 4;
			id_size = snprintf(id, id_len, "t10.%*pE",
					   cur_id_size, cur_id_str);
			break;
		case 0x2:
			/* EUI-64 */
			cur_id_prio = prio;
			cur_id_size = d[3];
			cur_id_str = d + 4;
			/* Only 8, 12 and 16 byte designators are formatted. */
			switch (cur_id_size) {
			case 8:
				id_size = snprintf(id, id_len,
						   "eui.%8phN",
						   cur_id_str);
				break;
			case 12:
				id_size = snprintf(id, id_len,
						   "eui.%12phN",
						   cur_id_str);
				break;
			case 16:
				id_size = snprintf(id, id_len,
						   "eui.%16phN",
						   cur_id_str);
				break;
			default:
				break;
			}
			break;
		case 0x3:
			/* NAA */
			cur_id_prio = prio;
			cur_id_size = d[3];
			cur_id_str = d + 4;
			/* Only 8 and 16 byte designators are formatted. */
			switch (cur_id_size) {
			case 8:
				id_size = snprintf(id, id_len,
						   "naa.%8phN",
						   cur_id_str);
				break;
			case 16:
				id_size = snprintf(id, id_len,
						   "naa.%16phN",
						   cur_id_str);
				break;
			default:
				break;
			}
			break;
		case 0x8:
			/* SCSI name string */
			if (cur_id_size > d[3])
				break;
			/* Prefer others for truncated descriptor */
			if (d[3] > id_len) {
				prio = 2;
				if (cur_id_prio > prio)
					break;
			}
			cur_id_prio = prio;
			/* The full designator length is reported even if the copy is cut short. */
			cur_id_size = id_size = d[3];
			cur_id_str = d + 4;
			/* Copy at most id_len - 1 bytes; @id was zeroed above. */
			if (cur_id_size >= id_len)
				cur_id_size = id_len - 1;
			memcpy(id, cur_id_str, cur_id_size);
			break;
		default:
			break;
		}
	}
	rcu_read_unlock();

	return id_size;
}
EXPORT_SYMBOL(scsi_vpd_lun_id);

/**
 * scsi_vpd_tpg_id - return a target
port group identifier * @sdev: SCSI device * @rel_id: pointer to return relative target port in if not %NULL * * Returns the Target Port Group identifier from the information * from VPD page 0x83 of the device. * Optionally sets @rel_id to the relative target port on success. * * Return: the identifier or error on failure. */ int scsi_vpd_tpg_id(struct scsi_device *sdev, int *rel_id) { const unsigned char *d; const struct scsi_vpd *vpd_pg83; int group_id = -EAGAIN, rel_port = -1; rcu_read_lock(); vpd_pg83 = rcu_dereference(sdev->vpd_pg83); if (!vpd_pg83) { rcu_read_unlock(); return -ENXIO; } d = vpd_pg83->data + 4; while (d < vpd_pg83->data + vpd_pg83->len) { switch (d[1] & 0xf) { case 0x4: /* Relative target port */ rel_port = get_unaligned_be16(&d[6]); break; case 0x5: /* Target port group */ group_id = get_unaligned_be16(&d[6]); break; default: break; } d += d[3] + 4; } rcu_read_unlock(); if (group_id >= 0 && rel_id && rel_port != -1) *rel_id = rel_port; return group_id; } EXPORT_SYMBOL(scsi_vpd_tpg_id); /** * scsi_build_sense - build sense data for a command * @scmd: scsi command for which the sense should be formatted * @desc: Sense format (non-zero == descriptor format, * 0 == fixed format) * @key: Sense key * @asc: Additional sense code * @ascq: Additional sense code qualifier * **/ void scsi_build_sense(struct scsi_cmnd *scmd, int desc, u8 key, u8 asc, u8 ascq) { scsi_build_sense_buffer(desc, scmd->sense_buffer, key, asc, ascq); scmd->result = SAM_STAT_CHECK_CONDITION; } EXPORT_SYMBOL_GPL(scsi_build_sense); #ifdef CONFIG_SCSI_LIB_KUNIT_TEST #include "scsi_lib_test.c" #endif
1 1 2 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 
525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 // SPDX-License-Identifier: GPL-2.0-only /* * linux/fs/msdos/namei.c * * Written 1992,1993 by Werner Almesberger * Hidden files 1995 by Albert Cahalan <albert@ccs.neu.edu> <adc@coe.neu.edu> * Rewritten for constant inumbers 1999 by Al Viro */ #include <linux/module.h> #include <linux/iversion.h> #include "fat.h" /* Characters that are undesirable in an MS-DOS file name */ static unsigned char bad_chars[] = "*?<>|\""; static unsigned char bad_if_strict[] = "+=,; "; /***** Formats an MS-DOS file name. Rejects invalid names. */ static int msdos_format_name(const unsigned char *name, int len, unsigned char *res, struct fat_mount_options *opts) /* * name is the proposed name, len is its length, res is * the resulting name, opts->name_check is either (r)elaxed, * (n)ormal or (s)trict, opts->dotsOK allows dots at the * beginning of name (for hidden files) */ { unsigned char *walk; unsigned char c; int space; if (name[0] == '.') { /* dotfile because . and .. 
already done */
		if (opts->dotsOK) {
			/* Get rid of dot - test for it elsewhere */
			name++;
			len--;
		} else
			return -EINVAL;
	}
	/*
	 * disallow names that _really_ start with a dot
	 */
	/*
	 * space starts at 1, so an empty base name (no characters, or a '.'
	 * straight away) fails the "if (space)" test after the loop.
	 */
	space = 1;
	c = 0;
	/* Base name: store up to 8 characters in res, stopping at the first '.'. */
	for (walk = res; len && walk - res < 8; walk++) {
		c = *name++;
		len--;
		/* bad_chars are rejected unless name_check is 'r'. */
		if (opts->name_check != 'r' && strchr(bad_chars, c))
			return -EINVAL;
		/* 's' additionally rejects bad_if_strict characters and A-Z. */
		if (opts->name_check == 's' && strchr(bad_if_strict, c))
			return -EINVAL;
		if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
			return -EINVAL;
		/* Control characters, ':' and '\\' are rejected in every mode. */
		if (c < ' ' || c == ':' || c == '\\')
			return -EINVAL;
	/*
	 * 0xE5 is legal as a first character, but we must substitute
	 * 0x05 because 0xE5 marks deleted files.  Yes, DOS really
	 * does this.
	 * It seems that Microsoft hacked DOS to support non-US
	 * characters after the 0xE5 character was already in use to
	 * mark deleted files.
	 */
		if ((res == walk) && (c == 0xE5))
			c = 0x05;
		if (c == '.')
			break;
		/* Remember whether the character just stored is a space. */
		space = (c == ' ');
		/* Lower-case letters are upper-cased unless opts->nocase is set. */
		*walk = (!opts->nocase && c >= 'a' && c <= 'z') ? c - 32 : c;
	}
	/* The base name must not be empty or end in a space. */
	if (space)
		return -EINVAL;
	/* Strict mode: if input remains after the base name, the next character must be '.'. */
	if (opts->name_check == 's' && len && c != '.') {
		c = *name++;
		len--;
		if (c != '.')
			return -EINVAL;
	}
	/* Otherwise skip the remaining input up to and including the first '.'. */
	while (c != '.' && len--)
		c = *name++;
	if (c == '.') {
		/* Pad the base name with spaces to 8 bytes before the extension. */
		while (walk - res < 8)
			*walk++ = ' ';
		/* Extension: copy until res holds MSDOS_NAME bytes or the input ends. */
		while (len > 0 && walk - res < MSDOS_NAME) {
			c = *name++;
			len--;
			if (opts->name_check != 'r' && strchr(bad_chars, c))
				return -EINVAL;
			if (opts->name_check == 's' &&
			    strchr(bad_if_strict, c))
				return -EINVAL;
			if (c < ' ' || c == ':' || c == '\\')
				return -EINVAL;
			/* A second '.' is an error in strict mode; otherwise it ends the extension. */
			if (c == '.') {
				if (opts->name_check == 's')
					return -EINVAL;
				break;
			}
			if (c >= 'A' && c <= 'Z' && opts->name_check == 's')
				return -EINVAL;
			space = c == ' ';
			if (!opts->nocase && c >= 'a' && c <= 'z')
				*walk++ = c - 32;
			else
				*walk++ = c;
		}
		/* The extension must not end in a space. */
		if (space)
			return -EINVAL;
		/* Strict mode rejects input left over after the extension. */
		if (opts->name_check == 's' && len)
			return -EINVAL;
	}
	/* Pad the result with spaces to MSDOS_NAME bytes. */
	while (walk - res < MSDOS_NAME)
		*walk++ = ' ';

	return 0;
}

/***** Locates a directory entry.  Uses unformatted name.
*/
static int msdos_find(struct inode *dir, const unsigned char *name, int len,
		      struct fat_slot_info *sinfo)
{
	struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb);
	unsigned char msdos_name[MSDOS_NAME];
	int err;

	/* Any formatting failure is reported as "no such entry". */
	err = msdos_format_name(name, len, msdos_name, &sbi->options);
	if (err)
		return -ENOENT;

	err = fat_scan(dir, msdos_name, sinfo);
	/*
	 * With dotsOK, a requested name with a leading '.' only matches an
	 * entry with ATTR_HIDDEN set, and a name without one only matches an
	 * entry with it clear.
	 */
	if (!err && sbi->options.dotsOK) {
		if (name[0] == '.') {
			if (!(sinfo->de->attr & ATTR_HIDDEN))
				err = -ENOENT;
		} else {
			if (sinfo->de->attr & ATTR_HIDDEN)
				err = -ENOENT;
		}
		/* On a mismatch release the buffer that fat_scan() returned in sinfo. */
		if (err)
			brelse(sinfo->bh);
	}
	return err;
}

/*
 * Compute the hash for the msdos name corresponding to the dentry.
 * Note: if the name is invalid, we leave the hash code unchanged so
 * that the existing dentry can be used. The msdos fs routines will
 * return ENOENT or EINVAL as appropriate.
 *
 * Always returns 0.
 */
static int msdos_hash(const struct dentry *dentry, struct qstr *qstr)
{
	struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options;
	unsigned char msdos_name[MSDOS_NAME];
	int error;

	/* Hash the formatted MSDOS_NAME-byte name, not the raw string. */
	error = msdos_format_name(qstr->name, qstr->len, msdos_name, options);
	if (!error)
		qstr->hash = full_name_hash(dentry, msdos_name, MSDOS_NAME);
	return 0;
}

/*
 * Compare two msdos names. If either of the names are invalid,
 * we fall back to doing the standard name comparison.
*/ static int msdos_cmp(const struct dentry *dentry, unsigned int len, const char *str, const struct qstr *name) { struct fat_mount_options *options = &MSDOS_SB(dentry->d_sb)->options; unsigned char a_msdos_name[MSDOS_NAME], b_msdos_name[MSDOS_NAME]; int error; error = msdos_format_name(name->name, name->len, a_msdos_name, options); if (error) goto old_compare; error = msdos_format_name(str, len, b_msdos_name, options); if (error) goto old_compare; error = memcmp(a_msdos_name, b_msdos_name, MSDOS_NAME); out: return error; old_compare: error = 1; if (name->len == len) error = memcmp(name->name, str, len); goto out; } static const struct dentry_operations msdos_dentry_operations = { .d_hash = msdos_hash, .d_compare = msdos_cmp, }; /* * AV. Wrappers for FAT sb operations. Is it wise? */ /***** Get inode using directory and name */ static struct dentry *msdos_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; struct inode *inode; int err; mutex_lock(&MSDOS_SB(sb)->s_lock); err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); switch (err) { case -ENOENT: inode = NULL; break; case 0: inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); break; default: inode = ERR_PTR(err); } mutex_unlock(&MSDOS_SB(sb)->s_lock); return d_splice_alias(inode, dentry); } /***** Creates a directory entry (name is already formatted). */ static int msdos_add_entry(struct inode *dir, const unsigned char *name, int is_dir, int is_hid, int cluster, struct timespec64 *ts, struct fat_slot_info *sinfo) { struct msdos_sb_info *sbi = MSDOS_SB(dir->i_sb); struct msdos_dir_entry de; __le16 time, date; int err; memcpy(de.name, name, MSDOS_NAME); de.attr = is_dir ? 
ATTR_DIR : ATTR_ARCH; if (is_hid) de.attr |= ATTR_HIDDEN; de.lcase = 0; fat_time_unix2fat(sbi, ts, &time, &date, NULL); de.cdate = de.adate = 0; de.ctime = 0; de.ctime_cs = 0; de.time = time; de.date = date; fat_set_start(&de, cluster); de.size = 0; err = fat_add_entries(dir, &de, 1, sinfo); if (err) return err; fat_truncate_time(dir, ts, S_CTIME|S_MTIME); if (IS_DIRSYNC(dir)) (void)fat_sync_inode(dir); else mark_inode_dirty(dir); return 0; } /***** Create a file */ static int msdos_create(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode, bool excl) { struct super_block *sb = dir->i_sb; struct inode *inode = NULL; struct fat_slot_info sinfo; struct timespec64 ts; unsigned char msdos_name[MSDOS_NAME]; int err, is_hid; mutex_lock(&MSDOS_SB(sb)->s_lock); err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, msdos_name, &MSDOS_SB(sb)->options); if (err) goto out; is_hid = (dentry->d_name.name[0] == '.') && (msdos_name[0] != '.'); /* Have to do it due to foo vs. .foo conflicts */ if (!fat_scan(dir, msdos_name, &sinfo)) { brelse(sinfo.bh); err = -EINVAL; goto out; } ts = current_time(dir); err = msdos_add_entry(dir, msdos_name, 0, is_hid, 0, &ts, &sinfo); if (err) goto out; inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { err = PTR_ERR(inode); goto out; } fat_truncate_time(inode, &ts, S_ATIME|S_CTIME|S_MTIME); /* timestamp is already written, so mark_inode_dirty() is unneeded. 
*/ d_instantiate(dentry, inode); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); if (!err) err = fat_flush_inodes(sb, dir, inode); return err; } /***** Remove a directory */ static int msdos_rmdir(struct inode *dir, struct dentry *dentry) { struct super_block *sb = dir->i_sb; struct inode *inode = d_inode(dentry); struct fat_slot_info sinfo; int err; mutex_lock(&MSDOS_SB(sb)->s_lock); err = fat_dir_empty(inode); if (err) goto out; err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); if (err) goto out; err = fat_remove_entries(dir, &sinfo); /* and releases bh */ if (err) goto out; drop_nlink(dir); clear_nlink(inode); fat_truncate_time(inode, NULL, S_CTIME); fat_detach(inode); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); if (!err) err = fat_flush_inodes(sb, dir, inode); return err; } /***** Make a directory */ static struct dentry *msdos_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { struct super_block *sb = dir->i_sb; struct fat_slot_info sinfo; struct inode *inode; unsigned char msdos_name[MSDOS_NAME]; struct timespec64 ts; int err, is_hid, cluster; mutex_lock(&MSDOS_SB(sb)->s_lock); err = msdos_format_name(dentry->d_name.name, dentry->d_name.len, msdos_name, &MSDOS_SB(sb)->options); if (err) goto out; is_hid = (dentry->d_name.name[0] == '.') && (msdos_name[0] != '.'); /* foo vs .foo situation */ if (!fat_scan(dir, msdos_name, &sinfo)) { brelse(sinfo.bh); err = -EINVAL; goto out; } ts = current_time(dir); cluster = fat_alloc_new_dir(dir, &ts); if (cluster < 0) { err = cluster; goto out; } err = msdos_add_entry(dir, msdos_name, 1, is_hid, cluster, &ts, &sinfo); if (err) goto out_free; inc_nlink(dir); inode = fat_build_inode(sb, sinfo.de, sinfo.i_pos); brelse(sinfo.bh); if (IS_ERR(inode)) { err = PTR_ERR(inode); /* the directory was completed, just return a error */ goto out; } set_nlink(inode, 2); fat_truncate_time(inode, &ts, S_ATIME|S_CTIME|S_MTIME); /* timestamp is already written, so mark_inode_dirty() is 
unneeded. */ d_instantiate(dentry, inode); mutex_unlock(&MSDOS_SB(sb)->s_lock); fat_flush_inodes(sb, dir, inode); return NULL; out_free: fat_free_clusters(dir, cluster); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); return ERR_PTR(err); } /***** Unlink a file */ static int msdos_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = d_inode(dentry); struct super_block *sb = inode->i_sb; struct fat_slot_info sinfo; int err; mutex_lock(&MSDOS_SB(sb)->s_lock); err = msdos_find(dir, dentry->d_name.name, dentry->d_name.len, &sinfo); if (err) goto out; err = fat_remove_entries(dir, &sinfo); /* and releases bh */ if (err) goto out; clear_nlink(inode); fat_truncate_time(inode, NULL, S_CTIME); fat_detach(inode); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); if (!err) err = fat_flush_inodes(sb, dir, inode); return err; } static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name, struct dentry *old_dentry, struct inode *new_dir, unsigned char *new_name, struct dentry *new_dentry, int is_hid) { struct buffer_head *dotdot_bh; struct msdos_dir_entry *dotdot_de; struct inode *old_inode, *new_inode; struct fat_slot_info old_sinfo, sinfo; struct timespec64 ts; loff_t new_i_pos; int err, old_attrs, is_dir, update_dotdot, corrupt = 0; old_sinfo.bh = sinfo.bh = dotdot_bh = NULL; old_inode = d_inode(old_dentry); new_inode = d_inode(new_dentry); err = fat_scan(old_dir, old_name, &old_sinfo); if (err) { err = -EIO; goto out; } is_dir = S_ISDIR(old_inode->i_mode); update_dotdot = (is_dir && old_dir != new_dir); if (update_dotdot) { if (fat_get_dotdot_entry(old_inode, &dotdot_bh, &dotdot_de)) { err = -EIO; goto out; } } old_attrs = MSDOS_I(old_inode)->i_attrs; err = fat_scan(new_dir, new_name, &sinfo); if (!err) { if (!new_inode) { /* "foo" -> ".foo" case. 
just change the ATTR_HIDDEN */ if (sinfo.de != old_sinfo.de) { err = -EINVAL; goto out; } if (is_hid) MSDOS_I(old_inode)->i_attrs |= ATTR_HIDDEN; else MSDOS_I(old_inode)->i_attrs &= ~ATTR_HIDDEN; if (IS_DIRSYNC(old_dir)) { err = fat_sync_inode(old_inode); if (err) { MSDOS_I(old_inode)->i_attrs = old_attrs; goto out; } } else mark_inode_dirty(old_inode); inode_inc_iversion(old_dir); fat_truncate_time(old_dir, NULL, S_CTIME|S_MTIME); if (IS_DIRSYNC(old_dir)) (void)fat_sync_inode(old_dir); else mark_inode_dirty(old_dir); goto out; } } ts = current_time(old_inode); if (new_inode) { if (err) goto out; if (is_dir) { err = fat_dir_empty(new_inode); if (err) goto out; } new_i_pos = MSDOS_I(new_inode)->i_pos; fat_detach(new_inode); } else { err = msdos_add_entry(new_dir, new_name, is_dir, is_hid, 0, &ts, &sinfo); if (err) goto out; new_i_pos = sinfo.i_pos; } inode_inc_iversion(new_dir); fat_detach(old_inode); fat_attach(old_inode, new_i_pos); if (is_hid) MSDOS_I(old_inode)->i_attrs |= ATTR_HIDDEN; else MSDOS_I(old_inode)->i_attrs &= ~ATTR_HIDDEN; if (IS_DIRSYNC(new_dir)) { err = fat_sync_inode(old_inode); if (err) goto error_inode; } else mark_inode_dirty(old_inode); if (update_dotdot) { fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart); mark_buffer_dirty_inode(dotdot_bh, old_inode); if (IS_DIRSYNC(new_dir)) { err = sync_dirty_buffer(dotdot_bh); if (err) goto error_dotdot; } drop_nlink(old_dir); if (!new_inode) inc_nlink(new_dir); } err = fat_remove_entries(old_dir, &old_sinfo); /* and releases bh */ old_sinfo.bh = NULL; if (err) goto error_dotdot; inode_inc_iversion(old_dir); fat_truncate_time(old_dir, &ts, S_CTIME|S_MTIME); if (IS_DIRSYNC(old_dir)) (void)fat_sync_inode(old_dir); else mark_inode_dirty(old_dir); if (new_inode) { drop_nlink(new_inode); if (is_dir) drop_nlink(new_inode); fat_truncate_time(new_inode, &ts, S_CTIME); } out: brelse(sinfo.bh); brelse(dotdot_bh); brelse(old_sinfo.bh); return err; error_dotdot: /* data cluster is shared, serious corruption */ 
corrupt = 1; if (update_dotdot) { fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart); mark_buffer_dirty_inode(dotdot_bh, old_inode); corrupt |= sync_dirty_buffer(dotdot_bh); } error_inode: fat_detach(old_inode); fat_attach(old_inode, old_sinfo.i_pos); MSDOS_I(old_inode)->i_attrs = old_attrs; if (new_inode) { fat_attach(new_inode, new_i_pos); if (corrupt) corrupt |= fat_sync_inode(new_inode); } else { /* * If new entry was not sharing the data cluster, it * shouldn't be serious corruption. */ int err2 = fat_remove_entries(new_dir, &sinfo); if (corrupt) corrupt |= err2; sinfo.bh = NULL; } if (corrupt < 0) { fat_fs_error(new_dir->i_sb, "%s: Filesystem corrupted (i_pos %lld)", __func__, sinfo.i_pos); } goto out; } /***** Rename, a wrapper for rename_same_dir & rename_diff_dir */ static int msdos_rename(struct mnt_idmap *idmap, struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags) { struct super_block *sb = old_dir->i_sb; unsigned char old_msdos_name[MSDOS_NAME], new_msdos_name[MSDOS_NAME]; int err, is_hid; if (flags & ~RENAME_NOREPLACE) return -EINVAL; mutex_lock(&MSDOS_SB(sb)->s_lock); err = msdos_format_name(old_dentry->d_name.name, old_dentry->d_name.len, old_msdos_name, &MSDOS_SB(old_dir->i_sb)->options); if (err) goto out; err = msdos_format_name(new_dentry->d_name.name, new_dentry->d_name.len, new_msdos_name, &MSDOS_SB(new_dir->i_sb)->options); if (err) goto out; is_hid = (new_dentry->d_name.name[0] == '.') && (new_msdos_name[0] != '.'); err = do_msdos_rename(old_dir, old_msdos_name, old_dentry, new_dir, new_msdos_name, new_dentry, is_hid); out: mutex_unlock(&MSDOS_SB(sb)->s_lock); if (!err) err = fat_flush_inodes(sb, old_dir, new_dir); return err; } static const struct inode_operations msdos_dir_inode_operations = { .create = msdos_create, .lookup = msdos_lookup, .unlink = msdos_unlink, .mkdir = msdos_mkdir, .rmdir = msdos_rmdir, .rename = msdos_rename, .setattr = fat_setattr, .getattr = 
fat_getattr, .update_time = fat_update_time, }; static void setup(struct super_block *sb) { MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations; set_default_d_op(sb, &msdos_dentry_operations); sb->s_flags |= SB_NOATIME; } static int msdos_fill_super(struct super_block *sb, struct fs_context *fc) { return fat_fill_super(sb, fc, setup); } static int msdos_get_tree(struct fs_context *fc) { return get_tree_bdev(fc, msdos_fill_super); } static int msdos_parse_param(struct fs_context *fc, struct fs_parameter *param) { return fat_parse_param(fc, param, false); } static const struct fs_context_operations msdos_context_ops = { .parse_param = msdos_parse_param, .get_tree = msdos_get_tree, .reconfigure = fat_reconfigure, .free = fat_free_fc, }; static int msdos_init_fs_context(struct fs_context *fc) { int err; /* Initialize with is_vfat == false */ err = fat_init_fs_context(fc, false); if (err) return err; fc->ops = &msdos_context_ops; return 0; } static struct file_system_type msdos_fs_type = { .owner = THIS_MODULE, .name = "msdos", .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP, .init_fs_context = msdos_init_fs_context, .parameters = fat_param_spec, }; MODULE_ALIAS_FS("msdos"); static int __init init_msdos_fs(void) { return register_filesystem(&msdos_fs_type); } static void __exit exit_msdos_fs(void) { unregister_filesystem(&msdos_fs_type); } MODULE_LICENSE("GPL"); MODULE_AUTHOR("Werner Almesberger"); MODULE_DESCRIPTION("MS-DOS filesystem support"); module_init(init_msdos_fs) module_exit(exit_msdos_fs)
13 267 267 267 265 267 267 171 267 269 270 267 267 265 264 264 265 7 266 264 176 342 268 342 270 270 269 269 266 266 266 132 338 339 264 267 261 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 /* SPDX-License-Identifier: GPL-2.0-or-later */ #ifndef __SOUND_PCM_PARAMS_H #define __SOUND_PCM_PARAMS_H /* * PCM params helpers * Copyright (c) by Abramo Bagnara <abramo@alsa-project.org> */ #include <sound/pcm.h> int snd_pcm_hw_param_first(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir); int snd_pcm_hw_param_last(struct snd_pcm_substream *pcm, struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir); 
int snd_pcm_hw_param_value(const struct snd_pcm_hw_params *params, snd_pcm_hw_param_t var, int *dir); #define SNDRV_MASK_BITS 64 /* we use so far 64bits only */ #define SNDRV_MASK_SIZE (SNDRV_MASK_BITS / 32) #define MASK_OFS(i) ((i) >> 5) #define MASK_BIT(i) (1U << ((i) & 31)) static inline void snd_mask_none(struct snd_mask *mask) { memset(mask, 0, sizeof(*mask)); } static inline void snd_mask_any(struct snd_mask *mask) { memset(mask, 0xff, SNDRV_MASK_SIZE * sizeof(u_int32_t)); } static inline int snd_mask_empty(const struct snd_mask *mask) { int i; for (i = 0; i < SNDRV_MASK_SIZE; i++) if (mask->bits[i]) return 0; return 1; } static inline unsigned int snd_mask_min(const struct snd_mask *mask) { int i; for (i = 0; i < SNDRV_MASK_SIZE; i++) { if (mask->bits[i]) return __ffs(mask->bits[i]) + (i << 5); } return 0; } static inline unsigned int snd_mask_max(const struct snd_mask *mask) { int i; for (i = SNDRV_MASK_SIZE - 1; i >= 0; i--) { if (mask->bits[i]) return __fls(mask->bits[i]) + (i << 5); } return 0; } static inline void snd_mask_set(struct snd_mask *mask, unsigned int val) { mask->bits[MASK_OFS(val)] |= MASK_BIT(val); } /* Most of drivers need only this one */ static inline void snd_mask_set_format(struct snd_mask *mask, snd_pcm_format_t format) { snd_mask_set(mask, (__force unsigned int)format); } static inline void snd_mask_reset(struct snd_mask *mask, unsigned int val) { mask->bits[MASK_OFS(val)] &= ~MASK_BIT(val); } static inline void snd_mask_set_range(struct snd_mask *mask, unsigned int from, unsigned int to) { unsigned int i; for (i = from; i <= to; i++) mask->bits[MASK_OFS(i)] |= MASK_BIT(i); } static inline void snd_mask_reset_range(struct snd_mask *mask, unsigned int from, unsigned int to) { unsigned int i; for (i = from; i <= to; i++) mask->bits[MASK_OFS(i)] &= ~MASK_BIT(i); } static inline void snd_mask_leave(struct snd_mask *mask, unsigned int val) { unsigned int v; v = mask->bits[MASK_OFS(val)] & MASK_BIT(val); snd_mask_none(mask); 
mask->bits[MASK_OFS(val)] = v; } static inline void snd_mask_intersect(struct snd_mask *mask, const struct snd_mask *v) { int i; for (i = 0; i < SNDRV_MASK_SIZE; i++) mask->bits[i] &= v->bits[i]; } static inline int snd_mask_eq(const struct snd_mask *mask, const struct snd_mask *v) { return ! memcmp(mask, v, SNDRV_MASK_SIZE * sizeof(u_int32_t)); } static inline void snd_mask_copy(struct snd_mask *mask, const struct snd_mask *v) { *mask = *v; } static inline int snd_mask_test(const struct snd_mask *mask, unsigned int val) { return mask->bits[MASK_OFS(val)] & MASK_BIT(val); } /* Most of drivers need only this one */ static inline int snd_mask_test_format(const struct snd_mask *mask, snd_pcm_format_t format) { return snd_mask_test(mask, (__force unsigned int)format); } static inline int snd_mask_single(const struct snd_mask *mask) { int i, c = 0; for (i = 0; i < SNDRV_MASK_SIZE; i++) { if (! mask->bits[i]) continue; if (mask->bits[i] & (mask->bits[i] - 1)) return 0; if (c) return 0; c++; } return 1; } static inline int snd_mask_refine(struct snd_mask *mask, const struct snd_mask *v) { struct snd_mask old; snd_mask_copy(&old, mask); snd_mask_intersect(mask, v); if (snd_mask_empty(mask)) return -EINVAL; return !snd_mask_eq(mask, &old); } static inline int snd_mask_refine_first(struct snd_mask *mask) { if (snd_mask_single(mask)) return 0; snd_mask_leave(mask, snd_mask_min(mask)); return 1; } static inline int snd_mask_refine_last(struct snd_mask *mask) { if (snd_mask_single(mask)) return 0; snd_mask_leave(mask, snd_mask_max(mask)); return 1; } static inline int snd_mask_refine_min(struct snd_mask *mask, unsigned int val) { if (snd_mask_min(mask) >= val) return 0; snd_mask_reset_range(mask, 0, val - 1); if (snd_mask_empty(mask)) return -EINVAL; return 1; } static inline int snd_mask_refine_max(struct snd_mask *mask, unsigned int val) { if (snd_mask_max(mask) <= val) return 0; snd_mask_reset_range(mask, val + 1, SNDRV_MASK_BITS); if (snd_mask_empty(mask)) return -EINVAL; 
return 1; } static inline int snd_mask_refine_set(struct snd_mask *mask, unsigned int val) { int changed; changed = !snd_mask_single(mask); snd_mask_leave(mask, val); if (snd_mask_empty(mask)) return -EINVAL; return changed; } static inline int snd_mask_value(const struct snd_mask *mask) { return snd_mask_min(mask); } static inline void snd_interval_any(struct snd_interval *i) { i->min = 0; i->openmin = 0; i->max = UINT_MAX; i->openmax = 0; i->integer = 0; i->empty = 0; } static inline void snd_interval_none(struct snd_interval *i) { i->empty = 1; } static inline int snd_interval_checkempty(const struct snd_interval *i) { return (i->min > i->max || (i->min == i->max && (i->openmin || i->openmax))); } static inline int snd_interval_empty(const struct snd_interval *i) { return i->empty; } static inline int snd_interval_single(const struct snd_interval *i) { return (i->min == i->max || (i->min + 1 == i->max && (i->openmin || i->openmax))); } static inline int snd_interval_value(const struct snd_interval *i) { if (i->openmin && !i->openmax) return i->max; return i->min; } static inline int snd_interval_min(const struct snd_interval *i) { return i->min; } static inline int snd_interval_max(const struct snd_interval *i) { unsigned int v; v = i->max; if (i->openmax) v--; return v; } static inline int snd_interval_test(const struct snd_interval *i, unsigned int val) { return !((i->min > val || (i->min == val && i->openmin) || i->max < val || (i->max == val && i->openmax))); } static inline void snd_interval_copy(struct snd_interval *d, const struct snd_interval *s) { *d = *s; } static inline int snd_interval_setinteger(struct snd_interval *i) { if (i->integer) return 0; if (i->openmin && i->openmax && i->min == i->max) return -EINVAL; i->integer = 1; return 1; } static inline int snd_interval_eq(const struct snd_interval *i1, const struct snd_interval *i2) { if (i1->empty) return i2->empty; if (i2->empty) return i1->empty; return i1->min == i2->min && i1->openmin == 
i2->openmin && i1->max == i2->max && i1->openmax == i2->openmax; } /** * params_access - get the access type from the hw params * @p: hw params */ static inline snd_pcm_access_t params_access(const struct snd_pcm_hw_params *p) { return (__force snd_pcm_access_t)snd_mask_min(hw_param_mask_c(p, SNDRV_PCM_HW_PARAM_ACCESS)); } /** * params_format - get the sample format from the hw params * @p: hw params */ static inline snd_pcm_format_t params_format(const struct snd_pcm_hw_params *p) { return (__force snd_pcm_format_t)snd_mask_min(hw_param_mask_c(p, SNDRV_PCM_HW_PARAM_FORMAT)); } /** * params_subformat - get the sample subformat from the hw params * @p: hw params */ static inline snd_pcm_subformat_t params_subformat(const struct snd_pcm_hw_params *p) { return (__force snd_pcm_subformat_t)snd_mask_min(hw_param_mask_c(p, SNDRV_PCM_HW_PARAM_SUBFORMAT)); } /** * params_period_bytes - get the period size (in bytes) from the hw params * @p: hw params */ static inline unsigned int params_period_bytes(const struct snd_pcm_hw_params *p) { return hw_param_interval_c(p, SNDRV_PCM_HW_PARAM_PERIOD_BYTES)->min; } /** * params_width - get the number of bits of the sample format from the hw params * @p: hw params * * This function returns the number of bits per sample that the selected sample * format of the hw params has. */ static inline int params_width(const struct snd_pcm_hw_params *p) { return snd_pcm_format_width(params_format(p)); } /* * params_physical_width - get the storage size of the sample format from the hw params * @p: hw params * * This functions returns the number of bits per sample that the selected sample * format of the hw params takes up in memory. This will be equal or larger than * params_width(). 
*/ static inline int params_physical_width(const struct snd_pcm_hw_params *p) { return snd_pcm_format_physical_width(params_format(p)); } int snd_pcm_hw_params_bits(const struct snd_pcm_hw_params *p); static inline void params_set_format(struct snd_pcm_hw_params *p, snd_pcm_format_t fmt) { snd_mask_set_format(hw_param_mask(p, SNDRV_PCM_HW_PARAM_FORMAT), fmt); } #endif /* __SOUND_PCM_PARAMS_H */
2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 
527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 /* BlueZ - Bluetooth protocol stack for Linux Copyright (C) 2000-2001 Qualcomm Incorporated Written 2000,2001 by Maxim Krasnyansky <maxk@qualcomm.com> This program is free software; you can 
redistribute it and/or modify
   it under the terms of the GNU General Public License version 2 as
   published by the Free Software Foundation;

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS.
   IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY
   CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES
   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

   ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS,
   COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS
   SOFTWARE IS DISCLAIMED.
*/

/* Bluetooth address family and sockets. */

#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/stringify.h>
#include <linux/sched/signal.h>

#include <asm/ioctls.h>

#include <net/bluetooth/bluetooth.h>
#include <linux/proc_fs.h>

#include <linux/ethtool.h>
#include <linux/sockios.h>

#include "leds.h"
#include "selftest.h"

/* Bluetooth sockets */
#define BT_MAX_PROTO	(BTPROTO_LAST + 1)
/* Per-protocol family handlers, indexed by protocol number. */
static const struct net_proto_family *bt_proto[BT_MAX_PROTO];
/* Taken for writing on register/unregister, for reading in bt_sock_create(). */
static DEFINE_RWLOCK(bt_proto_lock);

/* Per-protocol lock classes and names used by bt_sock_reclassify_lock(). */
static struct lock_class_key bt_lock_key[BT_MAX_PROTO];
static const char *const bt_key_strings[BT_MAX_PROTO] = {
	"sk_lock-AF_BLUETOOTH-BTPROTO_L2CAP",
	"sk_lock-AF_BLUETOOTH-BTPROTO_HCI",
	"sk_lock-AF_BLUETOOTH-BTPROTO_SCO",
	"sk_lock-AF_BLUETOOTH-BTPROTO_RFCOMM",
	"sk_lock-AF_BLUETOOTH-BTPROTO_BNEP",
	"sk_lock-AF_BLUETOOTH-BTPROTO_CMTP",
	"sk_lock-AF_BLUETOOTH-BTPROTO_HIDP",
	"sk_lock-AF_BLUETOOTH-BTPROTO_AVDTP",
	"sk_lock-AF_BLUETOOTH-BTPROTO_ISO",
};

static struct lock_class_key bt_slock_key[BT_MAX_PROTO];
static const char *const bt_slock_key_strings[BT_MAX_PROTO] = {
	"slock-AF_BLUETOOTH-BTPROTO_L2CAP",
	"slock-AF_BLUETOOTH-BTPROTO_HCI",
	"slock-AF_BLUETOOTH-BTPROTO_SCO",
	"slock-AF_BLUETOOTH-BTPROTO_RFCOMM",
	"slock-AF_BLUETOOTH-BTPROTO_BNEP",
	"slock-AF_BLUETOOTH-BTPROTO_CMTP",
	"slock-AF_BLUETOOTH-BTPROTO_HIDP",
	"slock-AF_BLUETOOTH-BTPROTO_AVDTP",
	"slock-AF_BLUETOOTH-BTPROTO_ISO",
};

/*
 * Re-initialise @sk's lock classes with the per-protocol keys and names
 * for @proto.  BUGs on a NULL @sk or when sock_allow_reclassification()
 * refuses.  @proto indexes the key tables directly and is not
 * range-checked here.
 */
void bt_sock_reclassify_lock(struct sock *sk, int proto)
{
	BUG_ON(!sk);
	BUG_ON(!sock_allow_reclassification(sk));

	sock_lock_init_class_and_name(sk,
				      bt_slock_key_strings[proto], &bt_slock_key[proto],
				      bt_key_strings[proto], &bt_lock_key[proto]);
}
EXPORT_SYMBOL(bt_sock_reclassify_lock);

/*
 * Install @ops as the handler for @proto.
 * Returns -EINVAL for an out-of-range @proto, -EEXIST when the slot is
 * already taken, 0 otherwise.
 */
int bt_sock_register(int proto, const struct net_proto_family *ops)
{
	int err = 0;

	if (proto < 0 || proto >= BT_MAX_PROTO)
		return -EINVAL;

	write_lock(&bt_proto_lock);

	if (bt_proto[proto])
		err = -EEXIST;
	else
		bt_proto[proto] = ops;

	write_unlock(&bt_proto_lock);

	return err;
}
EXPORT_SYMBOL(bt_sock_register);

/* Clear the handler slot for @proto; out-of-range values are ignored. */
void bt_sock_unregister(int proto)
{
	if (proto < 0 || proto >= BT_MAX_PROTO)
		return;

	write_lock(&bt_proto_lock);
	bt_proto[proto] = NULL;
	write_unlock(&bt_proto_lock);
}
EXPORT_SYMBOL(bt_sock_unregister);

/*
 * Create a socket for @proto by dispatching to the registered handler.
 *
 * Only init_net is supported (-EAFNOSUPPORT otherwise) and @proto must
 * be in range (-EINVAL otherwise).  When no handler is registered a
 * module load for "bt-proto-<proto>" is requested first.  Returns
 * -EPROTONOSUPPORT when there is still no handler or its module
 * reference cannot be taken; otherwise the handler's create() result.
 */
static int bt_sock_create(struct net *net, struct socket *sock, int proto,
			  int kern)
{
	int err;

	if (net != &init_net)
		return -EAFNOSUPPORT;

	if (proto < 0 || proto >= BT_MAX_PROTO)
		return -EINVAL;

	/* Unlocked peek; the slot is re-checked under the read lock below. */
	if (!bt_proto[proto])
		request_module("bt-proto-%d", proto);

	err = -EPROTONOSUPPORT;

	read_lock(&bt_proto_lock);

	/* Hold a module reference for the duration of create(). */
	if (bt_proto[proto] && try_module_get(bt_proto[proto]->owner)) {
		err = bt_proto[proto]->create(net, sock, proto, kern);
		if (!err)
			bt_sock_reclassify_lock(sock->sk, proto);
		module_put(bt_proto[proto]->owner);
	}

	read_unlock(&bt_proto_lock);

	return err;
}

/*
 * Allocate and initialise a PF_BLUETOOTH sock.
 *
 * The sock is bound to @sock with sock_init_data(), gets an empty
 * accept queue, has SOCK_ZAPPED cleared, and starts in BT_OPEN with
 * sk_protocol set to @proto.  For non-kernel sockets (@kern == 0) the
 * peer pid and credentials are taken from the current task.
 *
 * Returns the new sock, or NULL when sk_alloc() fails.
 */
struct sock *bt_sock_alloc(struct net *net, struct socket *sock,
			   struct proto *prot, int proto, gfp_t prio, int kern)
{
	struct sock *sk;

	sk = sk_alloc(net, PF_BLUETOOTH, prio, prot, kern);
	if (!sk)
		return NULL;

	sock_init_data(sock, sk);
	INIT_LIST_HEAD(&bt_sk(sk)->accept_q);

	sock_reset_flag(sk, SOCK_ZAPPED);

	sk->sk_protocol = proto;
	sk->sk_state    = BT_OPEN;

	/* Init peer information so it can be properly monitored */
	if (!kern) {
		spin_lock(&sk->sk_peer_lock);
		sk->sk_peer_pid  = get_pid(task_tgid(current));
		sk->sk_peer_cred = get_current_cred();
		spin_unlock(&sk->sk_peer_lock);
	}

	return sk;
}
EXPORT_SYMBOL(bt_sock_alloc);

/* Add @sk to list @l under the list's write lock. */
void bt_sock_link(struct bt_sock_list *l, struct sock *sk)
{
	write_lock(&l->lock);
	sk_add_node(sk, &l->head);
	write_unlock(&l->lock);
}
EXPORT_SYMBOL(bt_sock_link);

/* Remove @sk from list @l under the list's write lock. */
void bt_sock_unlink(struct bt_sock_list *l, struct sock *sk)
{
	write_lock(&l->lock);
	sk_del_node_init(sk);
	write_unlock(&l->lock);
}
EXPORT_SYMBOL(bt_sock_unlink);

/*
 * Return true when @s is currently on list @l, searched by pointer
 * identity under the list's read lock.  A NULL @l or @s yields false.
 */
bool bt_sock_linked(struct bt_sock_list *l, struct sock *s)
{
	struct sock *sk;

	if (!l || !s)
		return false;

	read_lock(&l->lock);

	sk_for_each(sk, &l->head) {
		if (s == sk) {
			read_unlock(&l->lock);
			return true;
		}
	}

	read_unlock(&l->lock);

	return false;
}
EXPORT_SYMBOL(bt_sock_linked);

/*
 * Queue @sk on @parent's accept queue.
 *
 * Takes a reference on @sk, which bt_accept_unlink() drops again.
 * @bh selects the locking flavour for @sk: bh_lock_sock_nested() when
 * true, lock_sock_nested() otherwise.  The peer pid and credentials of
 * @sk are replaced by references to @parent's, and the previous ones
 * are released after sk_peer_lock is dropped.
 */
void bt_accept_enqueue(struct sock *parent, struct sock *sk, bool bh)
{
	const struct cred *old_cred;
	struct pid *old_pid;

	BT_DBG("parent %p, sk %p", parent, sk);

	sock_hold(sk);

	if (bh)
		bh_lock_sock_nested(sk);
	else
		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);

	list_add_tail(&bt_sk(sk)->accept_q, &bt_sk(parent)->accept_q);
	bt_sk(sk)->parent = parent;

	/* Copy credentials from parent since for incoming connections the
	 * socket is allocated by the kernel.
	 */
	spin_lock(&sk->sk_peer_lock);
	old_pid = sk->sk_peer_pid;
	old_cred = sk->sk_peer_cred;
	sk->sk_peer_pid = get_pid(parent->sk_peer_pid);
	sk->sk_peer_cred = get_cred(parent->sk_peer_cred);
	spin_unlock(&sk->sk_peer_lock);

	put_pid(old_pid);
	put_cred(old_cred);

	if (bh)
		bh_unlock_sock(sk);
	else
		release_sock(sk);

	sk_acceptq_added(parent);
}
EXPORT_SYMBOL(bt_accept_enqueue);

/* Calling function must hold the sk lock.
 * bt_sk(sk)->parent must be non-NULL meaning sk is in the parent list.
 *
 * Removes sk from the accept queue, updates the parent's accept-queue
 * accounting, clears the parent pointer and drops one reference on sk.
 */
void bt_accept_unlink(struct sock *sk)
{
	BT_DBG("sk %p state %d", sk, sk->sk_state);

	list_del_init(&bt_sk(sk)->accept_q);
	sk_acceptq_removed(bt_sk(sk)->parent);
	bt_sk(sk)->parent = NULL;
	sock_put(sk);
}
EXPORT_SYMBOL(bt_accept_unlink);

/*
 * Take the first acceptable child off @parent's accept queue.
 *
 * A child is returned when it is BT_CONNECTED, when @newsock is NULL,
 * or when @parent has BT_SK_DEFER_SETUP set; it is grafted onto
 * @newsock when one is given.  Children found in BT_CLOSED are unlinked
 * and skipped.  Returns NULL when no child qualifies.
 */
struct sock *bt_accept_dequeue(struct sock *parent, struct socket *newsock)
{
	struct bt_sock *s, *n;
	struct sock *sk;

	BT_DBG("parent %p", parent);

restart:
	list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) {
		sk = (struct sock *)s;

		/* Prevent early freeing of sk due to unlink and sock_kill */
		sock_hold(sk);
		lock_sock(sk);

		/* Check sk has not already been unlinked via
		 * bt_accept_unlink() due to serialisation caused by sk locking
		 */
		if (!bt_sk(sk)->parent) {
			BT_DBG("sk %p, already unlinked", sk);
			release_sock(sk);
			sock_put(sk);

			/* Restart the loop as sk is no longer in the list
			 * and also avoid a potential infinite loop because
			 * list_for_each_entry_safe() is not thread safe.
			 */
			goto restart;
		}

		/* sk is safely in the parent list so reduce reference count */
		sock_put(sk);

		/* FIXME: Is this check still needed */
		if (sk->sk_state == BT_CLOSED) {
			bt_accept_unlink(sk);
			release_sock(sk);
			continue;
		}

		if (sk->sk_state == BT_CONNECTED || !newsock ||
		    test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags)) {
			bt_accept_unlink(sk);
			if (newsock)
				sock_graft(sk, newsock);

			release_sock(sk);
			return sk;
		}

		release_sock(sk);
	}

	return NULL;
}
EXPORT_SYMBOL(bt_accept_dequeue);

int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
		    int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	size_t copied;
	size_t skblen;
	int err;

	BT_DBG("sock %p sk %p len %zu", sock, sk, len);

	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	skb = skb_recv_datagram(sk, flags, &err);
	if (!skb) {
		if (sk->sk_shutdown & RCV_SHUTDOWN)
			err = 0;

		return err;
	}

	skblen = skb->len;
	copied = skb->len;
	if (len < copied) {
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}

	skb_reset_transport_header(skb);
	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err == 0)
{ sock_recv_cmsgs(msg, sk, skb); if (msg->msg_name && bt_sk(sk)->skb_msg_name) bt_sk(sk)->skb_msg_name(skb, msg->msg_name, &msg->msg_namelen); if (test_bit(BT_SK_PKT_STATUS, &bt_sk(sk)->flags)) { u8 pkt_status = hci_skb_pkt_status(skb); put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_STATUS, sizeof(pkt_status), &pkt_status); } if (test_bit(BT_SK_PKT_SEQNUM, &bt_sk(sk)->flags)) { u16 pkt_seqnum = hci_skb_pkt_seqnum(skb); put_cmsg(msg, SOL_BLUETOOTH, BT_SCM_PKT_SEQNUM, sizeof(pkt_seqnum), &pkt_seqnum); } } skb_free_datagram(sk, skb); if (flags & MSG_TRUNC) copied = skblen; return err ? : copied; } EXPORT_SYMBOL(bt_sock_recvmsg); static long bt_sock_data_wait(struct sock *sk, long timeo) { DECLARE_WAITQUEUE(wait, current); add_wait_queue(sk_sleep(sk), &wait); for (;;) { set_current_state(TASK_INTERRUPTIBLE); if (!skb_queue_empty(&sk->sk_receive_queue)) break; if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN)) break; if (signal_pending(current) || !timeo) break; sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return timeo; } int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; int err = 0; size_t target, copied = 0; long timeo; if (flags & MSG_OOB) return -EOPNOTSUPP; BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); do { struct sk_buff *skb; int chunk; skb = skb_dequeue(&sk->sk_receive_queue); if (!skb) { if (copied >= target) break; err = sock_error(sk); if (err) break; if (sk->sk_shutdown & RCV_SHUTDOWN) break; err = -EAGAIN; if (!timeo) break; timeo = bt_sock_data_wait(sk, timeo); if (signal_pending(current)) { err = sock_intr_errno(timeo); goto out; } continue; } chunk = min_t(unsigned int, skb->len, size); if 
(skb_copy_datagram_msg(skb, 0, msg, chunk)) { skb_queue_head(&sk->sk_receive_queue, skb); if (!copied) copied = -EFAULT; break; } copied += chunk; size -= chunk; sock_recv_cmsgs(msg, sk, skb); if (!(flags & MSG_PEEK)) { int skb_len = skb_headlen(skb); if (chunk <= skb_len) { __skb_pull(skb, chunk); } else { struct sk_buff *frag; __skb_pull(skb, skb_len); chunk -= skb_len; skb_walk_frags(skb, frag) { if (chunk <= frag->len) { /* Pulling partial data */ skb->len -= chunk; skb->data_len -= chunk; __skb_pull(frag, chunk); break; } else if (frag->len) { /* Pulling all frag data */ chunk -= frag->len; skb->len -= frag->len; skb->data_len -= frag->len; __skb_pull(frag, frag->len); } } } if (skb->len) { skb_queue_head(&sk->sk_receive_queue, skb); break; } kfree_skb(skb); } else { /* put message back and return */ skb_queue_head(&sk->sk_receive_queue, skb); break; } } while (size); out: release_sock(sk); return copied ? : err; } EXPORT_SYMBOL(bt_sock_stream_recvmsg); static inline __poll_t bt_accept_poll(struct sock *parent) { struct bt_sock *s, *n; struct sock *sk; list_for_each_entry_safe(s, n, &bt_sk(parent)->accept_q, accept_q) { sk = (struct sock *)s; if (sk->sk_state == BT_CONNECTED || (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags) && sk->sk_state == BT_CONNECT2)) return EPOLLIN | EPOLLRDNORM; } return 0; } __poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; __poll_t mask = 0; poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == BT_LISTEN) return bt_accept_poll(sk); if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue)) mask |= EPOLLERR | (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? 
EPOLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= EPOLLHUP; if (!skb_queue_empty_lockless(&sk->sk_receive_queue)) mask |= EPOLLIN | EPOLLRDNORM; if (sk->sk_state == BT_CLOSED) mask |= EPOLLHUP; if (sk->sk_state == BT_CONNECT || sk->sk_state == BT_CONNECT2 || sk->sk_state == BT_CONFIG) return mask; if (!test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags) && sock_writeable(sk)) mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND; else sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); return mask; } EXPORT_SYMBOL(bt_sock_poll); static int bt_ethtool_get_ts_info(struct sock *sk, unsigned int index, void __user *useraddr) { struct ethtool_ts_info info; struct kernel_ethtool_ts_info ts_info = {}; int ret; ret = hci_ethtool_ts_info(index, sk->sk_protocol, &ts_info); if (ret == -ENODEV) return ret; else if (ret < 0) return -EIO; memset(&info, 0, sizeof(info)); info.cmd = ETHTOOL_GET_TS_INFO; info.so_timestamping = ts_info.so_timestamping; info.phc_index = ts_info.phc_index; info.tx_types = ts_info.tx_types; info.rx_filters = ts_info.rx_filters; if (copy_to_user(useraddr, &info, sizeof(info))) return -EFAULT; return 0; } static int bt_ethtool(struct sock *sk, const struct ifreq *ifr, void __user *useraddr) { unsigned int index; u32 ethcmd; int n; if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd))) return -EFAULT; if (sscanf(ifr->ifr_name, "hci%u%n", &index, &n) != 1 || n != strlen(ifr->ifr_name)) return -ENODEV; switch (ethcmd) { case ETHTOOL_GET_TS_INFO: return bt_ethtool_get_ts_info(sk, index, useraddr); } return -EOPNOTSUPP; } static int bt_dev_ioctl(struct socket *sock, unsigned int cmd, void __user *arg) { struct sock *sk = sock->sk; struct ifreq ifr = {}; void __user *data; char *colon; int ret = -ENOIOCTLCMD; if (get_user_ifreq(&ifr, &data, arg)) return -EFAULT; ifr.ifr_name[IFNAMSIZ - 1] = 0; colon = strchr(ifr.ifr_name, ':'); if (colon) *colon = 0; switch (cmd) { case SIOCETHTOOL: ret = 
bt_ethtool(sk, &ifr, data); break; } if (colon) *colon = ':'; if (put_user_ifreq(&ifr, arg)) return -EFAULT; return ret; } int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) { struct sock *sk = sock->sk; struct sk_buff *skb; long amount; int err; BT_DBG("sk %p cmd %x arg %lx", sk, cmd, arg); switch (cmd) { case TIOCOUTQ: if (sk->sk_state == BT_LISTEN) return -EINVAL; amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); if (amount < 0) amount = 0; err = put_user(amount, (int __user *)arg); break; case TIOCINQ: if (sk->sk_state == BT_LISTEN) return -EINVAL; spin_lock(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); amount = skb ? skb->len : 0; spin_unlock(&sk->sk_receive_queue.lock); err = put_user(amount, (int __user *)arg); break; case SIOCETHTOOL: err = bt_dev_ioctl(sock, cmd, (void __user *)arg); break; default: err = -ENOIOCTLCMD; break; } return err; } EXPORT_SYMBOL(bt_sock_ioctl); /* This function expects the sk lock to be held when called */ int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo) { DECLARE_WAITQUEUE(wait, current); int err = 0; BT_DBG("sk %p", sk); add_wait_queue(sk_sleep(sk), &wait); set_current_state(TASK_INTERRUPTIBLE); while (sk->sk_state != state) { if (!timeo) { err = -EINPROGRESS; break; } if (signal_pending(current)) { err = sock_intr_errno(timeo); break; } release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return err; } EXPORT_SYMBOL(bt_sock_wait_state); /* This function expects the sk lock to be held when called */ int bt_sock_wait_ready(struct sock *sk, unsigned int msg_flags) { DECLARE_WAITQUEUE(wait, current); unsigned long timeo; int err = 0; BT_DBG("sk %p", sk); timeo = sock_sndtimeo(sk, !!(msg_flags & MSG_DONTWAIT)); add_wait_queue(sk_sleep(sk), &wait); set_current_state(TASK_INTERRUPTIBLE); while 
(test_bit(BT_SK_SUSPEND, &bt_sk(sk)->flags)) { if (!timeo) { err = -EAGAIN; break; } if (signal_pending(current)) { err = sock_intr_errno(timeo); break; } release_sock(sk); timeo = schedule_timeout(timeo); lock_sock(sk); set_current_state(TASK_INTERRUPTIBLE); err = sock_error(sk); if (err) break; } __set_current_state(TASK_RUNNING); remove_wait_queue(sk_sleep(sk), &wait); return err; } EXPORT_SYMBOL(bt_sock_wait_ready); #ifdef CONFIG_PROC_FS static void *bt_seq_start(struct seq_file *seq, loff_t *pos) __acquires(seq->private->l->lock) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); read_lock(&l->lock); return seq_hlist_start_head(&l->head, *pos); } static void *bt_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); return seq_hlist_next(v, &l->head, pos); } static void bt_seq_stop(struct seq_file *seq, void *v) __releases(seq->private->l->lock) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); read_unlock(&l->lock); } static int bt_seq_show(struct seq_file *seq, void *v) { struct bt_sock_list *l = pde_data(file_inode(seq->file)); if (v == SEQ_START_TOKEN) { seq_puts(seq, "sk RefCnt Rmem Wmem User Inode Parent"); if (l->custom_seq_show) { seq_putc(seq, ' '); l->custom_seq_show(seq, v); } seq_putc(seq, '\n'); } else { struct sock *sk = sk_entry(v); struct bt_sock *bt = bt_sk(sk); seq_printf(seq, "%pK %-6d %-6u %-6u %-6u %-6lu %-6lu", sk, refcount_read(&sk->sk_refcnt), sk_rmem_alloc_get(sk), sk_wmem_alloc_get(sk), from_kuid(seq_user_ns(seq), sk_uid(sk)), sock_i_ino(sk), bt->parent ? 
sock_i_ino(bt->parent) : 0LU); if (l->custom_seq_show) { seq_putc(seq, ' '); l->custom_seq_show(seq, v); } seq_putc(seq, '\n'); } return 0; } static const struct seq_operations bt_seq_ops = { .start = bt_seq_start, .next = bt_seq_next, .stop = bt_seq_stop, .show = bt_seq_show, }; int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list *sk_list, int (*seq_show)(struct seq_file *, void *)) { sk_list->custom_seq_show = seq_show; if (!proc_create_seq_data(name, 0, net->proc_net, &bt_seq_ops, sk_list)) return -ENOMEM; return 0; } void bt_procfs_cleanup(struct net *net, const char *name) { remove_proc_entry(name, net->proc_net); } #else int bt_procfs_init(struct net *net, const char *name, struct bt_sock_list *sk_list, int (*seq_show)(struct seq_file *, void *)) { return 0; } void bt_procfs_cleanup(struct net *net, const char *name) { } #endif EXPORT_SYMBOL(bt_procfs_init); EXPORT_SYMBOL(bt_procfs_cleanup); static const struct net_proto_family bt_sock_family_ops = { .owner = THIS_MODULE, .family = PF_BLUETOOTH, .create = bt_sock_create, }; struct dentry *bt_debugfs; EXPORT_SYMBOL_GPL(bt_debugfs); #define VERSION __stringify(BT_SUBSYS_VERSION) "." 
\ __stringify(BT_SUBSYS_REVISION) static int __init bt_init(void) { int err; sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); BT_INFO("Core ver %s", VERSION); err = bt_selftest(); if (err < 0) return err; bt_debugfs = debugfs_create_dir("bluetooth", NULL); bt_leds_init(); err = bt_sysfs_init(); if (err < 0) goto cleanup_led; err = sock_register(&bt_sock_family_ops); if (err) goto cleanup_sysfs; BT_INFO("HCI device and connection manager initialized"); err = hci_sock_init(); if (err) goto unregister_socket; err = l2cap_init(); if (err) goto cleanup_socket; err = sco_init(); if (err) goto cleanup_cap; err = mgmt_init(); if (err) goto cleanup_sco; return 0; cleanup_sco: sco_exit(); cleanup_cap: l2cap_exit(); cleanup_socket: hci_sock_cleanup(); unregister_socket: sock_unregister(PF_BLUETOOTH); cleanup_sysfs: bt_sysfs_cleanup(); cleanup_led: bt_leds_cleanup(); debugfs_remove_recursive(bt_debugfs); return err; } static void __exit bt_exit(void) { iso_exit(); mgmt_exit(); sco_exit(); l2cap_exit(); hci_sock_cleanup(); sock_unregister(PF_BLUETOOTH); bt_sysfs_cleanup(); bt_leds_cleanup(); debugfs_remove_recursive(bt_debugfs); } subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann <marcel@holtmann.org>"); MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH);
14 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Public Key Signature Algorithm
 *
 * Copyright (c) 2023 Herbert Xu <herbert@gondor.apana.org.au>
 */
#ifndef _CRYPTO_INTERNAL_SIG_H
#define _CRYPTO_INTERNAL_SIG_H

#include <crypto/algapi.h>
#include <crypto/sig.h>

/*
 * Template instance of a signature algorithm.
 *
 * The union overlays a struct sig_alg with an anonymous struct whose @head
 * padding is sized to offsetof(struct sig_alg, base), so that the
 * crypto_instance @base starts where alg.base does (assuming no extra
 * alignment padding is inserted - TODO confirm). @free is an
 * instance-specific destructor hook.
 */
struct sig_instance {
	void (*free)(struct sig_instance *inst);
	union {
		struct {
			char head[offsetof(struct sig_alg, base)];
			struct crypto_instance base;
		};
		struct sig_alg alg;
	};
};

/* Spawn wrapper giving a generic crypto_spawn a signature-specific type. */
struct crypto_sig_spawn {
	struct crypto_spawn base;
};

/* Return the context area of signature transform @tfm. */
static inline void *crypto_sig_ctx(struct crypto_sig *tfm)
{
	return crypto_tfm_ctx(&tfm->base);
}

/**
 * crypto_register_sig() -- Register public key signature algorithm
 *
 * Function registers an implementation of a public key signature algorithm
 *
 * @alg:	algorithm definition
 *
 * Return: zero on success; error code in case of error
 */
int crypto_register_sig(struct sig_alg *alg);

/**
 * crypto_unregister_sig() -- Unregister public key signature algorithm
 *
 * Function unregisters an implementation of a public key signature algorithm
 *
 * @alg:	algorithm definition
 */
void crypto_unregister_sig(struct sig_alg *alg);

/* Register signature instance @inst as belonging to template @tmpl. */
int sig_register_instance(struct crypto_template *tmpl,
			  struct sig_instance *inst);

/* Map a generic crypto_instance back to the sig_instance that embeds it. */
static inline struct sig_instance *sig_instance(struct crypto_instance *inst)
{
	return container_of(&inst->alg, struct sig_instance, alg.base);
}

/* Return the sig_instance that transform @tfm was instantiated from. */
static inline struct sig_instance *sig_alg_instance(struct crypto_sig *tfm)
{
	return sig_instance(crypto_tfm_alg_instance(&tfm->base));
}

/* Inverse of sig_instance(): the generic crypto_instance view of @inst. */
static inline struct crypto_instance *sig_crypto_instance(struct sig_instance
							  *inst)
{
	return container_of(&inst->alg.base, struct crypto_instance, alg);
}

/* Return the context area of signature instance @inst. */
static inline void *sig_instance_ctx(struct sig_instance *inst)
{
	return crypto_instance_ctx(sig_crypto_instance(inst));
}

/* Look up signature algorithm @name (filtered by @type / @mask) and bind it
 * to @spawn on behalf of instance @inst.
 */
int crypto_grab_sig(struct crypto_sig_spawn *spawn,
		    struct crypto_instance *inst,
		    const char *name, u32 type, u32 mask);

/* Instantiate a signature transform from @spawn. */
static inline struct crypto_sig *crypto_spawn_sig(struct crypto_sig_spawn
						  *spawn)
{
	return crypto_spawn_tfm2(&spawn->base);
}

/* Release the algorithm held by @spawn. */
static inline void crypto_drop_sig(struct crypto_sig_spawn *spawn)
{
	crypto_drop_spawn(&spawn->base);
}

/* Return the sig_alg that @spawn currently refers to. */
static inline struct sig_alg *crypto_spawn_sig_alg(struct crypto_sig_spawn
						   *spawn)
{
	return container_of(spawn->base.alg, struct sig_alg, base);
}

#endif
3 4 4 4 4 4 2 2 4 4 1 1 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 // SPDX-License-Identifier: 
GPL-2.0-only /* * AppArmor security module * * This file contains basic common functions used in AppArmor * * Copyright (C) 1998-2008 Novell/SUSE * Copyright 2009-2010 Canonical Ltd. */ #include <linux/ctype.h> #include <linux/mm.h> #include <linux/slab.h> #include <linux/string.h> #include <linux/vmalloc.h> #include "include/audit.h" #include "include/apparmor.h" #include "include/lib.h" #include "include/perms.h" #include "include/policy.h" struct aa_perms nullperms; struct aa_perms allperms = { .allow = ALL_PERMS_MASK, .quiet = ALL_PERMS_MASK, .hide = ALL_PERMS_MASK }; struct val_table_ent { const char *str; int value; }; static struct val_table_ent debug_values_table[] = { { "N", DEBUG_NONE }, { "none", DEBUG_NONE }, { "n", DEBUG_NONE }, { "0", DEBUG_NONE }, { "all", DEBUG_ALL }, { "Y", DEBUG_ALL }, { "y", DEBUG_ALL }, { "1", DEBUG_ALL }, { "abs_root", DEBUG_LABEL_ABS_ROOT }, { "label", DEBUG_LABEL }, { "domain", DEBUG_DOMAIN }, { "policy", DEBUG_POLICY }, { "interface", DEBUG_INTERFACE }, { NULL, 0 } }; static struct val_table_ent *val_table_find_ent(struct val_table_ent *table, const char *name, size_t len) { struct val_table_ent *entry; for (entry = table; entry->str != NULL; entry++) { if (strncmp(entry->str, name, len) == 0 && strlen(entry->str) == len) return entry; } return NULL; } int aa_parse_debug_params(const char *str) { struct val_table_ent *ent; const char *next; int val = 0; do { size_t n = strcspn(str, "\r\n,"); next = str + n; ent = val_table_find_ent(debug_values_table, str, next - str); if (ent) val |= ent->value; else AA_DEBUG(DEBUG_INTERFACE, "unknown debug type '%.*s'", (int)(next - str), str); str = next + 1; } while (*next != 0); return val; } /** * val_mask_to_str - convert a perm mask to its short string * @str: character buffer to store string in (at least 10 characters) * @size: size of the @str buffer * @table: NUL-terminated character buffer of permission characters (NOT NULL) * @mask: permission mask to convert */ static int 
val_mask_to_str(char *str, size_t size, const struct val_table_ent *table, u32 mask) { const struct val_table_ent *ent; int total = 0; for (ent = table; ent->str; ent++) { if (ent->value && (ent->value & mask) == ent->value) { int len = scnprintf(str, size, "%s%s", total ? "," : "", ent->str); size -= len; str += len; total += len; mask &= ~ent->value; } } return total; } int aa_print_debug_params(char *buffer) { if (!aa_g_debug) return sprintf(buffer, "N"); return val_mask_to_str(buffer, PAGE_SIZE, debug_values_table, aa_g_debug); } bool aa_resize_str_table(struct aa_str_table *t, int newsize, gfp_t gfp) { char **n; int i; if (t->size == newsize) return true; n = kcalloc(newsize, sizeof(*n), gfp); if (!n) return false; for (i = 0; i < min(t->size, newsize); i++) n[i] = t->table[i]; for (; i < t->size; i++) kfree_sensitive(t->table[i]); if (newsize > t->size) memset(&n[t->size], 0, (newsize-t->size)*sizeof(*n)); kfree_sensitive(t->table); t->table = n; t->size = newsize; return true; } /** * aa_free_str_table - free entries str table * @t: the string table to free (MAYBE NULL) */ void aa_free_str_table(struct aa_str_table *t) { int i; if (t) { if (!t->table) return; for (i = 0; i < t->size; i++) kfree_sensitive(t->table[i]); kfree_sensitive(t->table); t->table = NULL; t->size = 0; } } /** * skipn_spaces - Removes leading whitespace from @str. * @str: The string to be stripped. * @n: length of str to parse, will stop at \0 if encountered before n * * Returns a pointer to the first non-whitespace character in @str. 
* if all whitespace will return NULL */ const char *skipn_spaces(const char *str, size_t n) { for (; n && isspace(*str); --n) ++str; if (n) return (char *)str; return NULL; } const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name, size_t *ns_len) { const char *end = fqname + n; const char *name = skipn_spaces(fqname, n); *ns_name = NULL; *ns_len = 0; if (!name) return NULL; if (name[0] == ':') { char *split = strnchr(&name[1], end - &name[1], ':'); *ns_name = skipn_spaces(&name[1], end - &name[1]); if (!*ns_name) return NULL; if (split) { *ns_len = split - *ns_name; if (*ns_len == 0) *ns_name = NULL; split++; if (end - split > 1 && strncmp(split, "//", 2) == 0) split += 2; name = skipn_spaces(split, end - split); } else { /* a ns name without a following profile is allowed */ name = NULL; *ns_len = end - *ns_name; } } if (name && *name == 0) name = NULL; return name; } /** * aa_info_message - log a none profile related status message * @str: message to log */ void aa_info_message(const char *str) { if (audit_enabled) { DEFINE_AUDIT_DATA(ad, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL); ad.info = str; aa_audit_msg(AUDIT_APPARMOR_STATUS, &ad, NULL); } printk(KERN_INFO "AppArmor: %s\n", str); } __counted char *aa_str_alloc(int size, gfp_t gfp) { struct counted_str *str; str = kmalloc(struct_size(str, name, size), gfp); if (!str) return NULL; kref_init(&str->count); return str->name; } void aa_str_kref(struct kref *kref) { kfree(container_of(kref, struct counted_str, count)); } const char aa_file_perm_chrs[] = "xwracd km l "; const char *aa_file_perm_names[] = { "exec", "write", "read", "append", "create", "delete", "open", "rename", "setattr", "getattr", "setcred", "getcred", "chmod", "chown", "chgrp", "lock", "mmap", "mprot", "link", "snapshot", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "unknown", "stack", "change_onexec", "change_profile", "change_hat", }; /** * aa_perm_mask_to_str - convert a perm mask to its 
short string * @str: character buffer to store string in (at least 10 characters) * @str_size: size of the @str buffer * @chrs: NUL-terminated character buffer of permission characters * @mask: permission mask to convert */ void aa_perm_mask_to_str(char *str, size_t str_size, const char *chrs, u32 mask) { unsigned int i, perm = 1; size_t num_chrs = strlen(chrs); for (i = 0; i < num_chrs; perm <<= 1, i++) { if (mask & perm) { /* Ensure that one byte is left for NUL-termination */ if (WARN_ON_ONCE(str_size <= 1)) break; *str++ = chrs[i]; str_size--; } } *str = '\0'; } void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names, u32 mask) { const char *fmt = "%s"; unsigned int i, perm = 1; bool prev = false; for (i = 0; i < 32; perm <<= 1, i++) { if (mask & perm) { audit_log_format(ab, fmt, names[i]); if (!prev) { prev = true; fmt = " %s"; } } } } void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs, u32 chrsmask, const char * const *names, u32 namesmask) { char str[33]; audit_log_format(ab, "\""); if ((mask & chrsmask) && chrs) { aa_perm_mask_to_str(str, sizeof(str), chrs, mask & chrsmask); mask &= ~chrsmask; audit_log_format(ab, "%s", str); if (mask & namesmask) audit_log_format(ab, " "); } if ((mask & namesmask) && names) aa_audit_perm_names(ab, names, mask & namesmask); audit_log_format(ab, "\""); } /** * aa_apply_modes_to_perms - apply namespace and profile flags to perms * @profile: that perms where computed from * @perms: perms to apply mode modifiers to * * TODO: split into profile and ns based flags for when accumulating perms */ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms) { switch (AUDIT_MODE(profile)) { case AUDIT_ALL: perms->audit = ALL_PERMS_MASK; fallthrough; case AUDIT_NOQUIET: perms->quiet = 0; break; case AUDIT_QUIET: perms->audit = 0; fallthrough; case AUDIT_QUIET_DENIED: perms->quiet = ALL_PERMS_MASK; break; } if (KILL_MODE(profile)) perms->kill = ALL_PERMS_MASK; else if 
(COMPLAIN_MODE(profile)) perms->complain = ALL_PERMS_MASK; else if (USER_MODE(profile)) perms->prompt = ALL_PERMS_MASK; } void aa_profile_match_label(struct aa_profile *profile, struct aa_ruleset *rules, struct aa_label *label, int type, u32 request, struct aa_perms *perms) { /* TODO: doesn't yet handle extended types */ aa_state_t state; state = aa_dfa_next(rules->policy->dfa, rules->policy->start[AA_CLASS_LABEL], type); aa_label_match(profile, rules, label, state, false, request, perms); } /** * aa_check_perms - do audit mode selection based on perms set * @profile: profile being checked * @perms: perms computed for the request * @request: requested perms * @ad: initialized audit structure (MAY BE NULL if not auditing) * @cb: callback fn for type specific fields (MAY BE NULL) * * Returns: 0 if permission else error code * * Note: profile audit modes need to be set before calling by setting the * perm masks appropriately. * * If not auditing then complain mode is not enabled and the * error code will indicate whether there was an explicit deny * with a positive value. 
*/ int aa_check_perms(struct aa_profile *profile, struct aa_perms *perms, u32 request, struct apparmor_audit_data *ad, void (*cb)(struct audit_buffer *, void *)) { int type, error; u32 denied = request & (~perms->allow | perms->deny); if (likely(!denied)) { /* mask off perms that are not being force audited */ request &= perms->audit; if (!request || !ad) return 0; type = AUDIT_APPARMOR_AUDIT; error = 0; } else { error = -EACCES; if (denied & perms->kill) type = AUDIT_APPARMOR_KILL; else if (denied == (denied & perms->complain)) type = AUDIT_APPARMOR_ALLOWED; else type = AUDIT_APPARMOR_DENIED; if (denied == (denied & perms->hide)) error = -ENOENT; denied &= ~perms->quiet; if (!ad || !denied) return error; } if (ad) { ad->subj_label = &profile->label; ad->request = request; ad->denied = denied; ad->error = error; aa_audit_msg(type, ad, cb); } if (type == AUDIT_APPARMOR_ALLOWED) error = 0; return error; } /** * aa_policy_init - initialize a policy structure * @policy: policy to initialize (NOT NULL) * @prefix: prefix name if any is required. 
(MAYBE NULL) * @name: name of the policy, init will make a copy of it (NOT NULL) * @gfp: allocation mode * * Note: this fn creates a copy of strings passed in * * Returns: true if policy init successful */ bool aa_policy_init(struct aa_policy *policy, const char *prefix, const char *name, gfp_t gfp) { char *hname; /* freed by policy_free */ if (prefix) { hname = aa_str_alloc(strlen(prefix) + strlen(name) + 3, gfp); if (hname) sprintf(hname, "%s//%s", prefix, name); } else { hname = aa_str_alloc(strlen(name) + 1, gfp); if (hname) strcpy(hname, name); } if (!hname) return false; policy->hname = hname; /* base.name is a substring of fqname */ policy->name = basename(policy->hname); INIT_LIST_HEAD(&policy->list); INIT_LIST_HEAD(&policy->profiles); return true; } /** * aa_policy_destroy - free the elements referenced by @policy * @policy: policy that is to have its elements freed (NOT NULL) */ void aa_policy_destroy(struct aa_policy *policy) { AA_BUG(on_list_rcu(&policy->profiles)); AA_BUG(on_list_rcu(&policy->list)); /* don't free name as its a subset of hname */ aa_put_str(policy->hname); }
9 7 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 // SPDX-License-Identifier: GPL-2.0-only /* * "security" table * * This is for use by Mandatory Access Control (MAC) security models, * which need to be able to manage security policy in separate context * to DAC. * * Based on iptable_mangle.c * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> */ #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/slab.h> #include <net/ip.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); MODULE_DESCRIPTION("iptables security table, for MAC rules"); #define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ (1 << NF_INET_FORWARD) | \ (1 << NF_INET_LOCAL_OUT) static const struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, .me = THIS_MODULE, .af = NFPROTO_IPV4, .priority = NF_IP_PRI_SECURITY, }; static struct nf_hook_ops *sectbl_ops __read_mostly; static int iptable_security_table_init(struct net *net) { struct ipt_replace *repl; int ret; repl = ipt_alloc_initial_table(&security_table); if (repl == NULL) return -ENOMEM; ret = ipt_register_table(net, &security_table, repl, sectbl_ops); kfree(repl); return ret; } static void __net_exit iptable_security_net_pre_exit(struct net *net) { ipt_unregister_table_pre_exit(net, "security"); } static void __net_exit iptable_security_net_exit(struct net *net) { ipt_unregister_table_exit(net, "security"); } static struct pernet_operations iptable_security_net_ops = { .pre_exit = iptable_security_net_pre_exit, .exit = iptable_security_net_exit, }; 
static int __init iptable_security_init(void) { int ret = xt_register_template(&security_table, iptable_security_table_init); if (ret < 0) return ret; sectbl_ops = xt_hook_ops_alloc(&security_table, ipt_do_table); if (IS_ERR(sectbl_ops)) { xt_unregister_template(&security_table); return PTR_ERR(sectbl_ops); } ret = register_pernet_subsys(&iptable_security_net_ops); if (ret < 0) { xt_unregister_template(&security_table); kfree(sectbl_ops); return ret; } return ret; } static void __exit iptable_security_fini(void) { unregister_pernet_subsys(&iptable_security_net_ops); kfree(sectbl_ops); xt_unregister_template(&security_table); } module_init(iptable_security_init); module_exit(iptable_security_fini);
1 1 2 2 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 
526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 
1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 
2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237
// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics TCP target.
 * Copyright (c) 2018 Lightbits Labs. All rights reserved.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/crc32c.h>
#include <linux/err.h>
#include <linux/nvme-tcp.h>
#include <linux/nvme-keyring.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
#include <net/tls_prot.h>
#include <net/handshake.h>
#include <linux/inet.h>
#include <linux/llist.h>
#include <trace/events/sock.h>

#include "nvmet.h"

#define NVMET_TCP_DEF_INLINE_DATA_SIZE	(4 * PAGE_SIZE)
#define NVMET_TCP_MAXH2CDATA		0x400000 /* 4 MiB, arbitrary limit (advertised as icresp maxdata) */
#define NVMET_TCP_BACKLOG 128

/*
 * Parse @str as a base-10 integer and store it in @val if it lies within
 * [@min, @max].
 *
 * Returns 0 on success, -EINVAL if the string does not parse or the value
 * is out of range (in which case @val is left untouched).
 */
static int param_store_val(const char *str, int *val, int min, int max)
{
	int ret, new_val;

	ret = kstrtoint(str, 10, &new_val);
	if (ret)
		return -EINVAL;

	if (new_val < min || new_val > max)
		return -EINVAL;

	*val = new_val;
	return 0;
}

/* .set callback for the module parameters below: accepts 0..INT_MAX only. */
static int set_params(const char *str, const struct kernel_param *kp)
{
	return param_store_val(str, kp->arg, 0, INT_MAX);
}

static const struct kernel_param_ops set_param_ops = {
	.set	= set_params,
	.get	= param_get_int,
};

/* Define the socket priority to use for connections where it is desirable
 * that the NIC consider performing optimized packet processing or filtering.
 * A non-zero value being sufficient to indicate general consideration of any
 * possible optimization.  Making it a module param allows for alternative
 * values that may be unique for some NIC implementations.
 */
static int so_priority;
device_param_cb(so_priority, &set_param_ops, &so_priority, 0644);
MODULE_PARM_DESC(so_priority, "nvmet tcp socket optimize priority: Default 0");

/* Define a time period (in usecs) that io_work() shall sample an activated
 * queue before determining it to be idle.  This optional module behavior
 * can enable NIC solutions that support socket optimized packet processing
 * using advanced interrupt moderation techniques.
 */
static int idle_poll_period_usecs;
device_param_cb(idle_poll_period_usecs, &set_param_ops,
		&idle_poll_period_usecs, 0644);
MODULE_PARM_DESC(idle_poll_period_usecs,
		"nvmet tcp io_work poll till idle time period in usecs: Default 0");

#ifdef CONFIG_NVME_TARGET_TCP_TLS
/*
 * TLS handshake timeout, in seconds (see the parameter description below).
 */
static int tls_handshake_timeout = 10;
module_param(tls_handshake_timeout, int, 0644);
MODULE_PARM_DESC(tls_handshake_timeout,
		 "nvme TLS handshake timeout in seconds (default 10)");
#endif

#define NVMET_TCP_RECV_BUDGET		8
#define NVMET_TCP_SEND_BUDGET		8
#define NVMET_TCP_IO_WORK_BUDGET	64

/*
 * Per-command send state.  nvmet_tcp_try_send_one() dispatches on this value
 * to the matching nvmet_try_send_*() helper.
 */
enum nvmet_tcp_send_state {
	NVMET_TCP_SEND_DATA_PDU,
	NVMET_TCP_SEND_DATA,
	NVMET_TCP_SEND_R2T,
	NVMET_TCP_SEND_DDGST,
	NVMET_TCP_SEND_RESPONSE
};

/*
 * Per-queue receive state.  NVMET_TCP_RECV_ERR is entered from
 * nvmet_tcp_fatal_error() and nvmet_tcp_socket_error();
 * nvmet_prepare_receive_pdu() resets the state to NVMET_TCP_RECV_PDU.
 */
enum nvmet_tcp_recv_state {
	NVMET_TCP_RECV_PDU,
	NVMET_TCP_RECV_DATA,
	NVMET_TCP_RECV_DDGST,
	NVMET_TCP_RECV_ERR,
};

/* Bits for nvmet_tcp_cmd.flags. */
enum {
	/* when set, nvmet_tcp_execute_request() queues a response instead of executing */
	NVMET_TCP_F_INIT_FAILED = (1 << 0),
};

/* State for one command on a queue. */
struct nvmet_tcp_cmd {
	struct nvmet_tcp_queue		*queue;
	struct nvmet_req		req;

	/* PDU buffers; the send helpers transmit data_pdu, r2t_pdu and rsp_pdu */
	struct nvme_tcp_cmd_pdu		*cmd_pdu;
	struct nvme_tcp_rsp_pdu		*rsp_pdu;
	struct nvme_tcp_data_pdu	*data_pdu;
	struct nvme_tcp_r2t_pdu		*r2t_pdu;

	/* compared against req.transfer_len; also used as the R2T offset */
	u32				rbytes_done;
	/* advanced by nvmet_try_send_data() as payload is sent */
	u32				wbytes_done;

	u32				pdu_len;
	u32				pdu_recv;
	/* index into req.sg, set by nvmet_tcp_build_pdu_iovec() */
	int				sg_idx;
	char				recv_cbuf[CMSG_LEN(sizeof(char))];
	struct msghdr			recv_msg;
	/* bio_vec array, freed by nvmet_tcp_free_cmd_buffers() */
	struct bio_vec			*iov;
	/* NVMET_TCP_F_* bits */
	u32				flags;

	/* linkage on the queue's free_list or resp_send_list */
	struct list_head		entry;
	/* linkage on the queue's lock-less resp_list */
	struct llist_node		lentry;

	/* send state */
	u32				offset;
	struct scatterlist		*cur_sg;
	enum nvmet_tcp_send_state	state;

	/* exp_ddgst is filled in by nvmet_tcp_calc_ddgst() */
	__le32				exp_ddgst;
	__le32				recv_ddgst;
};

/* Connection-level state of a queue. */
enum nvmet_tcp_queue_state {
	NVMET_TCP_Q_CONNECTING,
	NVMET_TCP_Q_TLS_HANDSHAKE,
	NVMET_TCP_Q_LIVE,
	NVMET_TCP_Q_DISCONNECTING,
	NVMET_TCP_Q_FAILED,
};

/* State for one queue: its socket plus send and receive bookkeeping. */
struct nvmet_tcp_queue {
	struct socket		*sock;
	struct nvmet_tcp_port	*port;
	/* queued on nvmet_tcp_wq by nvmet_tcp_queue_response() */
	struct work_struct	io_work;
	struct nvmet_cq		nvme_cq;
	struct nvmet_sq		nvme_sq;
	struct kref		kref;

	/* send state */
	/* command array; nvmet_tcp_cmd_tag() derives a tag from an entry's index */
	struct nvmet_tcp_cmd	*cmds;
	unsigned int		nr_cmds;
	struct list_head	free_list;
	/* filled by nvmet_tcp_queue_response(), drained into resp_send_list */
	struct llist_head	resp_list;
	struct list_head	resp_send_list;
	/* number of entries currently on resp_send_list */
	int			send_list_len;
	/* command currently being transmitted, NULL when none */
	struct nvmet_tcp_cmd	*snd_cmd;

	/* recv state */
	int			offset;
	int			left;
	enum nvmet_tcp_recv_state rcv_state;
	struct nvmet_tcp_cmd	*cmd;
	union nvme_tcp_pdu	pdu;

	/* digest state */
	/* both are set from the ICReq digest field in nvmet_tcp_handle_icreq() */
	bool			hdr_digest;
	bool			data_digest;

	/* TLS state */
	key_serial_t		tls_pskid;
	struct delayed_work	tls_handshake_tmo_work;

	unsigned long		poll_end;

	spinlock_t		state_lock;
	enum nvmet_tcp_queue_state state;

	struct sockaddr_storage	sockaddr;
	struct sockaddr_storage	sockaddr_peer;
	struct work_struct	release_work;

	/* queue number printed in log messages */
	int			idx;
	struct list_head	queue_list;

	/* embedded command; nvmet_tcp_put_cmd() never returns it to free_list */
	struct nvmet_tcp_cmd	connect;

	struct page_frag_cache	pf_cache;

	/* NOTE(review): presumably the socket's original callbacks, saved for restore - confirm */
	void (*data_ready)(struct sock *);
	void (*state_change)(struct sock *);
	void (*write_space)(struct sock *);
};

/* State for one listening port. */
struct nvmet_tcp_port {
	struct socket		*sock;
	struct work_struct	accept_work;
	struct nvmet_port	*nport;
	struct sockaddr_storage addr;
	/* NOTE(review): presumably the listening socket's original callback - confirm */
	void (*data_ready)(struct sock *);
};

static DEFINE_IDA(nvmet_tcp_queue_ida);
static LIST_HEAD(nvmet_tcp_queue_list);
static DEFINE_MUTEX(nvmet_tcp_queue_mutex);

static struct workqueue_struct *nvmet_tcp_wq;
static const struct nvmet_fabrics_ops nvmet_tcp_ops;
static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd);

/*
 * Return the tag for @cmd: its index within queue->cmds, or USHRT_MAX when
 * the command array has not been allocated yet (nr_cmds == 0).
 */
static inline u16 nvmet_tcp_cmd_tag(struct nvmet_tcp_queue *queue,
		struct nvmet_tcp_cmd *cmd)
{
	if (unlikely(!queue->nr_cmds)) {
		/* We didn't allocate cmds yet, send 0xffff */
		return USHRT_MAX;
	}

	return cmd - queue->cmds;
}

/* True for a write command that has not yet received all of transfer_len. */
static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd)
{
	return nvme_is_write(cmd->req.cmd) &&
		cmd->rbytes_done < cmd->req.transfer_len;
}

/* As nvmet_tcp_has_data_in(), but only while the completion status is zero. */
static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd)
{
	return nvmet_tcp_has_data_in(cmd) && !cmd->req.cqe->status;
}

/* True for a non-write command with a non-empty transfer and zero status. */
static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd)
{
	return !nvme_is_write(cmd->req.cmd) &&
		cmd->req.transfer_len > 0 &&
		!cmd->req.cqe->status;
}
static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd) { return nvme_is_write(cmd->req.cmd) && cmd->pdu_len && !cmd->rbytes_done; } static inline struct nvmet_tcp_cmd * nvmet_tcp_get_cmd(struct nvmet_tcp_queue *queue) { struct nvmet_tcp_cmd *cmd; cmd = list_first_entry_or_null(&queue->free_list, struct nvmet_tcp_cmd, entry); if (!cmd) return NULL; list_del_init(&cmd->entry); cmd->rbytes_done = cmd->wbytes_done = 0; cmd->pdu_len = 0; cmd->pdu_recv = 0; cmd->iov = NULL; cmd->flags = 0; return cmd; } static inline void nvmet_tcp_put_cmd(struct nvmet_tcp_cmd *cmd) { if (unlikely(cmd == &cmd->queue->connect)) return; list_add_tail(&cmd->entry, &cmd->queue->free_list); } static inline int queue_cpu(struct nvmet_tcp_queue *queue) { return queue->sock->sk->sk_incoming_cpu; } static inline u8 nvmet_tcp_hdgst_len(struct nvmet_tcp_queue *queue) { return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0; } static inline u8 nvmet_tcp_ddgst_len(struct nvmet_tcp_queue *queue) { return queue->data_digest ? 
NVME_TCP_DIGEST_LENGTH : 0; } static inline void nvmet_tcp_hdgst(void *pdu, size_t len) { put_unaligned_le32(~crc32c(~0, pdu, len), pdu + len); } static int nvmet_tcp_verify_hdgst(struct nvmet_tcp_queue *queue, void *pdu, size_t len) { struct nvme_tcp_hdr *hdr = pdu; __le32 recv_digest; __le32 exp_digest; if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) { pr_err("queue %d: header digest enabled but no header digest\n", queue->idx); return -EPROTO; } recv_digest = *(__le32 *)(pdu + hdr->hlen); nvmet_tcp_hdgst(pdu, len); exp_digest = *(__le32 *)(pdu + hdr->hlen); if (recv_digest != exp_digest) { pr_err("queue %d: header digest error: recv %#x expected %#x\n", queue->idx, le32_to_cpu(recv_digest), le32_to_cpu(exp_digest)); return -EPROTO; } return 0; } static int nvmet_tcp_check_ddgst(struct nvmet_tcp_queue *queue, void *pdu) { struct nvme_tcp_hdr *hdr = pdu; u8 digest_len = nvmet_tcp_hdgst_len(queue); u32 len; len = le32_to_cpu(hdr->plen) - hdr->hlen - (hdr->flags & NVME_TCP_F_HDGST ? digest_len : 0); if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) { pr_err("queue %d: data digest flag is cleared\n", queue->idx); return -EPROTO; } return 0; } /* If cmd buffers are NULL, no operation is performed */ static void nvmet_tcp_free_cmd_buffers(struct nvmet_tcp_cmd *cmd) { kfree(cmd->iov); sgl_free(cmd->req.sg); cmd->iov = NULL; cmd->req.sg = NULL; } static void nvmet_tcp_build_pdu_iovec(struct nvmet_tcp_cmd *cmd) { struct bio_vec *iov = cmd->iov; struct scatterlist *sg; u32 length, offset, sg_offset; int nr_pages; length = cmd->pdu_len; nr_pages = DIV_ROUND_UP(length, PAGE_SIZE); offset = cmd->rbytes_done; cmd->sg_idx = offset / PAGE_SIZE; sg_offset = offset % PAGE_SIZE; sg = &cmd->req.sg[cmd->sg_idx]; while (length) { u32 iov_len = min_t(u32, length, sg->length - sg_offset); bvec_set_page(iov, sg_page(sg), iov_len, sg->offset + sg_offset); length -= iov_len; sg = sg_next(sg); iov++; sg_offset = 0; } iov_iter_bvec(&cmd->recv_msg.msg_iter, ITER_DEST, cmd->iov, nr_pages, 
cmd->pdu_len); } static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) { queue->rcv_state = NVMET_TCP_RECV_ERR; if (queue->nvme_sq.ctrl) nvmet_ctrl_fatal_error(queue->nvme_sq.ctrl); else kernel_sock_shutdown(queue->sock, SHUT_RDWR); } static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) { queue->rcv_state = NVMET_TCP_RECV_ERR; if (status == -EPIPE || status == -ECONNRESET) kernel_sock_shutdown(queue->sock, SHUT_RDWR); else nvmet_tcp_fatal_error(queue); } static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) { struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl; u32 len = le32_to_cpu(sgl->length); if (!len) return 0; if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET)) { if (!nvme_is_write(cmd->req.cmd)) return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; if (len > cmd->req.port->inline_data_size) return NVME_SC_SGL_INVALID_OFFSET | NVME_STATUS_DNR; cmd->pdu_len = len; } cmd->req.transfer_len += len; cmd->req.sg = sgl_alloc(len, GFP_KERNEL, &cmd->req.sg_cnt); if (!cmd->req.sg) return NVME_SC_INTERNAL; cmd->cur_sg = cmd->req.sg; if (nvmet_tcp_has_data_in(cmd)) { cmd->iov = kmalloc_array(cmd->req.sg_cnt, sizeof(*cmd->iov), GFP_KERNEL); if (!cmd->iov) goto err; } return 0; err: nvmet_tcp_free_cmd_buffers(cmd); return NVME_SC_INTERNAL; } static void nvmet_tcp_calc_ddgst(struct nvmet_tcp_cmd *cmd) { size_t total_len = cmd->req.transfer_len; struct scatterlist *sg = cmd->req.sg; u32 crc = ~0; while (total_len) { size_t len = min_t(size_t, total_len, sg->length); /* * Note that the scatterlist does not contain any highmem pages, * as it was allocated by sgl_alloc() with GFP_KERNEL. 
*/ crc = crc32c(crc, sg_virt(sg), len); total_len -= len; sg = sg_next(sg); } cmd->exp_ddgst = cpu_to_le32(~crc); } static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) { struct nvme_tcp_data_pdu *pdu = cmd->data_pdu; struct nvmet_tcp_queue *queue = cmd->queue; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); u8 ddgst = nvmet_tcp_ddgst_len(cmd->queue); cmd->offset = 0; cmd->state = NVMET_TCP_SEND_DATA_PDU; pdu->hdr.type = nvme_tcp_c2h_data; pdu->hdr.flags = NVME_TCP_F_DATA_LAST | (queue->nvme_sq.sqhd_disabled ? NVME_TCP_F_DATA_SUCCESS : 0); pdu->hdr.hlen = sizeof(*pdu); pdu->hdr.pdo = pdu->hdr.hlen + hdgst; pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst + cmd->req.transfer_len + ddgst); pdu->command_id = cmd->req.cqe->command_id; pdu->data_length = cpu_to_le32(cmd->req.transfer_len); pdu->data_offset = cpu_to_le32(cmd->wbytes_done); if (queue->data_digest) { pdu->hdr.flags |= NVME_TCP_F_DDGST; nvmet_tcp_calc_ddgst(cmd); } if (cmd->queue->hdr_digest) { pdu->hdr.flags |= NVME_TCP_F_HDGST; nvmet_tcp_hdgst(pdu, sizeof(*pdu)); } } static void nvmet_setup_r2t_pdu(struct nvmet_tcp_cmd *cmd) { struct nvme_tcp_r2t_pdu *pdu = cmd->r2t_pdu; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); cmd->offset = 0; cmd->state = NVMET_TCP_SEND_R2T; pdu->hdr.type = nvme_tcp_r2t; pdu->hdr.flags = 0; pdu->hdr.hlen = sizeof(*pdu); pdu->hdr.pdo = 0; pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); pdu->command_id = cmd->req.cmd->common.command_id; pdu->ttag = nvmet_tcp_cmd_tag(cmd->queue, cmd); pdu->r2t_length = cpu_to_le32(cmd->req.transfer_len - cmd->rbytes_done); pdu->r2t_offset = cpu_to_le32(cmd->rbytes_done); if (cmd->queue->hdr_digest) { pdu->hdr.flags |= NVME_TCP_F_HDGST; nvmet_tcp_hdgst(pdu, sizeof(*pdu)); } } static void nvmet_setup_response_pdu(struct nvmet_tcp_cmd *cmd) { struct nvme_tcp_rsp_pdu *pdu = cmd->rsp_pdu; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); cmd->offset = 0; cmd->state = NVMET_TCP_SEND_RESPONSE; pdu->hdr.type = nvme_tcp_rsp; pdu->hdr.flags = 0; 
pdu->hdr.hlen = sizeof(*pdu); pdu->hdr.pdo = 0; pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); if (cmd->queue->hdr_digest) { pdu->hdr.flags |= NVME_TCP_F_HDGST; nvmet_tcp_hdgst(pdu, sizeof(*pdu)); } } static void nvmet_tcp_process_resp_list(struct nvmet_tcp_queue *queue) { struct llist_node *node; struct nvmet_tcp_cmd *cmd; for (node = llist_del_all(&queue->resp_list); node; node = node->next) { cmd = llist_entry(node, struct nvmet_tcp_cmd, lentry); list_add(&cmd->entry, &queue->resp_send_list); queue->send_list_len++; } } static struct nvmet_tcp_cmd *nvmet_tcp_fetch_cmd(struct nvmet_tcp_queue *queue) { queue->snd_cmd = list_first_entry_or_null(&queue->resp_send_list, struct nvmet_tcp_cmd, entry); if (!queue->snd_cmd) { nvmet_tcp_process_resp_list(queue); queue->snd_cmd = list_first_entry_or_null(&queue->resp_send_list, struct nvmet_tcp_cmd, entry); if (unlikely(!queue->snd_cmd)) return NULL; } list_del_init(&queue->snd_cmd->entry); queue->send_list_len--; if (nvmet_tcp_need_data_out(queue->snd_cmd)) nvmet_setup_c2h_data_pdu(queue->snd_cmd); else if (nvmet_tcp_need_data_in(queue->snd_cmd)) nvmet_setup_r2t_pdu(queue->snd_cmd); else nvmet_setup_response_pdu(queue->snd_cmd); return queue->snd_cmd; } static void nvmet_tcp_queue_response(struct nvmet_req *req) { struct nvmet_tcp_cmd *cmd = container_of(req, struct nvmet_tcp_cmd, req); struct nvmet_tcp_queue *queue = cmd->queue; enum nvmet_tcp_recv_state queue_state; struct nvmet_tcp_cmd *queue_cmd; struct nvme_sgl_desc *sgl; u32 len; /* Pairs with store_release in nvmet_prepare_receive_pdu() */ queue_state = smp_load_acquire(&queue->rcv_state); queue_cmd = READ_ONCE(queue->cmd); if (unlikely(cmd == queue_cmd)) { sgl = &cmd->req.cmd->common.dptr.sgl; len = le32_to_cpu(sgl->length); /* * Wait for inline data before processing the response. * Avoid using helpers, this might happen before * nvmet_req_init is completed. 
*/ if (queue_state == NVMET_TCP_RECV_PDU && len && len <= cmd->req.port->inline_data_size && nvme_is_write(cmd->req.cmd)) return; } llist_add(&cmd->lentry, &queue->resp_list); queue_work_on(queue_cpu(queue), nvmet_tcp_wq, &cmd->queue->io_work); } static void nvmet_tcp_execute_request(struct nvmet_tcp_cmd *cmd) { if (unlikely(cmd->flags & NVMET_TCP_F_INIT_FAILED)) nvmet_tcp_queue_response(&cmd->req); else cmd->req.execute(&cmd->req); } static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_MORE | MSG_SPLICE_PAGES, }; struct bio_vec bvec; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); int left = sizeof(*cmd->data_pdu) - cmd->offset + hdgst; int ret; bvec_set_virt(&bvec, (void *)cmd->data_pdu + cmd->offset, left); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left); ret = sock_sendmsg(cmd->queue->sock, &msg); if (ret <= 0) return ret; cmd->offset += ret; left -= ret; if (left) return -EAGAIN; cmd->state = NVMET_TCP_SEND_DATA; cmd->offset = 0; return 1; } static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct nvmet_tcp_queue *queue = cmd->queue; int ret; while (cmd->cur_sg) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, }; struct page *page = sg_page(cmd->cur_sg); struct bio_vec bvec; u32 left = cmd->cur_sg->length - cmd->offset; if ((!last_in_batch && cmd->queue->send_list_len) || cmd->wbytes_done + left < cmd->req.transfer_len || queue->data_digest || !queue->nvme_sq.sqhd_disabled) msg.msg_flags |= MSG_MORE; bvec_set_page(&bvec, page, left, cmd->offset); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left); ret = sock_sendmsg(cmd->queue->sock, &msg); if (ret <= 0) return ret; cmd->offset += ret; cmd->wbytes_done += ret; /* Done with sg?*/ if (cmd->offset == cmd->cur_sg->length) { cmd->cur_sg = sg_next(cmd->cur_sg); cmd->offset = 0; } } if (queue->data_digest) { cmd->state = NVMET_TCP_SEND_DDGST; cmd->offset = 0; } else { if 
(queue->nvme_sq.sqhd_disabled) { cmd->queue->snd_cmd = NULL; nvmet_tcp_put_cmd(cmd); } else { nvmet_setup_response_pdu(cmd); } } if (queue->nvme_sq.sqhd_disabled) nvmet_tcp_free_cmd_buffers(cmd); return 1; } static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, }; struct bio_vec bvec; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); int left = sizeof(*cmd->rsp_pdu) - cmd->offset + hdgst; int ret; if (!last_in_batch && cmd->queue->send_list_len) msg.msg_flags |= MSG_MORE; else msg.msg_flags |= MSG_EOR; bvec_set_virt(&bvec, (void *)cmd->rsp_pdu + cmd->offset, left); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left); ret = sock_sendmsg(cmd->queue->sock, &msg); if (ret <= 0) return ret; cmd->offset += ret; left -= ret; if (left) return -EAGAIN; nvmet_tcp_free_cmd_buffers(cmd); cmd->queue->snd_cmd = NULL; nvmet_tcp_put_cmd(cmd); return 1; } static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, }; struct bio_vec bvec; u8 hdgst = nvmet_tcp_hdgst_len(cmd->queue); int left = sizeof(*cmd->r2t_pdu) - cmd->offset + hdgst; int ret; if (!last_in_batch && cmd->queue->send_list_len) msg.msg_flags |= MSG_MORE; else msg.msg_flags |= MSG_EOR; bvec_set_virt(&bvec, (void *)cmd->r2t_pdu + cmd->offset, left); iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, left); ret = sock_sendmsg(cmd->queue->sock, &msg); if (ret <= 0) return ret; cmd->offset += ret; left -= ret; if (left) return -EAGAIN; cmd->queue->snd_cmd = NULL; return 1; } static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) { struct nvmet_tcp_queue *queue = cmd->queue; int left = NVME_TCP_DIGEST_LENGTH - cmd->offset; struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; struct kvec iov = { .iov_base = (u8 *)&cmd->exp_ddgst + cmd->offset, .iov_len = left }; int ret; if (!last_in_batch && cmd->queue->send_list_len) 
msg.msg_flags |= MSG_MORE; else msg.msg_flags |= MSG_EOR; ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (unlikely(ret <= 0)) return ret; cmd->offset += ret; left -= ret; if (left) return -EAGAIN; if (queue->nvme_sq.sqhd_disabled) { cmd->queue->snd_cmd = NULL; nvmet_tcp_put_cmd(cmd); } else { nvmet_setup_response_pdu(cmd); } return 1; } static int nvmet_tcp_try_send_one(struct nvmet_tcp_queue *queue, bool last_in_batch) { struct nvmet_tcp_cmd *cmd = queue->snd_cmd; int ret = 0; if (!cmd || queue->state == NVMET_TCP_Q_DISCONNECTING) { cmd = nvmet_tcp_fetch_cmd(queue); if (unlikely(!cmd)) return 0; } if (cmd->state == NVMET_TCP_SEND_DATA_PDU) { ret = nvmet_try_send_data_pdu(cmd); if (ret <= 0) goto done_send; } if (cmd->state == NVMET_TCP_SEND_DATA) { ret = nvmet_try_send_data(cmd, last_in_batch); if (ret <= 0) goto done_send; } if (cmd->state == NVMET_TCP_SEND_DDGST) { ret = nvmet_try_send_ddgst(cmd, last_in_batch); if (ret <= 0) goto done_send; } if (cmd->state == NVMET_TCP_SEND_R2T) { ret = nvmet_try_send_r2t(cmd, last_in_batch); if (ret <= 0) goto done_send; } if (cmd->state == NVMET_TCP_SEND_RESPONSE) ret = nvmet_try_send_response(cmd, last_in_batch); done_send: if (ret < 0) { if (ret == -EAGAIN) return 0; return ret; } return 1; } static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue, int budget, int *sends) { int i, ret = 0; for (i = 0; i < budget; i++) { ret = nvmet_tcp_try_send_one(queue, i == budget - 1); if (unlikely(ret < 0)) { nvmet_tcp_socket_error(queue, ret); goto done; } else if (ret == 0) { break; } (*sends)++; } done: return ret; } static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue) { queue->offset = 0; queue->left = sizeof(struct nvme_tcp_hdr); WRITE_ONCE(queue->cmd, NULL); /* Ensure rcv_state is visible only after queue->cmd is set */ smp_store_release(&queue->rcv_state, NVMET_TCP_RECV_PDU); } static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) { struct nvme_tcp_icreq_pdu *icreq = 
&queue->pdu.icreq; struct nvme_tcp_icresp_pdu *icresp = &queue->pdu.icresp; struct msghdr msg = {}; struct kvec iov; int ret; if (le32_to_cpu(icreq->hdr.plen) != sizeof(struct nvme_tcp_icreq_pdu)) { pr_err("bad nvme-tcp pdu length (%d)\n", le32_to_cpu(icreq->hdr.plen)); nvmet_tcp_fatal_error(queue); return -EPROTO; } if (icreq->pfv != NVME_TCP_PFV_1_0) { pr_err("queue %d: bad pfv %d\n", queue->idx, icreq->pfv); return -EPROTO; } if (icreq->hpda != 0) { pr_err("queue %d: unsupported hpda %d\n", queue->idx, icreq->hpda); return -EPROTO; } queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE); queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE); memset(icresp, 0, sizeof(*icresp)); icresp->hdr.type = nvme_tcp_icresp; icresp->hdr.hlen = sizeof(*icresp); icresp->hdr.pdo = 0; icresp->hdr.plen = cpu_to_le32(icresp->hdr.hlen); icresp->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); icresp->maxdata = cpu_to_le32(NVMET_TCP_MAXH2CDATA); icresp->cpda = 0; if (queue->hdr_digest) icresp->digest |= NVME_TCP_HDR_DIGEST_ENABLE; if (queue->data_digest) icresp->digest |= NVME_TCP_DATA_DIGEST_ENABLE; iov.iov_base = icresp; iov.iov_len = sizeof(*icresp); ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (ret < 0) { queue->state = NVMET_TCP_Q_FAILED; return ret; /* queue removal will cleanup */ } queue->state = NVMET_TCP_Q_LIVE; nvmet_prepare_receive_pdu(queue); return 0; } static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, struct nvmet_tcp_cmd *cmd, struct nvmet_req *req) { size_t data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length); int ret; /* * This command has not been processed yet, hence we are trying to * figure out if there is still pending data left to receive. If * we don't, we can simply prepare for the next pdu and bail out, * otherwise we will need to prepare a buffer and receive the * stale data before continuing forward. 
*/ if (!nvme_is_write(cmd->req.cmd) || !data_len || data_len > cmd->req.port->inline_data_size) { nvmet_prepare_receive_pdu(queue); return; } ret = nvmet_tcp_map_data(cmd); if (unlikely(ret)) { pr_err("queue %d: failed to map data\n", queue->idx); nvmet_tcp_fatal_error(queue); return; } queue->rcv_sta